diff options
Diffstat (limited to 'src/gallium/drivers')
104 files changed, 6572 insertions, 9499 deletions
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 7bb7893d93..bd059d5716 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -30,7 +30,6 @@ #include "i915_context.h" #include "i915_batch.h" #include "i915_debug.h" -#include "i915_reg.h" #include "i915_resource.h" #include "pipe/p_context.h" diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index bd8b9174a8..36c04a3165 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ b/src/gallium/drivers/i965/intel_decode.c @@ -40,6 +40,7 @@ #include <stdint.h> #include <string.h> +#include "util/u_memory.h" #include "util/u_string.h" #include "intel_decode.h" @@ -116,8 +117,7 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) }; - for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) { if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { unsigned int len = 1, i; @@ -275,8 +275,7 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; } - for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_2d); opcode++) { if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) { unsigned int i; @@ -1037,9 +1036,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, return len; } - for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]); - opcode++) - { + for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) { if (opcodes_3d_1d[opcode].i830_only && !i830) continue; @@ -1291,8 +1288,7 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { unsigned int len = 1, i; @@ -1637,8 +1633,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures return len; } - for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) { unsigned int i; len = 1; @@ -1705,8 +1700,7 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { unsigned int len = 1, i; diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 55b877b4ab..08da2286b0 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -28,8 +28,6 @@ C_SOURCES = \ lp_scene_queue.c \ lp_screen.c \ lp_setup.c \ - lp_setup_coef.c \ - lp_setup_coef_intrin.c \ lp_setup_line.c \ lp_setup_point.c \ lp_setup_tri.c \ @@ -38,6 +36,7 @@ C_SOURCES = \ lp_state_clip.c \ lp_state_derived.c \ lp_state_fs.c \ + lp_state_setup.c \ lp_state_gs.c \ lp_state_rasterizer.c \ lp_state_sampler.c \ @@ -63,12 +62,12 @@ PROGS := lp_test_format \ # Need this for the lp_test_*.o files CLEAN_EXTRA = *.o +include ../../Makefile.template + lp_test_sincos.o : sse_mathfun.h PROGS_DEPS := ../../auxiliary/libgallium.a -include ../../Makefile.template - lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 650435f0f1..49950153a4 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -27,13 +27,7 @@ env.Depends('lp_tile_soa.c', [ ]) -# Only enable SSSE3 for lp_tile_soa_sse3.c -ssse3_env = env.Clone() -if env['gcc'] \ - and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \ - and env['machine'] in ('x86', 'x86_64') : - ssse3_env.Append(CCFLAGS = ['-mssse3']) -lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c') +lp_tile_soa_os = env.SharedObject('lp_tile_soa.c') llvmpipe = env.ConvenienceLibrary( @@ -64,13 +58,12 @@ llvmpipe = env.ConvenienceLibrary( 'lp_setup_line.c', 'lp_setup_point.c', 'lp_setup_tri.c', - 'lp_setup_coef.c', - 'lp_setup_coef_intrin.c', 'lp_setup_vbuf.c', 'lp_state_blend.c', 'lp_state_clip.c', 'lp_state_derived.c', 'lp_state_fs.c', + 'lp_state_setup.c', 'lp_state_gs.c', 'lp_state_rasterizer.c', 'lp_state_sampler.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index e28efe778f..e50643790c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref) + LLVMValueRef ref, + boolean do_branch) { struct lp_build_context bld; LLVMValueRef test; @@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder, lp_build_name(test, "alpha_mask"); lp_build_mask_update(mask, test); + + if (do_branch) + lp_build_mask_check(mask); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 44603b418c..27ca8aad4d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref); + LLVMValueRef ref, + boolean do_branch); #endif /* !LP_BLD_ALPHA_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 7561899a74..7eb76d4fb3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -53,15 +53,8 @@ * ... ... ... ... ... ... ... ... ... * * - * Stencil test: - * Two-sided stencil test is supported but probably not as efficient as - * it could be. Currently, we use if/then/else constructs to do the - * operations for front vs. back-facing polygons. We could probably do - * both the front and back arithmetic then use a Select() instruction to - * choose the result depending on polyon orientation. We'd have to - * measure performance both ways and see which is better. - * * @author Jose Fonseca <jfonseca@vmware.com> + * @author Brian Paul <jfonseca@vmware.com> */ #include "pipe/p_state.h" @@ -71,6 +64,7 @@ #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_bitarit.h" #include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_intr.h" @@ -128,57 +122,32 @@ lp_build_stencil_test_single(struct lp_build_context *bld, /** * Do the one or two-sided stencil test comparison. * \sa lp_build_stencil_test_single - * \param face an integer indicating front (+) or back (-) facing polygon. - * If NULL, assume front-facing. + * \param front_facing an integer vector mask, indicating front (~0) or back + * (0) facing polygon. If NULL, assume front-facing. */ static LLVMValueRef lp_build_stencil_test(struct lp_build_context *bld, const struct pipe_stencil_state stencil[2], LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, - LLVMValueRef face) + LLVMValueRef front_facing) { LLVMValueRef res; assert(stencil[0].enabled); - if (stencil[1].enabled && face) { - /* do two-sided test */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; - - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + /* do front face test */ + res = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); - lp_build_flow_scope_declare(flow_ctx, &result); + if (stencil[1].enabled && front_facing) { + /* do back face test */ + LLVMValueRef back_res; - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_test_single(bld, &stencil[1], - stencilRefs[1], stencilVals); - } - lp_build_endif(&if_ctx); + back_res = lp_build_stencil_test_single(bld, &stencil[1], + stencilRefs[1], stencilVals); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); - - res = result; - } - else { - /* do single-side test */ - res = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); + res = lp_build_select(bld, front_facing, res, back_res); } return res; @@ -195,14 +164,12 @@ lp_build_stencil_op_single(struct lp_build_context *bld, const struct pipe_stencil_state *stencil, enum stencil_op op, LLVMValueRef stencilRef, - LLVMValueRef stencilVals, - LLVMValueRef mask) + LLVMValueRef stencilVals) { - const unsigned stencilMax = 255; /* XXX fix */ struct lp_type type = bld->type; LLVMValueRef res; - LLVMValueRef max = lp_build_const_int_vec(type, stencilMax); + LLVMValueRef max = lp_build_const_int_vec(type, 0xff); unsigned stencil_op; assert(type.sign); @@ -255,19 +222,7 @@ lp_build_stencil_op_single(struct lp_build_context *bld, break; default: assert(0 && "bad stencil op mode"); - res = NULL; - } - - if (stencil->writemask != stencilMax) { - /* mask &= stencil->writemask */ - LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask); - mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); - /* res = (res & mask) | (stencilVals & ~mask) */ - res = lp_build_select_bitwise(bld, writemask, res, stencilVals); - } - else { - /* res = mask ? res : stencilVals */ - res = lp_build_select(bld, mask, res, stencilVals); + res = bld->undef; } return res; @@ -284,49 +239,40 @@ lp_build_stencil_op(struct lp_build_context *bld, LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, LLVMValueRef mask, - LLVMValueRef face) + LLVMValueRef front_facing) { - assert(stencil[0].enabled); - - if (stencil[1].enabled && face) { - /* do two-sided op */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; + LLVMValueRef res; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + assert(stencil[0].enabled); - lp_build_flow_scope_declare(flow_ctx, &result); + /* do front face op */ + res = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals); - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + if (stencil[1].enabled && front_facing) { + /* do back face op */ + LLVMValueRef back_res; - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_op_single(bld, &stencil[1], op, - stencilRefs[1], stencilVals, mask); - } - lp_build_endif(&if_ctx); + back_res = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + res = lp_build_select(bld, front_facing, res, back_res); + } - return result; + if (stencil->writemask != 0xff) { + /* mask &= stencil->writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask); + mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + res = lp_build_select_bitwise(bld, writemask, res, stencilVals); } else { - /* do single-sided op */ - return lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); } + + return res; } @@ -358,8 +304,13 @@ lp_depth_type(const struct util_format_description *format_desc, } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { assert(format_desc->block.bits <= 32); - if(format_desc->channel[swizzle].normalized) - type.norm = TRUE; + assert(format_desc->channel[swizzle].normalized); + if (format_desc->channel[swizzle].size < format_desc->block.bits) { + /* Prefer signed integers when possible, as SSE has less support + * for unsigned comparison; + */ + type.sign = TRUE; + } } else assert(0); @@ -381,7 +332,7 @@ lp_depth_type(const struct util_format_description *format_desc, */ static boolean get_z_shift_and_mask(const struct util_format_description *format_desc, - unsigned *shift, unsigned *mask) + unsigned *shift, unsigned *width, unsigned *mask) { const unsigned total_bits = format_desc->block.bits; unsigned z_swizzle; @@ -397,12 +348,14 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) return FALSE; + *width = format_desc->channel[z_swizzle].size; + padding_right = 0; for (chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; padding_left = - total_bits - (padding_right + format_desc->channel[z_swizzle].size); + total_bits - (padding_right + *width); if (padding_left || padding_right) { unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; @@ -413,7 +366,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, *mask = 0xffffffff; } - *shift = padding_left; + *shift = padding_right; return TRUE; } @@ -457,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, * \param maskvalue is the depth test mask. * \param counter is a pointer of the uint32 counter. */ -static void +void lp_build_occlusion_count(LLVMBuilderRef builder, struct lp_type type, LLVMValueRef maskvalue, @@ -494,31 +447,57 @@ lp_build_occlusion_count(LLVMBuilderRef builder, * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) - * \param z_src the incoming depth/stencil values (a 2x2 quad) + * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) * \param zs_dst_ptr pointer to depth/stencil values in framebuffer - * \param facing contains float value indicating front/back facing polygon + * \param facing contains boolean value indicating front/back facing polygon */ void lp_build_depth_stencil_test(LLVMBuilderRef builder, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], - struct lp_type type, + struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, LLVMValueRef face, - LLVMValueRef counter) + LLVMValueRef *zs_value, + boolean do_branch) { - struct lp_build_context bld; - struct lp_build_context sbld; + struct lp_type z_type; + struct lp_build_context z_bld; + struct lp_build_context s_bld; struct lp_type s_type; + unsigned z_shift = 0, z_width = 0, z_mask = 0; LLVMValueRef zs_dst, z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; - LLVMValueRef orig_mask = mask->value; + LLVMValueRef orig_mask = lp_build_mask_value(mask); + LLVMValueRef front_facing = NULL; + + + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(z_src_type.floating) { + z_src_type.sign = FALSE; + z_src_type.norm = TRUE; + } + else { + assert(!z_src_type.sign); + assert(z_src_type.norm); + } + + /* Pick the depth type. */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + + /* FIXME: Cope with a depth test type with a different bit width. */ + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); /* Sanity checking */ { @@ -540,8 +519,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } assert(z_swizzle < 4); - assert(format_desc->block.bits == type.width); - if (type.floating) { + assert(format_desc->block.bits == z_type.width); + if (z_type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); @@ -552,54 +531,56 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].normalized); - assert(!type.fixed); - assert(!type.sign); - assert(type.norm); + assert(!z_type.fixed); } } /* Setup build context for Z vals */ - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&z_bld, builder, z_type); /* Setup build context for stencil vals */ - s_type = lp_type_int_vec(type.width); - lp_build_context_init(&sbld, builder, s_type); + s_type = lp_type_int_vec(z_type.width); + lp_build_context_init(&s_bld, builder, s_type); /* Load current z/stencil value from z/stencil buffer */ + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); - lp_build_name(zs_dst, "zsbufval"); + lp_build_name(zs_dst, "zs_dst"); /* Compute and apply the Z/stencil bitmasks and shifts. */ { - unsigned z_shift, z_mask; unsigned s_shift, s_mask; - if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { - if (z_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); - z_src = LLVMBuildLShr(builder, z_src, shift, ""); - } - + if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { if (z_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); - z_src = LLVMBuildAnd(builder, z_src, mask, ""); - z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); - z_bitmask = mask; /* used below */ + z_bitmask = lp_build_const_int_vec(z_type, z_mask); } - else { + + /* + * Align the framebuffer Z 's LSB to the right. + */ + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); + } else if (z_bitmask) { + /* TODO: Instead of loading a mask from memory and ANDing, it's + * probably faster to just shake the bits with two shifts. */ + z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); + } else { z_dst = zs_dst; + lp_build_name(z_dst, "z_dst"); } - - lp_build_name(z_dst, "zsbuf.z"); } if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { if (s_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); + LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); stencil_shift = shift; /* used below */ } @@ -608,35 +589,85 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (s_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); + LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask); stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } - lp_build_name(stencil_vals, "stencil"); + lp_build_name(stencil_vals, "s_dst"); } } - if (stencil[0].enabled) { + + if (face) { + LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + + /* front_facing = face != 0 ? ~0 : 0 */ + front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); + front_facing = LLVMBuildSExt(builder, front_facing, + LLVMIntType(s_bld.type.length*s_bld.type.width), + ""); + front_facing = LLVMBuildBitCast(builder, front_facing, + s_bld.int_vec_type, ""); + } + /* convert scalar stencil refs into vectors */ - stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); - stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); + stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); + stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); - s_pass_mask = lp_build_stencil_test(&sbld, stencil, - stencil_refs, stencil_vals, face); + s_pass_mask = lp_build_stencil_test(&s_bld, stencil, + stencil_refs, stencil_vals, + front_facing); /* apply stencil-fail operator */ { - LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask); - stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, + LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, stencil_refs, stencil_vals, - s_fail_mask, face); + s_fail_mask, front_facing); } } if (depth->enabled) { + /* + * Convert fragment Z to the desired type, aligning the LSB to the right. + */ + + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); + assert(lp_check_value(z_src_type, z_src)); + if (z_src_type.floating) { + /* + * Convert from floating point values + */ + + if (!z_type.floating) { + z_src = lp_build_clamped_float_to_unsigned_norm(builder, + z_src_type, + z_width, + z_src); + } + } else { + /* + * Convert from unsigned normalized values. + */ + + assert(!z_src_type.sign); + assert(!z_src_type.fixed); + assert(z_src_type.norm); + assert(!z_type.floating); + if (z_src_type.width > z_width) { + LLVMValueRef shift = lp_build_const_int_vec(z_src_type, + z_src_type.width - z_width); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + } + assert(lp_check_value(z_type, z_src)); + + lp_build_name(z_src, "z_src"); + /* compare src Z to dst Z, returning 'pass' mask */ - z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); + z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); if (!stencil[0].enabled) { /* We can potentially skip all remaining operations here, but only @@ -644,28 +675,28 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * buffer values. Don't need to update Z buffer values. */ lp_build_mask_update(mask, z_pass); + + if (do_branch) { + lp_build_mask_check(mask); + do_branch = FALSE; + } } if (depth->writemask) { - LLVMValueRef zselectmask = mask->value; + LLVMValueRef zselectmask; /* mask off bits that failed Z test */ - zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); + zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); /* mask off bits that failed stencil test */ if (s_pass_mask) { zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); } - /* if combined Z/stencil format, mask off the stencil bits */ - if (z_bitmask) { - zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); - } - /* Mix the old and new Z buffer values. - * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) + * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] */ - z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); + z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { @@ -673,33 +704,35 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ - z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, + z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, - z_fail_mask, face); + z_fail_mask, front_facing); /* apply Z-pass operator */ - z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - z_pass_mask, face); + z_pass_mask, front_facing); } } else { /* No depth test: apply Z-pass operator to stencil buffer values which * passed the stencil test. */ - s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - s_pass_mask, face); + s_pass_mask, front_facing); } - /* The Z bits are already in the right place but we may need to shift the - * stencil bits before ORing Z with Stencil to make the final pixel value. - */ + /* Put Z and ztencil bits in the right place */ + if (z_dst && z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildShl(builder, z_dst, shift, ""); + } if (stencil_vals && stencil_shift) - stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, + stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals, stencil_shift, ""); /* Finally, merge/store the z/stencil values */ @@ -707,13 +740,13 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, (stencil[0].enabled && stencil[0].writemask)) { if (z_dst && stencil_vals) - zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); + zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, ""); else if (z_dst) zs_dst = z_dst; else zs_dst = stencil_vals; - LLVMBuildStore(builder, zs_dst, zs_dst_ptr); + *zs_value = zs_dst; } if (s_pass_mask) @@ -722,6 +755,47 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); - if (counter) - lp_build_occlusion_count(builder, type, mask->value, counter); + if (do_branch) + lp_build_mask_check(mask); + +} + + +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); + + LLVMBuildStore(builder, zs_value, zs_dst_ptr); +} + + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + struct lp_type z_type; + struct lp_build_context z_bld; + LLVMValueRef z_dst; + + /* XXX: pointlessly redo type logic: + */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + lp_build_context_init(&z_bld, builder, z_type); + + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); + + z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); + z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst); + + LLVMBuildStore(builder, z_dst, zs_dst_ptr); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index e257a5bd7d..a54ef3a711 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -61,7 +61,27 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef zs_src, LLVMValueRef zs_dst_ptr, LLVMValueRef facing, - LLVMValueRef counter); + LLVMValueRef *zs_value, + boolean do_branch); +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_occlusion_count(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef maskvalue, + LLVMValueRef counter); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 2a374f8c39..c9da8900d0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -206,7 +206,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); /* - * a = a0 + x * dadx + y * dady + * a = a0 + (x * dadx + y * dady) */ if (attrib == 0 && chan == 0) { @@ -219,11 +219,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = a0; if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - LLVMValueRef tmp; - tmp = LLVMBuildFMul(builder, bld->x, dadx, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); - tmp = LLVMBuildFMul(builder, bld->y, dady, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); + LLVMValueRef ax, ay, axy; + ax = LLVMBuildFMul(builder, bld->x, dadx, ""); + ay = LLVMBuildFMul(builder, bld->y, dady, ""); + axy = LLVMBuildFAdd(builder, ax, ay, ""); + a = LLVMBuildFAdd(builder, a, axy, ""); } } @@ -272,7 +272,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * This is called when we move from one quad to the next. */ static void -attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) +attribs_update(struct lp_build_interp_soa_context *bld, + int quad_index, + int start, + int end) { struct lp_build_context *coeff_bld = &bld->coeff_bld; LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); @@ -282,7 +285,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) assert(quad_index < 4); - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + for(attrib = start; attrib < end; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -350,6 +353,14 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) } #endif + if (attrib == 0 && chan == 2) { + /* FIXME: Depth values can exceed 1.0, due to the fact that + * setup interpolation coefficients refer to (0,0) which causes + * precision loss. So we must clamp to 1.0 here to avoid artifacts + */ + a = lp_build_min(coeff_bld, a, coeff_bld->one); + } + attrib_name(a, attrib, chan, ""); } bld->attribs[attrib][chan] = a; @@ -434,8 +445,6 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); - - attribs_update(bld, 0); } @@ -443,10 +452,20 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, * Advance the position and inputs to the given quad within the block. */ void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, - int quad_index) +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index) +{ + assert(quad_index < 4); + + attribs_update(bld, quad_index, 1, bld->num_attribs); +} + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, + int quad_index) { assert(quad_index < 4); - attribs_update(bld, quad_index); + attribs_update(bld, quad_index, 0, 1); } + diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 3054030f73..a7ebdd1bfa 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -46,7 +46,31 @@ #include "tgsi/tgsi_exec.h" -#include "lp_setup.h" +/** + * Describes how to compute the interpolation coefficients (a0, dadx, dady) + * from the vertices passed into our triangle/line/point functions by the + * draw module. + * + * Vertices are treated as an array of float[4] values, indexed by + * src_index. + * + * LP_INTERP_COLOR is translated to either LP_INTERP_CONSTANT or + * LINEAR depending on flatshade state. + */ +enum lp_interp { + LP_INTERP_CONSTANT, + LP_INTERP_COLOR, + LP_INTERP_LINEAR, + LP_INTERP_PERSPECTIVE, + LP_INTERP_POSITION, + LP_INTERP_FACING +}; + +struct lp_shader_input { + ushort interp:4; /* enum lp_interp */ + ushort usage_mask:4; /* bitmask of TGSI_WRITEMASK_x flags */ + ushort src_index:8; /* where to find values in incoming vertices */ +}; struct lp_build_interp_soa_context @@ -89,7 +113,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef y); void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index); + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, int quad_index); diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 39f2c6085e..763432ed71 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -82,6 +82,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) } } + lp_delete_setup_variants(llvmpipe); + align_free( llvmpipe ); } @@ -108,6 +110,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) memset(llvmpipe, 0, sizeof *llvmpipe); make_empty_list(&llvmpipe->fs_variants_list); + make_empty_list(&llvmpipe->setup_variants_list); llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 34fa20e204..db09c95b27 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -39,6 +39,7 @@ #include "lp_jit.h" #include "lp_setup.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" struct llvmpipe_vbuf_render; @@ -48,6 +49,7 @@ struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; struct lp_setup_context; +struct lp_setup_variant; struct lp_velems_state; struct llvmpipe_context { @@ -105,12 +107,9 @@ struct llvmpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - /** Fragment shader input interpolation info */ - unsigned num_inputs; - struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; - /** The tiling engine */ struct lp_setup_context *setup; + struct lp_setup_variant setup_variant; /** The primitive drawing context */ struct draw_context *draw; @@ -120,6 +119,9 @@ struct llvmpipe_context { struct lp_fs_variant_list_item fs_variants_list; unsigned nr_fs_variants; + + struct lp_setup_variant_list_item setup_variants_list; + unsigned nr_setup_variants; }; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index bb538b2bd8..3626ce4a86 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -32,6 +32,7 @@ struct pipe_context; struct pipe_fence_handle; +struct pipe_resource; void llvmpipe_flush(struct pipe_context *pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 04b12dedcc..e09ec504ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -162,9 +162,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) void lp_jit_screen_cleanup(struct llvmpipe_screen *screen) { - if(screen->engine) - LLVMDisposeExecutionEngine(screen->engine); - if(screen->pass) LLVMDisposePassManager(screen->pass); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 16e04fce0c..114f21f2d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -144,7 +144,7 @@ typedef void (*lp_jit_frag_func)(const struct lp_jit_context *context, uint32_t x, uint32_t y, - float facing, + uint32_t facing, const void *a0, const void *dadx, const void *dady, diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h index d1c431475d..2538164ffa 100644 --- a/src/gallium/drivers/llvmpipe/lp_limits.h +++ b/src/gallium/drivers/llvmpipe/lp_limits.h @@ -72,4 +72,14 @@ */ #define LP_MAX_SHADER_VARIANTS 1024 +/** + * Max number of setup variants that will be kept around. + * + * These are determined by the combination of the fragment shader + * input signature and a small amount of rasterization state (eg + * flatshading). It is likely that many active fragment shaders will + * share the same setup variant. + */ +#define LP_MAX_SETUP_VARIANTS 64 + #endif /* LP_LIMITS_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d7e6415e13..d358a98394 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -211,8 +211,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_scene *scene = task->scene; - unsigned clear_value = arg.clear_zstencil.value; - unsigned clear_mask = arg.clear_zstencil.mask; + uint32_t clear_value = arg.clear_zstencil.value; + uint32_t clear_mask = arg.clear_zstencil.mask; const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; const unsigned block_size = scene->zsbuf.blocksize; @@ -220,7 +220,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, uint8_t *dst; unsigned i, j; - LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask); + LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", + __FUNCTION__, clear_value, clear_mask); /* * Clear the aera of the swizzled depth/depth buffer matching this tile, in @@ -232,16 +233,31 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, dst = task->depth_tile; + clear_value &= clear_mask; + switch (block_size) { case 1: + assert(clear_mask == 0xff); memset(dst, (uint8_t) clear_value, height * width); break; case 2: - for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst; - for (j = 0; j < width; j++) - *row++ = (uint16_t) clear_value; - dst += dst_stride; + if (clear_mask == 0xffff) { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) clear_value; + dst += dst_stride; + } + } + else { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) { + uint16_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; + } } break; case 4: @@ -258,7 +274,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, uint32_t *row = (uint32_t *)dst; for (j = 0; j < width; j++) { uint32_t tmp = ~clear_mask & *row; - *row++ = (clear_value & clear_mask) | tmp; + *row++ = clear_value | tmp; } dst += dst_stride; } @@ -318,7 +334,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, { const struct lp_scene *scene = task->scene; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -349,10 +365,10 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, @@ -398,7 +414,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, unsigned x, unsigned y, unsigned mask) { - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const struct lp_scene *scene = task->scene; uint8_t *color[PIPE_MAX_COLOR_BUFS]; @@ -430,10 +446,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, mask, @@ -474,6 +490,14 @@ lp_rast_end_query(struct lp_rasterizer_task *task, } +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + task->state = arg.state; +} + + /** * Set top row and left column of the tile's pixels to white. For debugging. @@ -581,10 +605,12 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_triangle_8, lp_rast_triangle_3_4, lp_rast_triangle_3_16, + lp_rast_triangle_4_16, lp_rast_shade_tile, lp_rast_shade_tile_opaque, lp_rast_begin_query, lp_rast_end_query, + lp_rast_set_state, }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index c55b97a9d1..a64c152cf8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -78,30 +78,28 @@ struct lp_rast_state { * These pointers point into the bin data buffer. */ struct lp_rast_shader_inputs { - float facing; /** Positive for front-facing, negative for back-facing */ - unsigned disable:1; /** Partially binned, disable this command */ - unsigned opaque:1; /** Is opaque */ - - float (*a0)[4]; - float (*dadx)[4]; - float (*dady)[4]; - - const struct lp_rast_state *state; + unsigned frontfacing:1; /** True for front-facing */ + unsigned disable:1; /** Partially binned, disable this command */ + unsigned opaque:1; /** Is opaque */ + unsigned pad0:29; /* wasted space */ + unsigned stride; /* how much to advance data between a0, dadx, dady */ + unsigned pad2; /* wasted space */ + unsigned pad3; /* wasted space */ + /* followed by a0, dadx, dady and planes[] */ }; - +/* Note: the order of these values is important as they are loaded by + * sse code in rasterization: + */ struct lp_rast_plane { - /* one-pixel sized trivial accept offsets for each plane */ - int ei; - - /* one-pixel sized trivial reject offsets for each plane */ - int eo; - /* edge function values at minx,miny ?? */ int c; int dcdx; int dcdy; + + /* one-pixel sized trivial reject offsets for each plane */ + int eo; }; /** @@ -111,17 +109,24 @@ struct lp_rast_plane { * Objects of this type are put into the lp_setup_context::data buffer. */ struct lp_rast_triangle { - /* inputs for the shader */ - struct lp_rast_shader_inputs inputs; - #ifdef DEBUG float v[3][2]; + float pad0; + float pad1; #endif - struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */ + /* inputs for the shader */ + struct lp_rast_shader_inputs inputs; + /* planes are also allocated here */ }; +#define GET_A0(inputs) ((float (*)[4])((inputs)+1)) +#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride)) +#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride)) +#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride)) + + struct lp_rasterizer * lp_rast_create( unsigned num_threads ); @@ -149,9 +154,10 @@ union lp_rast_cmd_arg { const struct lp_rast_state *set_state; uint8_t clear_color[4]; struct { - unsigned value; - unsigned mask; + uint32_t value; + uint32_t mask; } clear_zstencil; + const struct lp_rast_state *state; struct lp_fence *fence; struct llvmpipe_query *query_obj; }; @@ -238,12 +244,14 @@ lp_rast_arg_null( void ) #define LP_RAST_OP_TRIANGLE_8 0x9 #define LP_RAST_OP_TRIANGLE_3_4 0xa #define LP_RAST_OP_TRIANGLE_3_16 0xb -#define LP_RAST_OP_SHADE_TILE 0xc -#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd -#define LP_RAST_OP_BEGIN_QUERY 0xe -#define LP_RAST_OP_END_QUERY 0xf - -#define LP_RAST_OP_MAX 0x10 +#define LP_RAST_OP_TRIANGLE_4_16 0xc +#define LP_RAST_OP_SHADE_TILE 0xd +#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe +#define LP_RAST_OP_BEGIN_QUERY 0xf +#define LP_RAST_OP_END_QUERY 0x10 +#define LP_RAST_OP_SET_STATE 0x11 + +#define LP_RAST_OP_MAX 0x12 #define LP_RAST_OP_MASK 0xff void diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index 9fc78645a3..64ac616f62 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -12,6 +12,7 @@ static INLINE int u_bit_scan(unsigned *mask) struct tile { int coverage; int overdraw; + const struct lp_rast_state *state; char data[TILE_SIZE][TILE_SIZE]; }; @@ -42,10 +43,12 @@ static const char *cmd_names[LP_RAST_OP_MAX] = "triangle_8", "triangle_3_4", "triangle_3_16", + "triangle_4_16", "shade_tile", "shade_tile_opaque", "begin_query", "end_query", + "set_state", }; static const char *cmd_name(unsigned cmd) @@ -55,31 +58,31 @@ static const char *cmd_name(unsigned cmd) } static const struct lp_fragment_shader_variant * -get_variant( const struct cmd_block *block, - int k ) +get_variant( const struct lp_rast_state *state, + const struct cmd_block *block, + int k ) { if (block->cmd[k] == LP_RAST_OP_SHADE_TILE || - block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE) - return block->arg[k].shade_tile->state->variant; - - if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || + block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE || + block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || block->cmd[k] == LP_RAST_OP_TRIANGLE_2 || block->cmd[k] == LP_RAST_OP_TRIANGLE_3 || block->cmd[k] == LP_RAST_OP_TRIANGLE_4 || block->cmd[k] == LP_RAST_OP_TRIANGLE_5 || block->cmd[k] == LP_RAST_OP_TRIANGLE_6 || block->cmd[k] == LP_RAST_OP_TRIANGLE_7) - return block->arg[k].triangle.tri->inputs.state->variant; + return state->variant; return NULL; } static boolean -is_blend( const struct cmd_block *block, +is_blend( const struct lp_rast_state *state, + const struct cmd_block *block, int k ) { - const struct lp_fragment_shader_variant *variant = get_variant(block, k); + const struct lp_fragment_shader_variant *variant = get_variant(state, block, k); if (variant) return variant->key.blend.rt[0].blend_enable; @@ -92,6 +95,7 @@ is_blend( const struct cmd_block *block, static void debug_bin( const struct cmd_bin *bin ) { + const struct lp_rast_state *state = NULL; const struct cmd_block *head = bin->head; int i, j = 0; @@ -99,9 +103,12 @@ debug_bin( const struct cmd_bin *bin ) while (head) { for (i = 0; i < head->count; i++, j++) { + if (head->cmd[i] == LP_RAST_OP_SET_STATE) + state = head->arg[i].state; + debug_printf("%d: %s %s\n", j, cmd_name(head->cmd[i]), - is_blend(head, i) ? "blended" : ""); + is_blend(state, head, i) ? "blended" : ""); } head = head->next; } @@ -133,7 +140,7 @@ debug_shade_tile(int x, int y, char val) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - boolean blend = inputs->state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; unsigned i,j; if (inputs->disable) @@ -171,11 +178,12 @@ debug_triangle(int tilex, int tiley, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); struct lp_rast_plane plane[8]; int x, y; int count = 0; unsigned i, nr_planes = 0; - boolean blend = tri->inputs.state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; if (tri->inputs.disable) { /* This triangle was partially binned and has been disabled */ @@ -183,7 +191,7 @@ debug_triangle(int tilex, int tiley, } while (plane_mask) { - plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)]; + plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)]; plane[nr_planes].c = (plane[nr_planes].c + plane[nr_planes].dcdy * tiley - plane[nr_planes].dcdx * tilex); @@ -232,15 +240,19 @@ do_debug_bin( struct tile *tile, memset(tile->data, ' ', sizeof tile->data); tile->coverage = 0; tile->overdraw = 0; + tile->state = NULL; for (block = bin->head; block; block = block->next) { for (k = 0; k < block->count; k++, j++) { - boolean blend = is_blend(block, k); + boolean blend = is_blend(tile->state, block, k); char val = get_label(j); int count = 0; if (print_cmds) debug_printf("%c: %15s", val, cmd_name(block->cmd[k])); + + if (block->cmd[k] == LP_RAST_OP_SET_STATE) + tile->state = block->arg[k].state; if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR || block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 7370119e96..b30408f097 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -77,6 +77,7 @@ struct cmd_bin; struct lp_rasterizer_task { const struct cmd_bin *bin; + const struct lp_rast_state *state; struct lp_scene *scene; unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ @@ -244,7 +245,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, unsigned x, unsigned y ) { const struct lp_scene *scene = task->scene; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -260,10 +261,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, @@ -293,6 +294,14 @@ void lp_rast_triangle_3_4(struct lp_rasterizer_task *, void lp_rast_triangle_3_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); + +void lp_rast_triangle_4_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg); + void lp_debug_bin( const struct cmd_bin *bin ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index a1f309d4b0..042c315635 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -123,6 +123,16 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, } void +lp_rast_triangle_4_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<4)-1; + lp_rast_triangle_3(task, arg2); +} + +void lp_rast_triangle_3_4(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { @@ -230,144 +240,207 @@ sign_bits4(const __m128i *cstep, int cdiff) } -/* Special case for 3 plane triangle which is contained entirely - * within a 16x16 block. - */ +#define NR_PLANES 3 + + + + + + + void lp_rast_triangle_3_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; - unsigned mask = arg.triangle.plane_mask; - const int x = task->x + (mask & 0xff); - const int y = task->y + (mask >> 8); - unsigned outmask, inmask, partmask, partial_mask; - unsigned j; - __m128i cstep4[3][4]; - - outmask = 0; /* outside one or more trivial reject planes */ - partmask = 0; /* outside one or more trivial accept planes */ - - for (j = 0; j < 3; j++) { - const int dcdx = -plane[j].dcdx * 4; - const int dcdy = plane[j].dcdy * 4; - __m128i xdcdy = _mm_set1_epi32(dcdy); - - cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); - cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); - cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); - cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); - - { - const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - const int cox = plane[j].eo * 4; - const int cio = plane[j].ei * 4 - 1; - - outmask |= sign_bits4(cstep4[j], c + cox); - partmask |= sign_bits4(cstep4[j], c + cio); - } - } + const struct lp_rast_plane *plane = GET_PLANES(tri); + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + unsigned i, j; + + struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; + unsigned nr = 0; + + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + __m128i rej4; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &rej4); + + /* Adjust dcdx; + */ + dcdx = _mm_sub_epi32(zero, dcdx); - if (outmask == 0xffff) - return; + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + rej4 = _mm_slli_epi32(rej4, 2); - /* Mask of sub-blocks which are inside all trivial accept planes: - */ - inmask = ~partmask & 0xffff; + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); - /* Mask of sub-blocks which are inside all trivial reject planes, - * but outside at least one trivial accept plane: - */ - partial_mask = partmask & ~outmask; + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); - assert((partial_mask & inmask) == 0); + for (i = 0; i < 4; i++) { + __m128i cx = c; - /* Iterate over partials: - */ - while (partial_mask) { - int i = ffs(partial_mask) - 1; - int ix = (i & 3) * 4; - int iy = (i >> 2) * 4; - int px = x + ix; - int py = y + iy; - unsigned mask = 0xffff; - - partial_mask &= ~(1 << i); - - for (j = 0; j < 3; j++) { - const int cx = (plane[j].c - - plane[j].dcdx * px - + plane[j].dcdy * py) * 4; - - mask &= ~sign_bits4(cstep4[j], cx); - } + for (j = 0; j < 4; j++) { + __m128i c4rej = _mm_add_epi32(cx, rej4); + __m128i rej_masks = _mm_srai_epi32(c4rej, 31); - if (mask) - lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); - } + /* if (is_zero(rej_masks)) */ + if (_mm_movemask_epi8(rej_masks) == 0) { + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2); - /* Iterate over fulls: - */ - while (inmask) { - int i = ffs(inmask) - 1; - int ix = (i & 3) * 4; - int iy = (i >> 2) * 4; - int px = x + ix; - int py = y + iy; + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); + + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); + + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); + + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); + + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); - inmask &= ~(1 << i); + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); - block_full_4(task, tri, px, py); + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + out[nr].i = i; + out[nr].j = j; + out[nr].mask = mask; + if (mask != 0xffff) + nr++; + } + cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2)); + } + + c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2)); } + + for (i = 0; i < nr; i++) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x + 4 * out[i].j, + y + 4 * out[i].i, + 0xffff & ~out[i].mask); } + + + void lp_rast_triangle_3_4(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) + const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; - unsigned mask = arg.triangle.plane_mask; - const int x = task->x + (mask & 0xff); - const int y = task->y + (mask >> 8); - unsigned j; - - /* Iterate over partials: + const struct lp_rast_plane *plane = GET_PLANES(tri); + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &unused); + + /* Adjust dcdx; */ + dcdx = _mm_sub_epi32(zero, dcdx); + + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); + + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); + + { - unsigned mask = 0xffff; + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2); + + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); - for (j = 0; j < 3; j++) { - const int cx = (plane[j].c - - plane[j].dcdx * x - + plane[j].dcdy * y); + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); - const int dcdx = -plane[j].dcdx; - const int dcdy = plane[j].dcdy; - __m128i xdcdy = _mm_set1_epi32(dcdy); + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); - __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); - __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); - __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); - __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); - __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); - __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); - __m128i result = _mm_packs_epi16(cstep01, cstep23); + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); - /* Extract the sign bits - */ - mask &= ~_mm_movemask_epi8(result); - } + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); - if (mask) - lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + if (mask != 0xffff) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x, + y, + 0xffff & ~mask); } } - +#undef NR_PLANES #endif @@ -383,10 +456,13 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, #define TAG(x) x##_3 #define NR_PLANES 3 +/*#define TRI_4 lp_rast_triangle_3_4*/ +/*#define TRI_16 lp_rast_triangle_3_16*/ #include "lp_rast_tri_tmp.h" #define TAG(x) x##_4 #define NR_PLANES 4 +#define TRI_16 lp_rast_triangle_4_16 #include "lp_rast_tri_tmp.h" #define TAG(x) x##_5 diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 9830a43ba5..4825d651c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -82,7 +82,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; const int cox = plane[j].eo * 4; - const int cio = plane[j].ei * 4 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 4 - 1; build_masks(c[j] + cox, cio - cox, @@ -156,6 +157,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); const int x = task->x, y = task->y; struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; @@ -172,7 +174,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, while (plane_mask) { int i = ffs(plane_mask) - 1; - plane[j] = tri->plane[i]; + plane[j] = tri_plane[i]; plane_mask &= ~(1 << i); c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; @@ -180,7 +182,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 16; const int dcdy = plane[j].dcdy * 16; const int cox = plane[j].eo * 16; - const int cio = plane[j].ei * 16 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 16 - 1; build_masks(c[j] + cox, cio - cox, @@ -245,6 +248,133 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, } } +#if defined(PIPE_ARCH_SSE) && defined(TRI_16) +/* XXX: special case this when intersection is not required. + * - tile completely within bbox, + * - bbox completely within tile. + */ +void +TRI_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + unsigned outmask, partial_mask; + unsigned j; + __m128i cstep4[NR_PLANES][4]; + + int x = (mask & 0xff); + int y = (mask >> 8); + + outmask = 0; /* outside one or more trivial reject planes */ + + x += task->x; + y += task->y; + + for (j = 0; j < NR_PLANES; j++) { + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); + cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); + cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); + cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); + + { + const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + const int cox = plane[j].eo * 4; + + outmask |= sign_bits4(cstep4[j], c + cox); + } + } + + if (outmask == 0xffff) + return; + + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = 0xffff & ~outmask; + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + unsigned mask = 0xffff; + + partial_mask &= ~(1 << i); + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c - 1 + - plane[j].dcdx * px + + plane[j].dcdy * py) * 4; + + mask &= ~sign_bits4(cstep4[j], cx); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); + } +} +#endif + +#if defined(PIPE_ARCH_SSE) && defined(TRI_4) +void +TRI_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + const int x = task->x + (mask & 0xff); + const int y = task->y + (mask >> 8); + unsigned j; + + /* Iterate over partials: + */ + { + unsigned mask = 0xffff; + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c + - plane[j].dcdx * x + + plane[j].dcdy * y); + + const int dcdx = -plane[j].dcdx; + const int dcdy = plane[j].dcdy; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* Extract the sign bits + */ + mask &= ~_mm_movemask_epi8(result); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + } +} +#endif + + + #undef TAG +#undef TRI_4 +#undef TRI_16 #undef NR_PLANES diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 8b504f23a3..a4fdf7cff3 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -203,7 +203,9 @@ lp_scene_end_rasterization(struct lp_scene *scene ) for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->head = bin->tail = NULL; + bin->head = NULL; + bin->tail = NULL; + bin->last_state = NULL; } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index dbef7692e4..622c522f11 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -41,6 +41,7 @@ #include "lp_debug.h" struct lp_scene_queue; +struct lp_rast_state; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -94,6 +95,7 @@ struct data_block { struct cmd_bin { ushort x; ushort y; + const struct lp_rast_state *last_state; /* most recent state set in bin */ struct cmd_block *head; struct cmd_block *tail; }; @@ -297,7 +299,7 @@ lp_scene_bin_command( struct lp_scene *scene, assert(x < scene->tiles_x); assert(y < scene->tiles_y); - assert(cmd <= LP_RAST_OP_END_QUERY); + assert(cmd < LP_RAST_OP_MAX); if (tail == NULL || tail->count == CMD_BLOCK_MAX) { tail = lp_scene_new_cmd_block( scene, bin ); @@ -318,6 +320,30 @@ lp_scene_bin_command( struct lp_scene *scene, } +static INLINE boolean +lp_scene_bin_cmd_with_state( struct lp_scene *scene, + unsigned x, unsigned y, + const struct lp_rast_state *state, + unsigned cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + + if (state != bin->last_state) { + bin->last_state = state; + if (!lp_scene_bin_command(scene, x, y, + LP_RAST_OP_SET_STATE, + lp_rast_arg_state(state))) + return FALSE; + } + + if (!lp_scene_bin_command( scene, x, y, cmd, arg )) + return FALSE; + + return TRUE; +} + + /* Add a command to all active bins. */ static INLINE boolean diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index eade400087..6118434d3d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -56,7 +56,7 @@ #include "draw/draw_vbuf.h" -static void set_scene_state( struct lp_setup_context *, enum setup_state, +static boolean set_scene_state( struct lp_setup_context *, enum setup_state, const char *reason); static boolean try_update_scene_state( struct lp_setup_context *setup ); @@ -167,7 +167,7 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup ) -static void +static boolean begin_binning( struct lp_setup_context *setup ) { struct lp_scene *scene = setup->scene; @@ -181,6 +181,8 @@ begin_binning( struct lp_setup_context *setup ) /* Always create a fence: */ scene->fence = lp_fence_create(MAX2(1, setup->num_threads)); + if (!scene->fence) + return FALSE; /* Initialize the bin flags and x/y coords: */ @@ -192,7 +194,8 @@ begin_binning( struct lp_setup_context *setup ) } ok = try_update_scene_state(setup); - assert(ok); + if (!ok) + return FALSE; if (setup->fb.zsbuf && ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && @@ -208,7 +211,8 @@ begin_binning( struct lp_setup_context *setup ) ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_CLEAR_COLOR, setup->clear.color ); - assert(ok); + if (!ok) + return FALSE; } } @@ -216,12 +220,14 @@ begin_binning( struct lp_setup_context *setup ) if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { if (!need_zsload) scene->has_depthstencil_clear = TRUE; + ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_CLEAR_ZSTENCIL, lp_rast_arg_clearzs( setup->clear.zsvalue, setup->clear.zsmask)); - assert(ok); + if (!ok) + return FALSE; } } @@ -229,15 +235,16 @@ begin_binning( struct lp_setup_context *setup ) ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(setup->active_query) ); - assert(ok); + if (!ok) + return FALSE; } - setup->clear.flags = 0; setup->clear.zsmask = 0; setup->clear.zsvalue = 0; LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); + return TRUE; } @@ -246,12 +253,12 @@ begin_binning( struct lp_setup_context *setup ) * * TODO: fast path for fullscreen clears and no triangles. */ -static void +static boolean execute_clears( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - begin_binning( setup ); + return begin_binning( setup ); } const char *states[] = { @@ -262,7 +269,7 @@ const char *states[] = { }; -static void +static boolean set_scene_state( struct lp_setup_context *setup, enum setup_state new_state, const char *reason) @@ -270,7 +277,7 @@ set_scene_state( struct lp_setup_context *setup, unsigned old_state = setup->state; if (old_state == new_state) - return; + return TRUE; if (LP_DEBUG & DEBUG_SCENE) { debug_printf("%s old %s new %s%s%s\n", @@ -294,12 +301,14 @@ set_scene_state( struct lp_setup_context *setup, break; case SETUP_ACTIVE: - begin_binning( setup ); + if (!begin_binning( setup )) + goto fail; break; case SETUP_FLUSHED: if (old_state == SETUP_CLEARED) - execute_clears( setup ); + if (!execute_clears( setup )) + goto fail; lp_setup_rasterize_scene( setup ); assert(setup->scene == NULL); @@ -307,9 +316,21 @@ set_scene_state( struct lp_setup_context *setup, default: assert(0 && "invalid setup state mode"); + goto fail; } setup->state = new_state; + return TRUE; + +fail: + if (setup->scene) { + lp_scene_end_rasterization(setup->scene); + setup->scene = NULL; + } + + setup->state = SETUP_FLUSHED; + lp_setup_reset( setup ); + return FALSE; } @@ -377,16 +398,19 @@ lp_setup_try_clear( struct lp_setup_context *setup, } if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - unsigned zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; - unsigned smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; + uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; + uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format, depth, stencil); - zsmask = util_pack_uint_z_stencil(setup->fb.zsbuf->format, + + zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format, zmask, smask); + + zsvalue &= zsmask; } if (setup->state == SETUP_ACTIVE) { @@ -431,7 +455,7 @@ lp_setup_try_clear( struct lp_setup_context *setup, if (flags & PIPE_CLEAR_COLOR) { memcpy(setup->clear.color.clear_color, &color_arg, - sizeof color_arg); + sizeof setup->clear.color.clear_color); } } @@ -502,14 +526,12 @@ lp_setup_set_point_state( struct lp_setup_context *setup, } void -lp_setup_set_fs_inputs( struct lp_setup_context *setup, - const struct lp_shader_input *input, - unsigned nr ) +lp_setup_set_setup_variant( struct lp_setup_context *setup, + const struct lp_setup_variant *variant) { - LP_DBG(DEBUG_SETUP, "%s %p %u\n", __FUNCTION__, (void *) input, nr); - - memcpy( setup->fs.input, input, nr * sizeof input[0] ); - setup->fs.nr_inputs = nr; + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + setup->setup.variant = variant; } void @@ -617,8 +639,7 @@ lp_setup_set_vertex_info( struct lp_setup_context *setup, void lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, unsigned num, - struct pipe_sampler_view **views, - const struct pipe_sampler_state **samplers) + struct pipe_sampler_view **views) { unsigned i; @@ -629,7 +650,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { struct pipe_sampler_view *view = i < num ? views[i] : NULL; - if(view) { + if (view) { struct pipe_resource *tex = view->texture; struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex); struct lp_jit_texture *jit_tex; @@ -639,12 +660,6 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->depth = tex->depth0; jit_tex->last_level = tex->last_level; - /* sampler state */ - jit_tex->min_lod = samplers[i]->min_lod; - jit_tex->max_lod = samplers[i]->max_lod; - jit_tex->lod_bias = samplers[i]->lod_bias; - COPY_4V(jit_tex->border_color, samplers[i]->border_color); - /* We're referencing the texture's internal data, so save a * reference to it. */ @@ -694,6 +709,38 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, /** + * Called during state validation when LP_NEW_SAMPLER is set. + */ +void +lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, + unsigned num, + const struct pipe_sampler_state **samplers) +{ + unsigned i; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(num <= PIPE_MAX_SAMPLERS); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL; + + if (sampler) { + struct lp_jit_texture *jit_tex; + jit_tex = &setup->fs.current.jit_context.textures[i]; + + jit_tex->min_lod = sampler->min_lod; + jit_tex->max_lod = sampler->max_lod; + jit_tex->lod_bias = sampler->lod_bias; + COPY_4V(jit_tex->border_color, sampler->border_color); + } + } + + setup->dirty |= LP_SETUP_NEW_FS; +} + + +/** * Is the given texture referenced by any scene? * Note: we have to check all scenes including any scenes currently * being rendered and the current scene being built. @@ -850,7 +897,7 @@ try_update_scene_state( struct lp_setup_context *setup ) return TRUE; } -void +boolean lp_setup_update_state( struct lp_setup_context *setup, boolean update_scene ) { @@ -872,22 +919,47 @@ lp_setup_update_state( struct lp_setup_context *setup, setup->psize = lp->psize_slot; assert(lp->dirty == 0); + + assert(lp->setup_variant.key.size == + setup->setup.variant->key.size); + + assert(memcmp(&lp->setup_variant.key, + &setup->setup.variant->key, + setup->setup.variant->key.size) == 0); } - if (update_scene) - set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ); + if (update_scene) { + if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ )) + return FALSE; + } /* Only call into update_scene_state() if we already have a * scene: */ if (update_scene && setup->scene) { assert(setup->state == SETUP_ACTIVE); - if (!try_update_scene_state(setup)) { - lp_setup_flush_and_restart(setup); - if (!try_update_scene_state(setup)) - assert(0); - } + + if (try_update_scene_state(setup)) + return TRUE; + + /* Update failed, try to restart the scene. + * + * Cannot call lp_setup_flush_and_restart() directly here + * because of potential recursion. + */ + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!set_scene_state(setup, SETUP_ACTIVE, __FUNCTION__)) + return FALSE; + + if (!setup->scene) + return FALSE; + + return try_update_scene_state(setup); } + + return TRUE; } @@ -991,12 +1063,12 @@ lp_setup_begin_query(struct lp_setup_context *setup, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(pq))) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!lp_scene_bin_everywhere(setup->scene, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(pq))) { - assert(0); return; } } @@ -1040,14 +1112,20 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) } -void +boolean lp_setup_flush_and_restart(struct lp_setup_context *setup) { if (0) debug_printf("%s\n", __FUNCTION__); assert(setup->state == SETUP_ACTIVE); - set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__); - lp_setup_update_state(setup, TRUE); + + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!lp_setup_update_state(setup, TRUE)) + return FALSE; + + return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 868bd3ad2f..ebb18f8134 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -33,28 +33,6 @@ struct draw_context; struct vertex_info; -enum lp_interp { - LP_INTERP_CONSTANT, - LP_INTERP_LINEAR, - LP_INTERP_PERSPECTIVE, - LP_INTERP_POSITION, - LP_INTERP_FACING -}; - - -/** - * Describes how to compute the interpolation coefficients (a0, dadx, dady) - * from the vertices passed into our triangle/line/point functions by the - * draw module. - * - * Vertices are treated as an array of float[4] values, indexed by - * src_index. - */ -struct lp_shader_input { - enum lp_interp interp; /* how to interpolate values */ - unsigned src_index; /* where to find values in incoming vertices */ - unsigned usage_mask; /* bitmask of TGSI_WRITEMASK_x flags */ -}; struct pipe_resource; struct pipe_query; @@ -66,7 +44,7 @@ struct lp_fragment_shader_variant; struct lp_jit_context; struct llvmpipe_query; struct pipe_fence_handle; - +struct lp_setup_variant; struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, @@ -111,9 +89,8 @@ lp_setup_set_point_state( struct lp_setup_context *setup, uint sprite_coord_origin); void -lp_setup_set_fs_inputs( struct lp_setup_context *setup, - const struct lp_shader_input *interp, - unsigned nr ); +lp_setup_set_setup_variant( struct lp_setup_context *setup, + const struct lp_setup_variant *variant ); void lp_setup_set_fs_variant( struct lp_setup_context *setup, @@ -143,7 +120,11 @@ lp_setup_set_scissor( struct lp_setup_context *setup, void lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, unsigned num, - struct pipe_sampler_view **views, + struct pipe_sampler_view **views); + +void +lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, + unsigned num, const struct pipe_sampler_state **samplers); unsigned diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c deleted file mode 100644 index 8dc2688ddb..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.c +++ /dev/null @@ -1,279 +0,0 @@ -/************************************************************************** - * - * Copyright 2010, VMware. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * Binning code for triangles - */ - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "lp_perf.h" -#include "lp_setup_context.h" -#include "lp_setup_coef.h" -#include "lp_rast.h" -#include "lp_state_fs.h" - -#if !defined(PIPE_ARCH_SSE) - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - */ -static void constant_coef( struct lp_rast_shader_inputs *inputs, - unsigned slot, - const float value, - unsigned i ) -{ - inputs->a0[slot][i] = value; - inputs->dadx[slot][i] = 0.0f; - inputs->dady[slot][i] = 0.0f; -} - - - -static void linear_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - float a0 = info->v0[vert_attr][i]; - float a1 = info->v1[vert_attr][i]; - float a2 = info->v2[vert_attr][i]; - - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20); - float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01); - - inputs->dadx[slot][i] = dadx; - inputs->dady[slot][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - inputs->a0[slot][i] = a0 - (dadx * info->x0_center + - dady * info->y0_center); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a0 = info->v0[vert_attr][i] * info->v0[0][3]; - float a1 = info->v1[vert_attr][i] * info->v1[0][3]; - float a2 = info->v2[vert_attr][i] * info->v2[0][3]; - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20; - float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01; - - inputs->dadx[slot][i] = dadx; - inputs->dady[slot][i] = dady; - inputs->a0[slot][i] = a0 - (dadx * info->x0_center + - dady * info->y0_center); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial - * Z and W are copied from position_coef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ -static void -setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned usage_mask) -{ - /*X*/ - if (usage_mask & TGSI_WRITEMASK_X) { - inputs->a0[slot][0] = 0.0; - inputs->dadx[slot][0] = 1.0; - inputs->dady[slot][0] = 0.0; - } - - /*Y*/ - if (usage_mask & TGSI_WRITEMASK_Y) { - inputs->a0[slot][1] = 0.0; - inputs->dadx[slot][1] = 0.0; - inputs->dady[slot][1] = 1.0; - } - - /*Z*/ - if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(inputs, info, slot, 0, 2); - } - - /*W*/ - if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(inputs, info, slot, 0, 3); - } -} - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, - unsigned slot, - boolean frontface, - unsigned usage_mask) -{ - /* convert TRUE to 1.0 and FALSE to -1.0 */ - if (usage_mask & TGSI_WRITEMASK_X) - constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 ); - - if (usage_mask & TGSI_WRITEMASK_Y) - constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_Z) - constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_W) - constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */ -} - - -/** - * Compute the tri->coef[] array dadx, dady, a0 values. - */ -void lp_setup_tri_coef( struct lp_setup_context *setup, - struct lp_rast_shader_inputs *inputs, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean frontfacing) -{ - unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; - unsigned slot; - unsigned i; - struct lp_tri_info info; - float dx01 = v0[0][0] - v1[0][0]; - float dy01 = v0[0][1] - v1[0][1]; - float dx20 = v2[0][0] - v0[0][0]; - float dy20 = v2[0][1] - v0[0][1]; - float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); - - info.v0 = v0; - info.v1 = v1; - info.v2 = v2; - info.frontfacing = frontfacing; - info.x0_center = v0[0][0] - setup->pixel_offset; - info.y0_center = v0[0][1] - setup->pixel_offset; - info.dx01_ooa = dx01 * oneoverarea; - info.dx20_ooa = dx20 * oneoverarea; - info.dy01_ooa = dy01 * oneoverarea; - info.dy20_ooa = dy20 * oneoverarea; - - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; - - switch (setup->fs.input[slot].interp) { - case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info.v0[vert_attr][i], i); - } - else { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info.v2[vert_attr][i], i); - } - break; - - case LP_INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - linear_coef(inputs, &info, slot+1, vert_attr, i); - break; - - case LP_INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - perspective_coef(inputs, &info, slot+1, vert_attr, i); - fragcoord_usage_mask |= TGSI_WRITEMASK_W; - break; - - case LP_INTERP_POSITION: - /* - * The generated pixel interpolators will pick up the coeffs from - * slot 0, so all need to ensure that the usage mask is covers all - * usages. - */ - fragcoord_usage_mask |= usage_mask; - break; - - case LP_INTERP_FACING: - setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask); - break; - - default: - assert(0); - } - } - - /* The internal position input is in slot zero: - */ - setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask); -} - -#else -extern void lp_setup_coef_dummy(void); -void lp_setup_coef_dummy(void) -{ -} - -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h deleted file mode 100644 index 87a3255ccc..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.h +++ /dev/null @@ -1,64 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * The setup code is concerned with point/line/triangle setup and - * putting commands/data into the bins. - */ - - -#ifndef LP_SETUP_COEF_H -#define LP_SETUP_COEF_H - - -struct lp_tri_info { - - float x0_center; - float y0_center; - - /* turn these into an aligned float[4] */ - float dy01_ooa; - float dy20_ooa; - float dx01_ooa; - float dx20_ooa; - - const float (*v0)[4]; - const float (*v1)[4]; - const float (*v2)[4]; - - boolean frontfacing; /* remove eventually */ -}; - -void lp_setup_tri_coef( struct lp_setup_context *setup, - struct lp_rast_shader_inputs *inputs, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean frontfacing); - -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c deleted file mode 100644 index 3742fd672b..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c +++ /dev/null @@ -1,228 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 VMware. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * Binning code for triangles - */ - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "lp_perf.h" -#include "lp_setup_context.h" -#include "lp_setup_coef.h" -#include "lp_rast.h" - -#if defined(PIPE_ARCH_SSE) -#include <emmintrin.h> - - -static void constant_coef4( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - const float *attr) -{ - *(__m128 *)inputs->a0[slot] = *(__m128 *)attr; - *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); -} - - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot ) -{ - /* XXX: just pass frontface directly to the shader, don't bother - * treating it as an input. - */ - __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0, - 0, 0, 0); - - *(__m128 *)inputs->a0[slot] = a0; - *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); -} - - - -static void calc_coef4( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - __m128 a0, - __m128 a1, - __m128 a2) -{ - __m128 da01 = _mm_sub_ps(a0, a1); - __m128 da20 = _mm_sub_ps(a2, a0); - - __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa)); - __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa)); - __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa); - - __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa)); - __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa)); - __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa); - - __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center)); - __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center)); - __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0); - __m128 attr_0 = _mm_sub_ps(a0, attr_v0); - - *(__m128 *)inputs->a0[slot] = attr_0; - *(__m128 *)inputs->dadx[slot] = dadx; - *(__m128 *)inputs->dady[slot] = dady; -} - - -static void linear_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr) -{ - __m128 a0 = *(const __m128 *)info->v0[vert_attr]; - __m128 a1 = *(const __m128 *)info->v1[vert_attr]; - __m128 a2 = *(const __m128 *)info->v2[vert_attr]; - - calc_coef4(inputs, info, slot, a0, a1, a2); -} - - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - __m128 a0 = *(const __m128 *)info->v0[vert_attr]; - __m128 a1 = *(const __m128 *)info->v1[vert_attr]; - __m128 a2 = *(const __m128 *)info->v2[vert_attr]; - - __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3])); - __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3])); - __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3])); - - calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow); -} - - - - - -/** - * Compute the inputs-> dadx, dady, a0 values. - */ -void lp_setup_tri_coef( struct lp_setup_context *setup, - struct lp_rast_shader_inputs *inputs, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean frontfacing) -{ - unsigned slot; - struct lp_tri_info info; - float dx01 = v0[0][0] - v1[0][0]; - float dy01 = v0[0][1] - v1[0][1]; - float dx20 = v2[0][0] - v0[0][0]; - float dy20 = v2[0][1] - v0[0][1]; - float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); - - info.v0 = v0; - info.v1 = v1; - info.v2 = v2; - info.frontfacing = frontfacing; - info.x0_center = v0[0][0] - setup->pixel_offset; - info.y0_center = v0[0][1] - setup->pixel_offset; - info.dx01_ooa = dx01 * oneoverarea; - info.dx20_ooa = dx20 * oneoverarea; - info.dy01_ooa = dy01 * oneoverarea; - info.dy20_ooa = dy20 * oneoverarea; - - - /* The internal position input is in slot zero: - */ - linear_coef(inputs, &info, 0, 0); - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - - switch (setup->fs.input[slot].interp) { - case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { - constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]); - } - else { - constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]); - } - break; - - case LP_INTERP_LINEAR: - linear_coef(inputs, &info, slot+1, vert_attr); - break; - - case LP_INTERP_PERSPECTIVE: - perspective_coef(inputs, &info, slot+1, vert_attr); - break; - - case LP_INTERP_POSITION: - /* - * The generated pixel interpolators will pick up the coeffs from - * slot 0. - */ - break; - - case LP_INTERP_FACING: - setup_facing_coef(inputs, &info, slot+1); - break; - - default: - assert(0); - } - } -} - -#else -extern void lp_setup_coef_dummy(void); -void lp_setup_coef_dummy(void) -{ -} -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 8506ed2dc9..dc2533bedc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -39,6 +39,7 @@ #include "lp_rast.h" #include "lp_tile_soa.h" /* for TILE_SIZE */ #include "lp_scene.h" +#include "lp_bld_interp.h" /* for struct lp_shader_input */ #include "draw/draw_vbuf.h" #include "util/u_rect.h" @@ -49,6 +50,8 @@ #define LP_SETUP_NEW_SCISSOR 0x08 +struct lp_setup_variant; + /** Max number of scenes */ #define MAX_SCENES 2 @@ -118,9 +121,6 @@ struct lp_setup_context } state; struct { - struct lp_shader_input input[PIPE_MAX_ATTRIBS]; - unsigned nr_inputs; - const struct lp_rast_state *stored; /**< what's in the scene */ struct lp_rast_state current; /**< currently set state */ struct pipe_resource *current_tex[PIPE_MAX_SAMPLERS]; @@ -139,6 +139,10 @@ struct lp_setup_context } blend_color; + struct { + const struct lp_setup_variant *variant; + } setup; + unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ void (*point)( struct lp_setup_context *, @@ -160,12 +164,12 @@ void lp_setup_choose_point( struct lp_setup_context *setup ); void lp_setup_init_vbuf(struct lp_setup_context *setup); -void lp_setup_update_state( struct lp_setup_context *setup, +boolean lp_setup_update_state( struct lp_setup_context *setup, boolean update_scene); void lp_setup_destroy( struct lp_setup_context *setup ); -void lp_setup_flush_and_restart(struct lp_setup_context *setup); +boolean lp_setup_flush_and_restart(struct lp_setup_context *setup); void lp_setup_print_triangle(struct lp_setup_context *setup, @@ -181,7 +185,7 @@ lp_setup_print_vertex(struct lp_setup_context *setup, struct lp_rast_triangle * lp_setup_alloc_triangle(struct lp_scene *scene, - unsigned nr_inputs, + unsigned num_inputs, unsigned nr_planes, unsigned *tri_size); @@ -191,6 +195,4 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, const struct u_rect *bbox, int nr_planes ); -void lp_setup_flush_and_restart(struct lp_setup_context *setup); - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 156bd63375..827413bb33 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -35,6 +35,7 @@ #include "lp_setup_context.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #define NUM_CHANNELS 4 @@ -46,6 +47,10 @@ struct lp_line_info { const float (*v1)[4]; const float (*v2)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; @@ -53,14 +58,14 @@ struct lp_line_info { * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, + struct lp_line_info *info, unsigned slot, const float value, unsigned i ) { - tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0.0f; - tri->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } @@ -69,7 +74,6 @@ static void constant_coef( struct lp_setup_context *setup, * for a triangle. */ static void linear_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -82,10 +86,10 @@ static void linear_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - + info->a0[slot][i] = (a1 - (dadx * (info->v1[0][0] - setup->pixel_offset) + dady * (info->v1[0][1] - setup->pixel_offset))); } @@ -100,7 +104,6 @@ static void linear_coef( struct lp_setup_context *setup, * divide the interpolated value by the interpolated W at that fragment. */ static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -115,43 +118,42 @@ static void perspective_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - - (dadx * (info->v1[0][0] - setup->pixel_offset) + - dady * (info->v1[0][1] - setup->pixel_offset))); + info->a0[slot][i] = (a1 - + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); } static void setup_fragcoord_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - tri->inputs.a0[slot][0] = 0.0; - tri->inputs.dadx[slot][0] = 1.0; - tri->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - tri->inputs.a0[slot][1] = 0.0; - tri->inputs.dadx[slot][1] = 0.0; - tri->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(setup, tri, info, slot, 0, 2); + linear_coef(setup, info, slot, 0, 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(setup, tri, info, slot, 0, 3); + linear_coef(setup, info, slot, 0, 3); } } @@ -159,43 +161,43 @@ setup_fragcoord_coef( struct lp_setup_context *setup, * Compute the tri->coef[] array dadx, dady, a0 values. */ static void setup_line_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; /* setup interpolation for all the remaining attributes: */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; unsigned i; - switch (setup->fs.input[slot].interp) { + switch (key->inputs[slot].interp) { case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { + if (key->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(setup, tri, info, slot+1, vert_attr, i); + linear_coef(setup, info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(setup, tri, info, slot+1, vert_attr, i); + perspective_coef(setup, info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -211,7 +213,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup, case LP_INTERP_FACING: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, 1.0, i); + constant_coef(setup, info, slot+1, 1.0, i); break; default: @@ -221,7 +223,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(setup, tri, info, 0, + setup_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } @@ -241,14 +243,15 @@ print_line(struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; uint i; debug_printf("llvmpipe line\n"); - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + key->num_inputs; i++) { debug_printf(" v1[%d]: %f %f %f %f\n", i, v1[i][0], v1[i][1], v1[i][2], v1[i][3]); } - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + key->num_inputs; i++) { debug_printf(" v2[%d]: %f %f %f %f\n", i, v2[i][0], v2[i][1], v2[i][2], v2[i][3]); } @@ -275,7 +278,9 @@ try_setup_line( struct lp_setup_context *setup, const float (*v2)[4]) { struct lp_scene *scene = setup->scene; + const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *line; + struct lp_rast_plane *plane; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); struct u_rect bbox; @@ -475,7 +480,7 @@ try_setup_line( struct lp_setup_context *setup, else { /* do intersection test */ float xintersect = fracf(v2[0][0]) + y2diff * dxdy; - draw_end = (xintersect < 1.0 && xintersect > 0.0); + draw_end = (xintersect < 1.0 && xintersect >= 0.0); } /* Are we already drawing start/end? @@ -513,7 +518,7 @@ try_setup_line( struct lp_setup_context *setup, x_offset_end = y_offset_end * dxdy; } } - + /* x/y positions in fixed point */ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; @@ -567,7 +572,7 @@ try_setup_line( struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); line = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, + key->num_inputs, nr_planes, &tri_bytes); if (!line) @@ -581,33 +586,35 @@ try_setup_line( struct lp_setup_context *setup, #endif /* calculate the deltas */ - line->plane[0].dcdy = x[0] - x[1]; - line->plane[1].dcdy = x[1] - x[2]; - line->plane[2].dcdy = x[2] - x[3]; - line->plane[3].dcdy = x[3] - x[0]; + plane = GET_PLANES(line); + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[3]; + plane[3].dcdy = x[3] - x[0]; - line->plane[0].dcdx = y[0] - y[1]; - line->plane[1].dcdx = y[1] - y[2]; - line->plane[2].dcdx = y[2] - y[3]; - line->plane[3].dcdx = y[3] - y[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[3]; + plane[3].dcdx = y[3] - y[0]; /* Setup parameter interpolants: */ - setup_line_coefficients( setup, line, &info); + info.a0 = GET_A0(&line->inputs); + info.dadx = GET_DADX(&line->inputs); + info.dady = GET_DADY(&line->inputs); + setup_line_coefficients(setup, &info); - line->inputs.facing = 1.0F; - line->inputs.state = setup->fs.stored; + line->inputs.frontfacing = TRUE; line->inputs.disable = FALSE; line->inputs.opaque = FALSE; for (i = 0; i < 4; i++) { - struct lp_rast_plane *plane = &line->plane[i]; /* half-edge constants, will be interated over the whole render * target. */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; /* correct for top-left vs. bottom-left fill convention. @@ -623,38 +630,34 @@ try_setup_line( struct lp_setup_context *setup, * to its usual method, in which case it will probably want * to use the opposite, top-left convention. */ - if (plane->dcdx < 0) { + if (plane[i].dcdx < 0) { /* both fill conventions want this - adjust for left edges */ - plane->c++; + plane[i].c++; } - else if (plane->dcdx == 0) { + else if (plane[i].dcdx == 0) { if (setup->pixel_offset == 0) { /* correct for top-left fill convention: */ - if (plane->dcdy > 0) plane->c++; + if (plane[i].dcdy > 0) plane[i].c++; } else { /* correct for bottom-left fill convention: */ - if (plane->dcdy < 0) plane->c++; + if (plane[i].dcdy < 0) plane[i].c++; } } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; - - /* Calculate trivial accept offsets from the above. - */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; } @@ -677,29 +680,25 @@ try_setup_line( struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 8) { - line->plane[4].dcdx = -1; - line->plane[4].dcdy = 0; - line->plane[4].c = 1-bbox.x0; - line->plane[4].ei = 0; - line->plane[4].eo = 1; - - line->plane[5].dcdx = 1; - line->plane[5].dcdy = 0; - line->plane[5].c = bbox.x1+1; - line->plane[5].ei = -1; - line->plane[5].eo = 0; - - line->plane[6].dcdx = 0; - line->plane[6].dcdy = 1; - line->plane[6].c = 1-bbox.y0; - line->plane[6].ei = 0; - line->plane[6].eo = 1; - - line->plane[7].dcdx = 0; - line->plane[7].dcdy = -1; - line->plane[7].c = bbox.y1+1; - line->plane[7].ei = -1; - line->plane[7].eo = 0; + plane[4].dcdx = -1; + plane[4].dcdy = 0; + plane[4].c = 1-bbox.x0; + plane[4].eo = 1; + + plane[5].dcdx = 1; + plane[5].dcdy = 0; + plane[5].c = bbox.x1+1; + plane[5].eo = 0; + + plane[6].dcdx = 0; + plane[6].dcdy = 1; + plane[6].c = 1-bbox.y0; + plane[6].eo = 1; + + plane[7].dcdx = 0; + plane[7].dcdy = -1; + plane[7].c = bbox.y1+1; + plane[7].eo = 0; } return lp_setup_bin_triangle(setup, line, &bbox, nr_planes); @@ -712,10 +711,11 @@ static void lp_setup_line( struct lp_setup_context *setup, { if (!try_setup_line( setup, v0, v1 )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!try_setup_line( setup, v0, v1 )) - assert(0); + return; } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index a95c15751c..146f1bd07c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -33,9 +33,9 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "lp_perf.h" -#include "lp_setup_context.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #include "tgsi/tgsi_scan.h" #define NUM_CHANNELS 4 @@ -46,6 +46,10 @@ struct point_info { int dx01, dx12; const float (*v0)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; @@ -54,14 +58,37 @@ struct point_info { */ static void constant_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, + struct point_info *info, unsigned slot, const float value, unsigned i) { - point->inputs.a0[slot][i] = value; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; +} + + +static void +point_persp_coeff(struct lp_setup_context *setup, + const struct point_info *info, + unsigned slot, + unsigned i) +{ + /* + * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A + * better stratergy would be to take the primitive in consideration when + * generating the fragment shader key, and therefore avoid the per-fragment + * perspective divide. + */ + + float w0 = info->v0[0][3]; + + assert(i < 4); + + info->a0[slot][i] = info->v0[slot][i]*w0; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } @@ -70,17 +97,19 @@ constant_coef(struct lp_setup_context *setup, * \param slot the vertex attribute slot to setup * \param i the attribute channel in [0,3] * \param sprite_coord_origin one of PIPE_SPRITE_COORD_x - * \param perspective_proj will the TEX instruction do a divide by Q? + * \param perspective does the shader expects pre-multiplied w, i.e., + * LP_INTERP_PERSPECTIVE is specified in the shader key */ static void texcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, const struct point_info *info, unsigned slot, unsigned i, unsigned sprite_coord_origin, - boolean perspective_proj) + boolean perspective) { + float w0 = info->v0[0][3]; + assert(i < 4); if (i == 0) { @@ -89,21 +118,14 @@ texcoord_coef(struct lp_setup_context *setup, float x0 = info->v0[0][0] - setup->pixel_offset; float y0 = info->v0[0][1] - setup->pixel_offset; - point->inputs.dadx[slot][0] = dadx; - point->inputs.dady[slot][0] = dady; - point->inputs.a0[slot][0] = 0.5 - (dadx * x0 + dady * y0); + info->dadx[slot][0] = dadx; + info->dady[slot][0] = dady; + info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0); - if (!perspective_proj) { - /* Divide coefficients by vertex.w here. - * - * It would be clearer to always multiply by w0 above and - * then divide it out for perspective projection here, but - * doing it this way involves less algebra. - */ - float w0 = info->v0[0][3]; - point->inputs.dadx[slot][0] *= w0; - point->inputs.dady[slot][0] *= w0; - point->inputs.a0[slot][0] *= w0; + if (perspective) { + info->dadx[slot][0] *= w0; + info->dady[slot][0] *= w0; + info->a0[slot][0] *= w0; } } else if (i == 1) { @@ -116,26 +138,25 @@ texcoord_coef(struct lp_setup_context *setup, dady = -dady; } - point->inputs.dadx[slot][1] = dadx; - point->inputs.dady[slot][1] = dady; - point->inputs.a0[slot][1] = 0.5 - (dadx * x0 + dady * y0); + info->dadx[slot][1] = dadx; + info->dady[slot][1] = dady; + info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0); - if (!perspective_proj) { - float w0 = info->v0[0][3]; - point->inputs.dadx[slot][1] *= w0; - point->inputs.dady[slot][1] *= w0; - point->inputs.a0[slot][1] *= w0; + if (perspective) { + info->dadx[slot][1] *= w0; + info->dady[slot][1] *= w0; + info->a0[slot][1] *= w0; } } else if (i == 2) { - point->inputs.a0[slot][2] = 0.0f; - point->inputs.dadx[slot][2] = 0.0f; - point->inputs.dady[slot][2] = 0.0f; + info->a0[slot][2] = 0.0f; + info->dadx[slot][2] = 0.0f; + info->dady[slot][2] = 0.0f; } else { - point->inputs.a0[slot][3] = 1.0f; - point->inputs.dadx[slot][3] = 0.0f; - point->inputs.dady[slot][3] = 0.0f; + info->a0[slot][3] = perspective ? w0 : 1.0f; + info->dadx[slot][3] = 0.0f; + info->dady[slot][3] = 0.0f; } } @@ -148,33 +169,32 @@ texcoord_coef(struct lp_setup_context *setup, */ static void setup_point_fragcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info, + struct point_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - point->inputs.a0[slot][0] = 0.0; - point->inputs.dadx[slot][0] = 1.0; - point->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - point->inputs.a0[slot][1] = 0.0; - point->inputs.dadx[slot][1] = 0.0; - point->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - constant_coef(setup, point, slot, info->v0[0][2], 2); + constant_coef(setup, info, slot, info->v0[0][2], 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - constant_coef(setup, point, slot, info->v0[0][3], 3); + constant_coef(setup, info, slot, info->v0[0][3], 3); } } @@ -184,21 +204,27 @@ setup_point_fragcoord_coef(struct lp_setup_context *setup, */ static void setup_point_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info) + struct point_info *info) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; const struct lp_fragment_shader *shader = setup->fs.current.variant->shader; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; /* setup interpolation for all the remaining attributes: */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; + enum lp_interp interp = key->inputs[slot].interp; + boolean perspective = !!(interp == LP_INTERP_PERSPECTIVE); unsigned i; + + if (perspective & usage_mask) { + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + } - switch (setup->fs.input[slot].interp) { + switch (interp) { case LP_INTERP_POSITION: /* * The generated pixel interpolators will pick up the coeffs from @@ -211,39 +237,45 @@ setup_point_coefficients( struct lp_setup_context *setup, case LP_INTERP_LINEAR: /* Sprite tex coords may use linear interpolation someday */ /* fall-through */ - case LP_INTERP_PERSPECTIVE: /* check if the sprite coord flag is set for this attribute. * If so, set it up so it up so x and y vary from 0 to 1. */ - if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) { - const int index = shader->info.input_semantic_index[slot]; + if (shader->info.base.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) { + unsigned semantic_index = shader->info.base.input_semantic_index[slot]; /* Note that sprite_coord enable is a bitfield of * PIPE_MAX_SHADER_OUTPUTS bits. */ - if (index < PIPE_MAX_SHADER_OUTPUTS && - (setup->sprite_coord_enable & (1 << index))) { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - texcoord_coef(setup, point, info, slot + 1, i, + if (semantic_index < PIPE_MAX_SHADER_OUTPUTS && + (setup->sprite_coord_enable & (1 << semantic_index))) { + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + texcoord_coef(setup, info, slot + 1, i, setup->sprite_coord_origin, - (usage_mask & TGSI_WRITEMASK_W)); - fragcoord_usage_mask |= TGSI_WRITEMASK_W; - break; + perspective); + } + } + break; } } - /* FALLTHROUGH */ + /* fall-through */ case LP_INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) { - if (usage_mask & (1 << i)) - constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i); + if (usage_mask & (1 << i)) { + if (perspective) { + point_persp_coeff(setup, info, slot+1, i); + } + else { + constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i); + } + } } break; case LP_INTERP_FACING: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, point, slot+1, 1.0, i); + constant_coef(setup, info, slot+1, 1.0, i); break; default: @@ -254,7 +286,7 @@ setup_point_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_point_fragcoord_coef(setup, point, info, 0, + setup_point_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } @@ -271,6 +303,7 @@ try_setup_point( struct lp_setup_context *setup, const float (*v0)[4] ) { /* x/y positions in fixed point */ + const struct lp_setup_variant_key *key = &setup->setup.variant->key; const int sizeAttr = setup->psize; const float size = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0] @@ -322,7 +355,7 @@ try_setup_point( struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); point = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, + key->num_inputs, nr_planes, &bytes); if (!point) @@ -338,40 +371,40 @@ try_setup_point( struct lp_setup_context *setup, info.dx12 = fixed_width; info.dy01 = fixed_width; info.dy12 = 0; + info.a0 = GET_A0(&point->inputs); + info.dadx = GET_DADX(&point->inputs); + info.dady = GET_DADY(&point->inputs); /* Setup parameter interpolants: */ - setup_point_coefficients(setup, point, &info); + setup_point_coefficients(setup, &info); - point->inputs.facing = 1.0F; - point->inputs.state = setup->fs.stored; + point->inputs.frontfacing = TRUE; point->inputs.disable = FALSE; point->inputs.opaque = FALSE; { - point->plane[0].dcdx = -1; - point->plane[0].dcdy = 0; - point->plane[0].c = 1-bbox.x0; - point->plane[0].ei = 0; - point->plane[0].eo = 1; - - point->plane[1].dcdx = 1; - point->plane[1].dcdy = 0; - point->plane[1].c = bbox.x1+1; - point->plane[1].ei = -1; - point->plane[1].eo = 0; - - point->plane[2].dcdx = 0; - point->plane[2].dcdy = 1; - point->plane[2].c = 1-bbox.y0; - point->plane[2].ei = 0; - point->plane[2].eo = 1; - - point->plane[3].dcdx = 0; - point->plane[3].dcdy = -1; - point->plane[3].c = bbox.y1+1; - point->plane[3].ei = -1; - point->plane[3].eo = 0; + struct lp_rast_plane *plane = GET_PLANES(point); + + plane[0].dcdx = -1; + plane[0].dcdy = 0; + plane[0].c = 1-bbox.x0; + plane[0].eo = 1; + + plane[1].dcdx = 1; + plane[1].dcdy = 0; + plane[1].c = bbox.x1+1; + plane[1].eo = 0; + + plane[2].dcdx = 0; + plane[2].dcdy = 1; + plane[2].c = 1-bbox.y0; + plane[2].eo = 1; + + plane[3].dcdx = 0; + plane[3].dcdy = -1; + plane[3].c = bbox.y1+1; + plane[3].eo = 0; } return lp_setup_bin_triangle(setup, point, &bbox, nr_planes); @@ -384,10 +417,11 @@ lp_setup_point(struct lp_setup_context *setup, { if (!try_setup_point( setup, v0 )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!try_setup_point( setup, v0 )) - assert(0); + return; } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 9016bb8e24..4ab0b72a57 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -32,15 +32,18 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_rect.h" +#include "util/u_sse.h" #include "lp_perf.h" #include "lp_setup_context.h" -#include "lp_setup_coef.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #define NUM_CHANNELS 4 - +#if defined(PIPE_ARCH_SSE) +#include <emmintrin.h> +#endif static INLINE int subpixel_snap(float a) @@ -65,7 +68,7 @@ fixed_to_float(int a) * immediately after it. * The memory is allocated from the per-scene pool, not per-tile. * \param tri_size returns number of bytes allocated - * \param nr_inputs number of fragment shader inputs + * \param num_inputs number of fragment shader inputs * \return pointer to triangle space */ struct lp_rast_triangle * @@ -75,22 +78,23 @@ lp_setup_alloc_triangle(struct lp_scene *scene, unsigned *tri_size) { unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane); struct lp_rast_triangle *tri; - unsigned tri_bytes, bytes; - char *inputs; - tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16); - bytes = tri_bytes + (3 * input_array_sz); + *tri_size = (sizeof(struct lp_rast_triangle) + + 3 * input_array_sz + + plane_sz); - tri = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri = lp_scene_alloc_aligned( scene, *tri_size, 16 ); + if (tri == NULL) + return NULL; - if (tri) { - inputs = ((char *)tri) + tri_bytes; - tri->inputs.a0 = (float (*)[4]) inputs; - tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); - tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + tri->inputs.stride = input_array_sz; - *tri_size = bytes; + { + char *a = (char *)tri; + char *b = (char *)&GET_PLANES(tri)[nr_planes]; + assert(b - a == *tri_size); } return tri; @@ -101,25 +105,26 @@ lp_setup_print_vertex(struct lp_setup_context *setup, const char *name, const float (*v)[4]) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; int i, j; debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n", name, v[0][0], v[0][1], v[0][2], v[0][3]); - for (i = 0; i < setup->fs.nr_inputs; i++) { - const float *in = v[setup->fs.input[i].src_index]; + for (i = 0; i < key->num_inputs; i++) { + const float *in = v[key->inputs[i].src_index]; debug_printf(" in[%d] (%s[%d]) %s%s%s%s ", i, - name, setup->fs.input[i].src_index, - (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ", - (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ", - (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ", - (setup->fs.input[i].usage_mask & 0x8) ? "w" : " "); + name, key->inputs[i].src_index, + (key->inputs[i].usage_mask & 0x1) ? "x" : " ", + (key->inputs[i].usage_mask & 0x2) ? "y" : " ", + (key->inputs[i].usage_mask & 0x4) ? "z" : " ", + (key->inputs[i].usage_mask & 0x8) ? "w" : " "); for (j = 0; j < 4; j++) - if (setup->fs.input[i].usage_mask & (1<<j)) + if (key->inputs[i].usage_mask & (1<<j)) debug_printf("%.5f ", in[j]); debug_printf("\n"); @@ -200,14 +205,16 @@ lp_setup_whole_tile(struct lp_setup_context *setup, } LP_COUNT(nr_shade_opaque_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE_OPAQUE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE_OPAQUE, + lp_rast_arg_inputs(inputs) ); } else { LP_COUNT(nr_shade_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE, + lp_rast_arg_inputs(inputs) ); } } @@ -225,13 +232,13 @@ do_triangle_ccw(struct lp_setup_context *setup, boolean frontfacing ) { struct lp_scene *scene = setup->scene; + const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *tri; - int x[3]; - int y[3]; - int area; + struct lp_rast_plane *plane; + int x[4]; + int y[4]; struct u_rect bbox; unsigned tri_bytes; - int i; int nr_planes = 3; if (0) @@ -248,10 +255,12 @@ do_triangle_ccw(struct lp_setup_context *setup, x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + x[3] = 0; y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); - + y[3] = 0; + /* Bounding rectangle (in pixels) */ { @@ -289,13 +298,13 @@ do_triangle_ccw(struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); tri = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, + key->num_inputs, nr_planes, &tri_bytes); if (!tri) return FALSE; -#ifdef DEBUG +#if 0 tri->v[0][0] = v0[0][0]; tri->v[1][0] = v1[0][0]; tri->v[2][0] = v2[0][0]; @@ -304,92 +313,172 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->v[2][1] = v2[0][1]; #endif - tri->plane[0].dcdy = x[0] - x[1]; - tri->plane[1].dcdy = x[1] - x[2]; - tri->plane[2].dcdy = x[2] - x[0]; - - tri->plane[0].dcdx = y[0] - y[1]; - tri->plane[1].dcdx = y[1] - y[2]; - tri->plane[2].dcdx = y[2] - y[0]; - - area = (tri->plane[0].dcdy * tri->plane[2].dcdx - - tri->plane[2].dcdy * tri->plane[0].dcdx); - LP_COUNT(nr_tris); - /* Cull non-ccw and zero-sized triangles. - * - * XXX: subject to overflow?? - */ - if (area <= 0) { - lp_scene_putback_data( scene, tri_bytes ); - LP_COUNT(nr_culled_tris); - return TRUE; - } - /* Setup parameter interpolants: */ - lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing ); - - tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + setup->setup.variant->jit_function( v0, + v1, + v2, + frontfacing, + GET_A0(&tri->inputs), + GET_DADX(&tri->inputs), + GET_DADY(&tri->inputs) ); + + tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; tri->inputs.opaque = setup->fs.current.variant->opaque; - tri->inputs.state = setup->fs.stored; - - for (i = 0; i < 3; i++) { - struct lp_rast_plane *plane = &tri->plane[i]; + if (0) + lp_dump_setup_coef(&setup->setup.variant->key, + (const float (*)[4])GET_A0(&tri->inputs), + (const float (*)[4])GET_DADX(&tri->inputs), + (const float (*)[4])GET_DADY(&tri->inputs)); + + plane = GET_PLANES(tri); + +#if defined(PIPE_ARCH_SSE) + { + __m128i vertx, verty; + __m128i shufx, shufy; + __m128i dcdx, dcdy, c; + __m128i unused; + __m128i dcdx_neg_mask; + __m128i dcdy_neg_mask; + __m128i dcdx_zero_mask; + __m128i top_left_flag; + __m128i c_inc_mask, c_inc; + __m128i eo, p0, p1, p2; + __m128i zero = _mm_setzero_si128(); + + vertx = _mm_loadu_si128((__m128i *)x); /* vertex x coords */ + verty = _mm_loadu_si128((__m128i *)y); /* vertex y coords */ + + shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1)); + shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1)); + + dcdx = _mm_sub_epi32(verty, shufy); + dcdy = _mm_sub_epi32(vertx, shufx); + + dcdx_neg_mask = _mm_srai_epi32(dcdx, 31); + dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero); + dcdy_neg_mask = _mm_srai_epi32(dcdy, 31); + + top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0); - /* half-edge constants, will be interated over the whole render - * target. + c_inc_mask = _mm_or_si128(dcdx_neg_mask, + _mm_and_si128(dcdx_zero_mask, + _mm_xor_si128(dcdy_neg_mask, + top_left_flag))); + + c_inc = _mm_srli_epi32(c_inc_mask, 31); + + c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), + mm_mullo_epi32(dcdy, verty)); + + c = _mm_add_epi32(c, c_inc); + + /* Scale up to match c: */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; - - /* correct for top-left vs. bottom-left fill convention. - * - * note that we're overloading gl_rasterization_rules to mean - * both (0.5,0.5) pixel centers *and* bottom-left filling - * convention. - * - * GL actually has a top-left filling convention, but GL's - * notion of "top" differs from gallium's... - * - * Also, sometimes (in FBO cases) GL will render upside down - * to its usual method, in which case it will probably want - * to use the opposite, top-left convention. - */ - if (plane->dcdx < 0) { - /* both fill conventions want this - adjust for left edges */ - plane->c++; - } - else if (plane->dcdx == 0) { - if (setup->pixel_offset == 0) { - /* correct for top-left fill convention: - */ - if (plane->dcdy > 0) plane->c++; + dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER); + dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER); + + /* Calculate trivial reject values: + */ + eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), + _mm_and_si128(dcdx_neg_mask, dcdx)); + + /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ + + /* Pointless transpose which gets undone immediately in + * rasterization: + */ + transpose4_epi32(&c, &dcdx, &dcdy, &eo, + &p0, &p1, &p2, &unused); + + _mm_store_si128((__m128i *)&plane[0], p0); + _mm_store_si128((__m128i *)&plane[1], p1); + _mm_store_si128((__m128i *)&plane[2], p2); + } +#else + { + int i; + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[0]; + + for (i = 0; i < 3; i++) { + /* half-edge constants, will be interated over the whole render + * target. + */ + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane[i].dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane[i].c++; } - else { - /* correct for bottom-left fill convention: - */ - if (plane->dcdy < 0) plane->c++; + else if (plane[i].dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane[i].dcdy > 0) plane[i].c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane[i].dcdy < 0) plane[i].c++; + } } - } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; - /* find trivial reject offsets for each edge for a single-pixel - * sized block. These will be scaled up at each recursive level to - * match the active blocksize. Scaling in this way works best if - * the blocks are square. - */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; + } + } +#endif - /* Calculate trivial accept offsets from the above. - */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + if (0) { + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[0].c, + plane[0].dcdx, + plane[0].dcdy, + plane[0].eo); + + debug_printf("p1: %08x/%08x/%08x/%08x\n", + plane[1].c, + plane[1].dcdx, + plane[1].dcdy, + plane[1].eo); + + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[2].c, + plane[2].dcdx, + plane[2].dcdy, + plane[2].eo); } @@ -412,29 +501,25 @@ do_triangle_ccw(struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 7) { - tri->plane[3].dcdx = -1; - tri->plane[3].dcdy = 0; - tri->plane[3].c = 1-bbox.x0; - tri->plane[3].ei = 0; - tri->plane[3].eo = 1; - - tri->plane[4].dcdx = 1; - tri->plane[4].dcdy = 0; - tri->plane[4].c = bbox.x1+1; - tri->plane[4].ei = -1; - tri->plane[4].eo = 0; - - tri->plane[5].dcdx = 0; - tri->plane[5].dcdy = 1; - tri->plane[5].c = 1-bbox.y0; - tri->plane[5].ei = 0; - tri->plane[5].eo = 1; - - tri->plane[6].dcdx = 0; - tri->plane[6].dcdy = -1; - tri->plane[6].c = bbox.y1+1; - tri->plane[6].ei = -1; - tri->plane[6].eo = 0; + plane[3].dcdx = -1; + plane[3].dcdy = 0; + plane[3].c = 1-bbox.x0; + plane[3].eo = 1; + + plane[4].dcdx = 1; + plane[4].dcdy = 0; + plane[4].c = bbox.x1+1; + plane[4].eo = 0; + + plane[5].dcdx = 0; + plane[5].dcdy = 1; + plane[5].c = 1-bbox.y0; + plane[5].eo = 1; + + plane[6].dcdx = 0; + plane[6].dcdy = -1; + plane[6].c = bbox.y1+1; + plane[6].eo = 0; } return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); @@ -487,51 +572,58 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) | (bbox->y1 - (bbox->y0 & ~3))); - if (nr_planes == 3) { - if (sz < 4 && dx < 64) - { - /* Triangle is contained in a single 4x4 stamp: - */ - int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); - - return lp_scene_bin_command( scene, - bbox->x0/64, bbox->y0/64, - LP_RAST_OP_TRIANGLE_3_4, - lp_rast_arg_triangle(tri, mask) ); - } - - if (sz < 16 && dx < 64) - { - int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); - - /* Triangle is contained in a single 16x16 block: - */ - return lp_scene_bin_command( scene, - bbox->x0/64, bbox->y0/64, - LP_RAST_OP_TRIANGLE_3_16, - lp_rast_arg_triangle(tri, mask) ); - } - } - - /* Determine which tile(s) intersect the triangle's bounding box */ if (dx < TILE_SIZE) { int ix0 = bbox->x0 / TILE_SIZE; int iy0 = bbox->y0 / TILE_SIZE; + int px = bbox->x0 & 63 & ~3; + int py = bbox->y0 & 63 & ~3; + int mask = px | (py << 8); assert(iy0 == bbox->y1 / TILE_SIZE && ix0 == bbox->x1 / TILE_SIZE); + if (nr_planes == 3) { + if (sz < 4) + { + /* Triangle is contained in a single 4x4 stamp: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_4, + lp_rast_arg_triangle(tri, mask) ); + } + + if (sz < 16) + { + /* Triangle is contained in a single 16x16 block: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_16, + lp_rast_arg_triangle(tri, mask) ); + } + } + else if (nr_planes == 4 && sz < 16) + { + return lp_scene_bin_cmd_with_state(scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_4_16, + lp_rast_arg_triangle(tri, mask) ); + } + + /* Triangle is contained in a single tile: */ - return lp_scene_bin_command( scene, ix0, iy0, - lp_rast_tri_tab[nr_planes], - lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); } else { + struct lp_rast_plane *plane = GET_PLANES(tri); int c[MAX_PLANES]; int ei[MAX_PLANES]; int eo[MAX_PLANES]; @@ -545,14 +637,17 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int iy1 = bbox->y1 / TILE_SIZE; for (i = 0; i < nr_planes; i++) { - c[i] = (tri->plane[i].c + - tri->plane[i].dcdy * iy0 * TILE_SIZE - - tri->plane[i].dcdx * ix0 * TILE_SIZE); - - ei[i] = tri->plane[i].ei << TILE_ORDER; - eo[i] = tri->plane[i].eo << TILE_ORDER; - xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER); - ystep[i] = tri->plane[i].dcdy << TILE_ORDER; + c[i] = (plane[i].c + + plane[i].dcdy * iy0 * TILE_SIZE - + plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = (plane[i].dcdy - + plane[i].dcdx - + plane[i].eo) << TILE_ORDER; + + eo[i] = plane[i].eo << TILE_ORDER; + xstep[i] = -(plane[i].dcdx << TILE_ORDER); + ystep[i] = plane[i].dcdy << TILE_ORDER; } @@ -594,9 +689,11 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, */ int count = util_bitcount(partial); in = TRUE; - if (!lp_scene_bin_command( scene, x, y, - lp_rast_tri_tab[count], - lp_rast_arg_triangle(tri, partial) )) + + if (!lp_scene_bin_cmd_with_state( scene, x, y, + setup->fs.stored, + lp_rast_tri_tab[count], + lp_rast_arg_triangle(tri, partial) )) goto fail; LP_COUNT(nr_partially_covered_64); @@ -635,40 +732,62 @@ fail: /** - * Draw triangle if it's CW, cull otherwise. + * Try to draw the triangle, restart the scene on failure. */ -static void triangle_cw( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static void retry_triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front) { - if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) + if (!do_triangle_ccw( setup, v0, v1, v2, front )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; - if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) - assert(0); + if (!do_triangle_ccw( setup, v0, v1, v2, front )) + return; } } +static INLINE float +calc_area(const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float dx01 = v0[0][0] - v1[0][0]; + float dy01 = v0[0][1] - v1[0][1]; + float dx20 = v2[0][0] - v0[0][0]; + float dy20 = v2[0][1] - v0[0][1]; + return dx01 * dy20 - dx20 * dy01; +} + /** - * Draw triangle if it's CCW, cull otherwise. + * Draw triangle if it's CW, cull otherwise. */ -static void triangle_ccw( struct lp_setup_context *setup, +static void triangle_cw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { - if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) - { - lp_setup_flush_and_restart(setup); - if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) - assert(0); - } + float area = calc_area(v0, v1, v2); + + if (area < 0.0f) + retry_triangle_ccw(setup, v0, v2, v1, !setup->ccw_is_frontface); } +static void triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw(setup, v0, v1, v2, setup->ccw_is_frontface); +} /** * Draw triangle whether it's CW or CCW. @@ -678,18 +797,12 @@ static void triangle_both( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4] ) { - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - const float det = ex * fy - ey * fx; - if (det < 0.0f) - triangle_ccw( setup, v0, v1, v2 ); - else if (det > 0.0f) - triangle_cw( setup, v0, v1, v2 ); + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); + else if (area < 0.0f) + retry_triangle_ccw( setup, v0, v2, v1, !setup->ccw_is_frontface ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 6308561f24..9c1f0fe793 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -141,7 +141,8 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup, TRUE); + if (!lp_setup_update_state(setup, TRUE)) + return; switch (setup->prim) { case PIPE_PRIM_POINTS: @@ -338,7 +339,8 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup, TRUE); + if (!lp_setup_update_state(setup, TRUE)) + return; switch (setup->prim) { case PIPE_PRIM_POINTS: diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 86313e1c48..7893e9cdc0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -97,6 +97,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *, void llvmpipe_update_fs(struct llvmpipe_context *lp); +void +llvmpipe_update_setup(struct llvmpipe_context *lp); + void llvmpipe_update_derived(struct llvmpipe_context *llvmpipe); diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index d2be22d7fc..0f5f7369e0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -50,12 +50,13 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) { const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo = &llvmpipe->vertex_info; - struct lp_shader_input *inputs = llvmpipe->inputs; unsigned vs_index; uint i; /* - * Match FS inputs against VS outputs, emitting the necessary attributes. + * Match FS inputs against VS outputs, emitting the necessary + * attributes. Could cache these structs and look them up with a + * combination of fragment shader, vertex shader ids. */ vinfo->num_attribs = 0; @@ -66,72 +67,18 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); - for (i = 0; i < lpfs->info.num_inputs; i++) { + for (i = 0; i < lpfs->info.base.num_inputs; i++) { /* * Search for each input in current vs output: */ vs_index = draw_find_shader_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); - if (vs_index < 0) { - /* - * This can happen with sprite coordinates - the vertex - * shader doesn't need to provide an output as we generate - * them internally. However, lets keep pretending that there - * is something there to not confuse other code. - */ - vs_index = 0; - } - - /* This can be pre-computed, except for flatshade: - */ - inputs[i].usage_mask = lpfs->info.input_usage_mask[i]; - - switch (lpfs->info.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - inputs[i].interp = LP_INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - inputs[i].interp = LP_INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - inputs[i].interp = LP_INTERP_PERSPECTIVE; - break; - default: - assert(0); - break; - } - - switch (lpfs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_FACE: - inputs[i].interp = LP_INTERP_FACING; - break; - case TGSI_SEMANTIC_POSITION: - /* Position was already emitted above - */ - inputs[i].interp = LP_INTERP_POSITION; - inputs[i].src_index = 0; - continue; - case TGSI_SEMANTIC_COLOR: - /* Colors are linearly inputs[i].interpolated in the fragment shader - * even when flatshading is active. This just tells the - * setup module to use coefficients with ddx==0 and - * ddy==0. - */ - if (llvmpipe->rasterizer->flatshade) - inputs[i].interp = LP_INTERP_CONSTANT; - break; - - default: - break; - } + lpfs->info.base.input_semantic_name[i], + lpfs->info.base.input_semantic_index[i]); /* * Emit the requested fs attribute for all but position. */ - - inputs[i].src_index = vinfo->num_attribs; draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); } @@ -145,15 +92,8 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); } - llvmpipe->num_inputs = lpfs->info.num_inputs; - draw_compute_vertex_size(vinfo); - lp_setup_set_vertex_info(llvmpipe->setup, vinfo); - - lp_setup_set_fs_inputs(llvmpipe->setup, - inputs, - lpfs->info.num_inputs); } @@ -190,6 +130,10 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_QUERY)) llvmpipe_update_fs( llvmpipe ); + if (llvmpipe->dirty & (LP_NEW_FS | + LP_NEW_RASTERIZER)) + llvmpipe_update_setup( llvmpipe ); + if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); @@ -208,11 +152,14 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) lp_setup_set_fs_constants(llvmpipe->setup, llvmpipe->constants[PIPE_SHADER_FRAGMENT][0]); - if (llvmpipe->dirty & (LP_NEW_SAMPLER_VIEW | - LP_NEW_SAMPLER)) + if (llvmpipe->dirty & (LP_NEW_SAMPLER_VIEW)) lp_setup_set_fragment_sampler_views(llvmpipe->setup, llvmpipe->num_fragment_sampler_views, - llvmpipe->fragment_sampler_views, + llvmpipe->fragment_sampler_views); + + if (llvmpipe->dirty & (LP_NEW_SAMPLER)) + lp_setup_set_fragment_sampler_state(llvmpipe->setup, + llvmpipe->num_samplers, llvmpipe->sampler); llvmpipe->dirty = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index f0a15e11b9..9fbedac165 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -99,74 +99,12 @@ #include <llvm-c/Analysis.h> +#include <llvm-c/BitWriter.h> static unsigned fs_no = 0; -/** - * Generate the depth /stencil test code. - */ -static void -generate_depth_stencil(LLVMBuilderRef builder, - const struct lp_fragment_shader_variant_key *key, - struct lp_type src_type, - struct lp_build_mask_context *mask, - LLVMValueRef stencil_refs[2], - LLVMValueRef src, - LLVMValueRef dst_ptr, - LLVMValueRef facing, - LLVMValueRef counter) -{ - const struct util_format_description *format_desc; - struct lp_type dst_type; - - if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled) - return; - - format_desc = util_format_description(key->zsbuf_format); - assert(format_desc); - - /* - * Depths are expected to be between 0 and 1, even if they are stored in - * floats. Setting these bits here will ensure that the lp_build_conv() call - * below won't try to unnecessarily clamp the incoming values. - */ - if(src_type.floating) { - src_type.sign = FALSE; - src_type.norm = TRUE; - } - else { - assert(!src_type.sign); - assert(src_type.norm); - } - - /* Pick the depth type. */ - dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); - - /* FIXME: Cope with a depth test type with a different bit width. */ - assert(dst_type.width == src_type.width); - assert(dst_type.length == src_type.length); - - /* Convert fragment Z from float to integer */ - lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); - - dst_ptr = LLVMBuildBitCast(builder, - dst_ptr, - LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); - lp_build_depth_stencil_test(builder, - &key->depth, - key->stencil, - dst_type, - format_desc, - mask, - stencil_refs, - src, - dst_ptr, - facing, - counter); -} - /** * Expand the relevent bits of mask_input to a 4-dword mask for the @@ -248,6 +186,26 @@ generate_quad_mask(LLVMBuilderRef builder, } +#define EARLY_DEPTH_TEST 0x1 +#define LATE_DEPTH_TEST 0x2 +#define EARLY_DEPTH_WRITE 0x4 +#define LATE_DEPTH_WRITE 0x8 + +static int +find_output_by_semantic( const struct tgsi_shader_info *info, + unsigned semantic, + unsigned index ) +{ + int i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return i; + + return -1; +} + /** * Generate the fragment shader, depth/stencil test, and alpha tests. @@ -255,14 +213,13 @@ generate_quad_mask(LLVMBuilderRef builder, * \param partial_mask if 1, do mask_input testing */ static void -generate_fs(struct llvmpipe_context *lp, - struct lp_fragment_shader *shader, +generate_fs(struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef context_ptr, unsigned i, - const struct lp_build_interp_soa_context *interp, + struct lp_build_interp_soa_context *interp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef (*color)[4], @@ -272,18 +229,52 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef mask_input, LLVMValueRef counter) { + const struct util_format_description *zs_format_desc = NULL; const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef vec_type; LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; - LLVMValueRef z = interp->pos[2]; + LLVMValueRef z; + LLVMValueRef zs_value = NULL; LLVMValueRef stencil_refs[2]; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask; - boolean early_depth_stencil_test; + boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 && + shader->info.base.num_inputs < 3 && + shader->info.base.num_instructions < 8); unsigned attrib; unsigned chan; unsigned cbuf; + unsigned depth_mode; + + if (key->depth.enabled || + key->stencil[0].enabled || + key->stencil[1].enabled) { + + zs_format_desc = util_format_description(key->zsbuf_format); + assert(zs_format_desc); + + if (!shader->info.base.writes_z) { + if (key->alpha.enabled || shader->info.base.uses_kill) + /* With alpha test and kill, can do the depth test early + * and hopefully eliminate some quads. But need to do a + * special deferred depth write once the final mask value + * is known. + */ + depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; + else + depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; + } + else { + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + } + + if (!(key->depth.enabled && key->depth.writemask) && + !(key->stencil[0].enabled && key->stencil[0].writemask)) + depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); + } + else { + depth_mode = 0; + } assert(i < 4); @@ -294,20 +285,14 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr = lp_jit_context_constants(builder, context_ptr); - flow = lp_build_flow_create(builder); - memset(outputs, 0, sizeof outputs); - lp_build_flow_scope_begin(flow); - /* Declare the color and z variables */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { - color[cbuf][chan] = LLVMGetUndef(vec_type); - lp_build_flow_scope_declare(flow, &color[cbuf][chan]); + color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color"); } } - lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ if (partial_mask) { @@ -319,74 +304,126 @@ generate_fs(struct llvmpipe_context *lp, } /* 'mask' will control execution based on quad's pixel alive/killed state */ - lp_build_mask_begin(&mask, flow, type, *pmask); - - early_depth_stencil_test = - (key->depth.enabled || key->stencil[0].enabled) && - !key->alpha.enabled && - !shader->info.uses_kill && - !shader->info.writes_z; - - if (early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + lp_build_mask_begin(&mask, builder, type, *pmask); + + if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) + lp_build_mask_check(&mask); + + lp_build_interp_soa_update_pos(interp, i); + z = interp->pos[2]; + + if (depth_mode & EARLY_DEPTH_TEST) { + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + + if (depth_mode & EARLY_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } + } + lp_build_interp_soa_update_inputs(interp, i); + + /* Build the actual shader */ lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, sampler, &shader->info); + outputs, sampler, &shader->info.base); - /* loop over fragment shader outputs/results */ - for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { - LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); - lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]); - - switch (shader->info.output_semantic_name[attrib]) { - case TGSI_SEMANTIC_COLOR: - { - unsigned cbuf = shader->info.output_semantic_index[attrib]; - - lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); - - /* Alpha test */ - /* XXX: should only test the final assignment to alpha */ - if (cbuf == 0 && chan == 3 && key->alpha.enabled) { - LLVMValueRef alpha = out; - LLVMValueRef alpha_ref_value; - alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); - alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); - lp_build_alpha_test(builder, key->alpha.func, type, - &mask, alpha, alpha_ref_value); - } - - color[cbuf][chan] = out; - break; - } - - case TGSI_SEMANTIC_POSITION: - if(chan == 2) - z = out; - break; - } - } + + /* Alpha test */ + if (key->alpha.enabled) { + int color0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_COLOR, + 0); + + if (color0 != -1 && outputs[color0][3]) { + LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); + LLVMValueRef alpha_ref_value; + + alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); + alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); + + lp_build_alpha_test(builder, key->alpha.func, type, + &mask, alpha, alpha_ref_value, + (depth_mode & LATE_DEPTH_TEST) != 0); } } - if (!early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + /* Late Z test */ + if (depth_mode & LATE_DEPTH_TEST) { + int pos0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_POSITION, + 0); + + if (pos0 != -1 && outputs[pos0][2]) { + z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); + } - lp_build_mask_end(&mask); + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + /* Late Z write */ + if (depth_mode & LATE_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } + } + else if ((depth_mode & EARLY_DEPTH_TEST) && + (depth_mode & LATE_DEPTH_WRITE)) + { + /* Need to apply a reduced mask to the depth write. Reload the + * depth value, update from zs_value with the new mask value and + * write that out. + */ + lp_build_deferred_depth_write(builder, + type, + zs_format_desc, + &mask, + depth_ptr, + zs_value); + } - lp_build_flow_scope_end(flow); - lp_build_flow_destroy(flow); + /* Color write */ + for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) + { + if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR && + shader->info.base.output_semantic_index[attrib] < key->nr_cbufs) + { + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + /* XXX: just initialize outputs to point at colors[] and + * skip this. + */ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color[cbuf][chan]); + } + } + } + } - *pmask = mask.value; + if (counter) + lp_build_occlusion_count(builder, type, + lp_build_mask_value(&mask), counter); + *pmask = lp_build_mask_end(&mask); } @@ -407,10 +444,10 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, - LLVMValueRef dst_ptr) + LLVMValueRef dst_ptr, + boolean do_branch) { struct lp_build_context bld; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMValueRef const_ptr; @@ -421,10 +458,9 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); - flow = lp_build_flow_create(builder); - - /* we'll use this mask context to skip blending if all pixels are dead */ - lp_build_mask_begin(&mask_ctx, flow, type, mask); + lp_build_mask_begin(&mask_ctx, builder, type, mask); + if (do_branch) + lp_build_mask_check(&mask_ctx); vec_type = lp_build_vec_type(type); @@ -457,7 +493,6 @@ generate_blend(const struct pipe_blend_state *blend, } lp_build_mask_end(&mask_ctx); - lp_build_flow_destroy(flow); } @@ -468,13 +503,13 @@ generate_blend(const struct pipe_blend_state *blend, * 2x2 pixels. */ static void -generate_fragment(struct llvmpipe_context *lp, +generate_fragment(struct llvmpipe_screen *screen, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, unsigned partial_mask) { - struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; char func_name[256]; struct lp_type fs_type; struct lp_type blend_type; @@ -502,11 +537,24 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef blend_mask; LLVMValueRef function; LLVMValueRef facing; + const struct util_format_description *zs_format_desc; unsigned num_fs; unsigned i; unsigned chan; unsigned cbuf; + /* Adjust color input interpolation according to flatshade state: + */ + memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]); + for (i = 0; i < shader->info.base.num_inputs; i++) { + if (inputs[i].interp == LP_INTERP_COLOR) { + if (key->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_LINEAR; + } + } + /* TODO: actually pick these based on the fs and color buffer * characteristics. */ @@ -542,12 +590,12 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ - arg_types[3] = LLVMFloatType(); /* facing */ + arg_types[3] = LLVMInt32Type(); /* facing */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ - arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0); /* depth */ arg_types[9] = LLVMInt32Type(); /* mask_input */ arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ @@ -558,7 +606,6 @@ generate_fragment(struct llvmpipe_context *lp, variant->function[partial_mask] = function; - /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? */ @@ -606,8 +653,8 @@ generate_fragment(struct llvmpipe_context *lp, * already included in the shader key. */ lp_build_interp_soa_init(&interp, - lp->num_inputs, - lp->inputs, + shader->info.base.num_inputs, + inputs, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x, y); @@ -616,17 +663,18 @@ generate_fragment(struct llvmpipe_context *lp, sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); /* loop over quads in the block */ + zs_format_desc = util_format_description(key->zsbuf_format); + for(i = 0; i < num_fs; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(), + i*fs_type.length*zs_format_desc->block.bits/8, + 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; - if(i != 0) - lp_build_interp_soa_update(&interp, i); + depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); - depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); - - generate_fs(lp, shader, key, + generate_fs(shader, key, builder, fs_type, context_ptr, @@ -660,9 +708,18 @@ generate_fragment(struct llvmpipe_context *lp, * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { + LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH]; + + for (i = 0; i < num_fs; i++) { + fs_color_vals[i] = + LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals"); + } + lp_build_conv(builder, fs_type, blend_type, - fs_out_color[cbuf][chan], num_fs, + fs_color_vals, + num_fs, &blend_in_color[chan], 1); + lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } @@ -685,14 +742,23 @@ generate_fragment(struct llvmpipe_context *lp, /* * Blending. */ - generate_blend(&key->blend, - rt, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr); + { + /* Could the 4x4 have been killed? + */ + boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) && + !key->alpha.enabled && + !shader->info.base.uses_kill); + + generate_blend(&key->blend, + rt, + builder, + blend_type, + context_ptr, + blend_mask, + blend_in_color, + color_ptr, + do_branch); + } } #ifdef PIPE_ARCH_X86 @@ -717,12 +783,17 @@ generate_fragment(struct llvmpipe_context *lp, /* Apply optimizations to LLVM IR */ LLVMRunFunctionPassManager(screen->pass, function); - if (gallivm_debug & GALLIVM_DEBUG_IR) { + if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) { /* Print the LLVM IR to stderr */ lp_debug_dump_value(function); debug_printf("\n"); } + /* Dump byte code to a file */ + if (0) { + LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc"); + } + /* * Translate the LLVM IR into machine code. */ @@ -731,7 +802,7 @@ generate_fragment(struct llvmpipe_context *lp, variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); - if (gallivm_debug & GALLIVM_DEBUG_ASM) { + if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) { lp_disassemble(f); } lp_func_delete_body(function); @@ -746,6 +817,9 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("fs variant %p:\n", (void *) key); + if (key->flatshade) { + debug_printf("flatshade = 1\n"); + } for (i = 0; i < key->nr_cbufs; ++i) { debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i])); } @@ -770,6 +844,10 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); } + if (key->occlusion_count) { + debug_printf("occlusion_count = 1\n"); + } + if (key->blend.logicop_enable) { debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE)); } @@ -782,31 +860,33 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); - for (i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if (key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - debug_printf(" .format = %s\n", - util_format_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - util_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s %s\n", - util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) - debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - } + for (i = 0; i < key->nr_samplers; ++i) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + util_format_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + util_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal); + debug_printf(" .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero); + debug_printf(" .apply_min_lod = %u\n", key->sampler[i].apply_min_lod); + debug_printf(" .apply_max_lod = %u\n", key->sampler[i].apply_max_lod); } } @@ -823,7 +903,7 @@ lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant) } static struct lp_fragment_shader_variant * -generate_variant(struct llvmpipe_context *lp, +generate_variant(struct llvmpipe_screen *screen, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key) { @@ -861,7 +941,7 @@ generate_variant(struct llvmpipe_context *lp, !key->stencil[0].enabled && !key->alpha.enabled && !key->depth.enabled && - !shader->info.uses_kill + !shader->info.base.uses_kill ? TRUE : FALSE; @@ -869,11 +949,11 @@ generate_variant(struct llvmpipe_context *lp, lp_debug_fs_variant(variant); } - generate_fragment(lp, shader, variant, RAST_EDGE_TEST); + generate_fragment(screen, shader, variant, RAST_EDGE_TEST); if (variant->opaque) { /* Specialized shader, which doesn't need to read the color buffer. */ - generate_fragment(lp, shader, variant, RAST_WHOLE); + generate_fragment(screen, shader, variant, RAST_WHOLE); } else { variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; } @@ -889,6 +969,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); struct lp_fragment_shader *shader; int nr_samplers; + int i; shader = CALLOC_STRUCT(lp_fragment_shader); if (!shader) @@ -898,7 +979,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, make_empty_list(&shader->variants); /* get/save the summary info for this shader */ - tgsi_scan_shader(templ->tokens, &shader->info); + lp_build_tgsi_info(templ->tokens, &shader->info); /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); @@ -910,18 +991,58 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, return NULL; } - nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, sampler[nr_samplers]); + for (i = 0; i < shader->info.base.num_inputs; i++) { + shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i]; + + switch (shader->info.base.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + shader->inputs[i].interp = LP_INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + shader->inputs[i].interp = LP_INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + shader->inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + default: + assert(0); + break; + } + + switch (shader->info.base.input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + /* Colors may be either linearly or constant interpolated in + * the fragment shader, but that information isn't available + * here. Mark color inputs and fix them up later. + */ + shader->inputs[i].interp = LP_INTERP_COLOR; + break; + case TGSI_SEMANTIC_FACE: + shader->inputs[i].interp = LP_INTERP_FACING; + break; + case TGSI_SEMANTIC_POSITION: + /* Position was already emitted above + */ + shader->inputs[i].interp = LP_INTERP_POSITION; + shader->inputs[i].src_index = 0; + continue; + } + + shader->inputs[i].src_index = i+1; + } + if (LP_DEBUG & DEBUG_TGSI) { unsigned attrib; debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); tgsi_dump(templ->tokens, 0); debug_printf("usage masks:\n"); - for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) { - unsigned usage_mask = shader->info.input_usage_mask[attrib]; + for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) { + unsigned usage_mask = shader->info.base.input_usage_mask[attrib]; debug_printf(" IN[%u].%s%s%s%s\n", attrib, usage_mask & TGSI_WRITEMASK_X ? "x" : "", @@ -1150,10 +1271,10 @@ make_variant_key(struct llvmpipe_context *lp, /* This value will be the same for all the variants of a given shader: */ - key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; for(i = 0; i < key->nr_samplers; ++i) { - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); @@ -1168,6 +1289,7 @@ make_variant_key(struct llvmpipe_context *lp, void llvmpipe_update_fs(struct llvmpipe_context *lp) { + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant = NULL; @@ -1208,7 +1330,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) } t0 = os_time_get(); - variant = generate_variant(lp, shader, &key); + variant = generate_variant(screen, shader, &key); t1 = os_time_get(); dt = t1 - t0; @@ -1228,6 +1350,10 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) + + + + void llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 4999b8dca1..7d58c4936c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -34,6 +34,8 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */ #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ +#include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */ +#include "lp_bld_interp.h" /* for struct lp_shader_input */ struct tgsi_token; @@ -96,7 +98,7 @@ struct lp_fragment_shader { struct pipe_shader_state base; - struct tgsi_shader_info info; + struct lp_tgsi_info info; struct lp_fs_variant_list_item variants; @@ -107,6 +109,9 @@ struct lp_fragment_shader unsigned no; unsigned variants_created; unsigned variants_cached; + + /** Fragment shader input interpolation info */ + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 17a4a0ed02..1dd866195d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -246,9 +246,9 @@ llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, struct pipe_sampler_view **views) { unsigned i; - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; - const void *data[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + const void *data[PIPE_MAX_TEXTURE_LEVELS]; assert(num <= PIPE_MAX_VERTEX_SAMPLERS); if (!num) diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c new file mode 100644 index 0000000000..2c8b8b9a92 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -0,0 +1,759 @@ +/************************************************************************** + * + * Copyright 2010 VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "os/os_time.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_intr.h" +#include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */ + +#include "lp_perf.h" +#include "lp_debug.h" +#include "lp_flush.h" +#include "lp_screen.h" +#include "lp_context.h" +#include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state.h" +#include "lp_state_fs.h" +#include "lp_state_setup.h" + + + +/* currently organized to interpolate full float[4] attributes even + * when some elements are unused. Later, can pack vertex data more + * closely. + */ + + +struct lp_setup_args +{ + /* Function arguments: + */ + LLVMValueRef v0; + LLVMValueRef v1; + LLVMValueRef v2; + LLVMValueRef facing; /* boolean */ + LLVMValueRef a0; + LLVMValueRef dadx; + LLVMValueRef dady; + + /* Derived: + */ + LLVMValueRef x0_center; + LLVMValueRef y0_center; + LLVMValueRef dy20_ooa; + LLVMValueRef dy01_ooa; + LLVMValueRef dx20_ooa; + LLVMValueRef dx01_ooa; +}; + +static LLVMTypeRef type4f(void) +{ + return LLVMVectorType(LLVMFloatType(), 4); +} + + +/* Equivalent of _mm_setr_ps(a,b,c,d) + */ +static LLVMValueRef vec4f(LLVMBuilderRef bld, + LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d, + const char *name) +{ + LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + LLVMValueRef res = LLVMGetUndef(type4f()); + + res = LLVMBuildInsertElement(bld, res, a, i0, ""); + res = LLVMBuildInsertElement(bld, res, b, i1, ""); + res = LLVMBuildInsertElement(bld, res, c, i2, ""); + res = LLVMBuildInsertElement(bld, res, d, i3, name); + + return res; +} + +/* Equivalent of _mm_set1_ps(a) + */ +static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, + LLVMValueRef a, + const char *name) +{ + LLVMValueRef res = LLVMGetUndef(type4f()); + int i; + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); + } + + return res; +} + +static void +store_coef(LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef dadx, + LLVMValueRef dady) +{ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0); + + LLVMBuildStore(builder, + a0, + LLVMBuildGEP(builder, args->a0, &idx, 1, "")); + + LLVMBuildStore(builder, + dadx, + LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); + + LLVMBuildStore(builder, + dady, + LLVMBuildGEP(builder, args->dady, &idx, 1, "")); +} + + + +static void +emit_constant_coef4( LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef vert, + unsigned attr) +{ + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), attr, 0); + LLVMValueRef attr_ptr = LLVMBuildGEP(builder, vert, &idx, 1, "attr_ptr"); + LLVMValueRef vert_attr = LLVMBuildLoad(builder, attr_ptr, "vert_attr"); + + store_coef(builder, args, slot, vert_attr, zerovec, zerovec); +} + + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? + */ +static void +emit_facing_coef( LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot ) +{ + LLVMValueRef a0_0 = args->facing; + LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), ""); + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing"); + LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); + + store_coef(builder, args, slot, a0, zerovec, zerovec); +} + + +static LLVMValueRef +vert_attrib(LLVMBuilderRef b, + LLVMValueRef vert, + int attr, + int elem, + const char *name) +{ + LLVMValueRef idx[2]; + idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0); + idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0); + return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); +} + + + +static void +emit_coef4( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef a1, + LLVMValueRef a2) +{ + LLVMValueRef dy20_ooa = args->dy20_ooa; + LLVMValueRef dy01_ooa = args->dy01_ooa; + LLVMValueRef dx20_ooa = args->dx20_ooa; + LLVMValueRef dx01_ooa = args->dx01_ooa; + LLVMValueRef x0_center = args->x0_center; + LLVMValueRef y0_center = args->y0_center; + + /* XXX: using fsub, fmul on vector types -- does this work?? + */ + LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); + LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); + + /* Calculate dadx (vec4f) + */ + LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa"); + LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa"); + LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx"); + + /* Calculate dady (vec4f) + */ + LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa"); + LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa"); + LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady"); + + /* Calculate a0 - the attribute value at the origin + */ + LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); + LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); + LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); + LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); + + store_coef(b, args, slot, attr_0, dadx, dady); +} + + +static void +emit_linear_coef( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); + + LLVMValueRef a0 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + LLVMValueRef a1 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + LLVMValueRef a2 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + emit_coef4(b, args, slot, a0, a1, a2); +} + + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void +emit_perspective_coef( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); + + LLVMValueRef v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + LLVMValueRef v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + LLVMValueRef v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow"); + LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow"); + LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow"); + + LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, v0a, v0_oow, "v0_oow_v0a"); + LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, v1a, v1_oow, "v1_oow_v1a"); + LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, v2a, v2_oow, "v2_oow_v2a"); + + emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a); +} + + +static void +emit_position_coef( LLVMBuilderRef builder, + struct lp_setup_args *args, + int slot, int attrib ) +{ + emit_linear_coef(builder, args, slot, attrib); +} + + + + +/** + * Compute the inputs-> dadx, dady, a0 values. + */ +static void +emit_tri_coef( LLVMBuilderRef builder, + const struct lp_setup_variant_key *key, + struct lp_setup_args *args ) +{ + unsigned slot; + + /* The internal position input is in slot zero: + */ + emit_position_coef(builder, args, 0, 0); + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + + switch (key->inputs[slot].interp) { + case LP_INTERP_CONSTANT: + if (key->flatshade_first) { + emit_constant_coef4(builder, args, slot+1, args->v0, vert_attr); + } + else { + emit_constant_coef4(builder, args, slot+1, args->v2, vert_attr); + } + break; + + case LP_INTERP_LINEAR: + emit_linear_coef(builder, args, slot+1, vert_attr); + break; + + case LP_INTERP_PERSPECTIVE: + emit_perspective_coef(builder, args, slot+1, vert_attr); + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0. + */ + break; + + case LP_INTERP_FACING: + emit_facing_coef(builder, args, slot+1); + break; + + default: + assert(0); + } + } +} + + +/* XXX: This is generic code, share with fs/vs codegen: + */ +static lp_jit_setup_triangle +finalize_function(struct llvmpipe_screen *screen, + LLVMBuilderRef builder, + LLVMValueRef function) +{ + void *f; + + /* Verify the LLVM IR. If invalid, dump and abort */ +#ifdef DEBUG + if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) { + if (1) + lp_debug_dump_value(function); + abort(); + } +#endif + + /* Apply optimizations to LLVM IR */ + LLVMRunFunctionPassManager(screen->pass, function); + + if (gallivm_debug & GALLIVM_DEBUG_IR) + { + /* Print the LLVM IR to stderr */ + lp_debug_dump_value(function); + debug_printf("\n"); + } + + /* + * Translate the LLVM IR into machine code. + */ + f = LLVMGetPointerToGlobal(screen->engine, function); + + if (gallivm_debug & GALLIVM_DEBUG_ASM) + { + lp_disassemble(f); + } + + lp_func_delete_body(function); + + return f; +} + +/* XXX: Generic code: + */ +static void +lp_emit_emms(LLVMBuilderRef builder) +{ +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif +} + + +/* XXX: generic code: + */ +static void +set_noalias(LLVMBuilderRef builder, + LLVMValueRef function, + const LLVMTypeRef *arg_types, + int nr_args) +{ + int i; + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(function, i), + LLVMNoAliasAttribute); +} + +static void +init_args(LLVMBuilderRef b, + struct lp_setup_args *args, + const struct lp_setup_variant *variant) +{ + LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x"); + LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y"); + + LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x"); + LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y"); + + LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x"); + LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y"); + + LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(), + variant->key.pixel_center_half ? 0.5 : 0); + + LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" ); + LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" ); + + LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01"); + LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01"); + LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20"); + LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20"); + + LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0); + LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e"); + LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f"); + LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa"); + + LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa"); + LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa"); + LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa"); + LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa"); + + args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f"); + args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f"); + + args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f"); + args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f"); + + args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f"); + args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f"); +} + +/** + * Generate the runtime callable function for the coefficient calculation. + * + */ +static struct lp_setup_variant * +generate_setup_variant(struct llvmpipe_screen *screen, + struct lp_setup_variant_key *key) +{ + struct lp_setup_variant *variant = NULL; + struct lp_setup_args args; + char func_name[256]; + LLVMTypeRef vec4f_type; + LLVMTypeRef func_type; + LLVMTypeRef arg_types[7]; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + int64_t t0, t1; + + if (0) + goto fail; + + variant = CALLOC_STRUCT(lp_setup_variant); + if (variant == NULL) + goto fail; + + if (LP_DEBUG & DEBUG_COUNTERS) { + t0 = os_time_get(); + } + + memcpy(&variant->key, key, key->size); + variant->list_item_global.base = variant; + + util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u", + 0, + variant->no); + + /* Currently always deal with full 4-wide vertex attributes from + * the vertices. + */ + + vec4f_type = LLVMVectorType(LLVMFloatType(), 4); + + arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ + arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ + arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ + arg_types[3] = LLVMInt32Type(); /* facing */ + arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ + arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ + arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function = LLVMAddFunction(screen->module, func_name, func_type); + if (!variant->function) + goto fail; + + LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + + args.v0 = LLVMGetParam(variant->function, 0); + args.v1 = LLVMGetParam(variant->function, 1); + args.v2 = LLVMGetParam(variant->function, 2); + args.facing = LLVMGetParam(variant->function, 3); + args.a0 = LLVMGetParam(variant->function, 4); + args.dadx = LLVMGetParam(variant->function, 5); + args.dady = LLVMGetParam(variant->function, 6); + + lp_build_name(args.v0, "in_v0"); + lp_build_name(args.v1, "in_v1"); + lp_build_name(args.v2, "in_v2"); + lp_build_name(args.facing, "in_facing"); + lp_build_name(args.a0, "out_a0"); + lp_build_name(args.dadx, "out_dadx"); + lp_build_name(args.dady, "out_dady"); + + /* + * Function body + */ + block = LLVMAppendBasicBlock(variant->function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + set_noalias(builder, variant->function, arg_types, Elements(arg_types)); + init_args(builder, &args, variant); + emit_tri_coef(builder, &variant->key, &args); + + lp_emit_emms(builder); + LLVMBuildRetVoid(builder); + LLVMDisposeBuilder(builder); + + variant->jit_function = finalize_function(screen, builder, + variant->function); + if (!variant->jit_function) + goto fail; + + /* + * Update timing information: + */ + if (LP_DEBUG & DEBUG_COUNTERS) { + t1 = os_time_get(); + LP_COUNT_ADD(llvm_compile_time, t1 - t0); + LP_COUNT_ADD(nr_llvm_compiles, 1); + } + + return variant; + +fail: + if (variant) { + if (variant->function) { + if (variant->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + FREE(variant); + } + + return NULL; +} + + + +static void +lp_make_setup_variant_key(struct llvmpipe_context *lp, + struct lp_setup_variant_key *key) +{ + struct lp_fragment_shader *fs = lp->fs; + unsigned i; + + assert(sizeof key->inputs[0] == sizeof(ushort)); + + key->num_inputs = fs->info.base.num_inputs; + key->flatshade_first = lp->rasterizer->flatshade_first; + key->pixel_center_half = lp->rasterizer->gl_rasterization_rules; + key->size = Offset(struct lp_setup_variant_key, + inputs[key->num_inputs]); + key->pad = 0; + + memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); + for (i = 0; i < key->num_inputs; i++) { + if (key->inputs[i].interp == LP_INTERP_COLOR) { + if (lp->rasterizer->flatshade) + key->inputs[i].interp = LP_INTERP_CONSTANT; + else + key->inputs[i].interp = LP_INTERP_LINEAR; + } + } + +} + + +static void +remove_setup_variant(struct llvmpipe_context *lp, + struct lp_setup_variant *variant) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: del setup_variant #%u total %u\n", + variant->no, lp->nr_setup_variants); + } + + if (variant->function) { + if (variant->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + remove_from_list(&variant->list_item_global); + lp->nr_setup_variants--; + FREE(variant); +} + + + +/* When the number of setup variants exceeds a threshold, cull a + * fraction (currently a quarter) of them. + */ +static void +cull_setup_variants(struct llvmpipe_context *lp) +{ + struct pipe_context *pipe = &lp->pipe; + int i; + + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. + */ + llvmpipe_finish(pipe, __FUNCTION__); + + for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) { + struct lp_setup_variant_list_item *item = last_elem(&lp->setup_variants_list); + remove_setup_variant(lp, item->base); + } +} + + +/** + * Update fragment/vertex shader linkage state. This is called just + * prior to drawing something when some fragment-related state has + * changed. + */ +void +llvmpipe_update_setup(struct llvmpipe_context *lp) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + + struct lp_setup_variant_key *key = &lp->setup_variant.key; + struct lp_setup_variant *variant = NULL; + struct lp_setup_variant_list_item *li; + + lp_make_setup_variant_key(lp, key); + + foreach(li, &lp->setup_variants_list) { + if(li->base->key.size == key->size && + memcmp(&li->base->key, key, key->size) == 0) { + variant = li->base; + break; + } + } + + if (variant) { + move_to_head(&lp->setup_variants_list, &variant->list_item_global); + } + else { + if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) { + cull_setup_variants(lp); + } + + variant = generate_setup_variant(screen, key); + insert_at_head(&lp->setup_variants_list, &variant->list_item_global); + lp->nr_setup_variants++; + } + + lp_setup_set_setup_variant(lp->setup, + variant); +} + +void +lp_delete_setup_variants(struct llvmpipe_context *lp) +{ + struct lp_setup_variant_list_item *li; + li = first_elem(&lp->setup_variants_list); + while(!at_end(&lp->setup_variants_list, li)) { + struct lp_setup_variant_list_item *next = next_elem(li); + remove_setup_variant(lp, li->base); + li = next; + } +} + +void +lp_dump_setup_coef( const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]) +{ + int i, slot; + + for (i = 0; i < NUM_CHANNELS; i++) { + float a0 = sa0 [0][i]; + float dadx = sdadx[0][i]; + float dady = sdady[0][i]; + + debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", + "xyzw"[i], + a0, dadx, dady); + } + + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned usage_mask = key->inputs[slot].usage_mask; + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + float a0 = sa0 [1 + slot][i]; + float dadx = sdadx[1 + slot][i]; + float dady = sdady[1 + slot][i]; + + debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", + slot, + "xyzw"[i], + a0, dadx, dady); + } + } + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h new file mode 100644 index 0000000000..b0c81baa75 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h @@ -0,0 +1,80 @@ +#ifndef LP_STATE_SETUP_H +#define LP_STATE_SETUP_H + +#include "lp_bld_interp.h" + + +struct llvmpipe_context; +struct lp_setup_variant; + +struct lp_setup_variant_list_item +{ + struct lp_setup_variant *base; + struct lp_setup_variant_list_item *next, *prev; +}; + + +struct lp_setup_variant_key { + unsigned num_inputs:8; + unsigned flatshade_first:1; + unsigned pixel_center_half:1; + unsigned pad:7; + unsigned size:16; + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; +}; + + +typedef void (*lp_jit_setup_triangle)( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front_facing, + float (*a0)[4], + float (*dadx)[4], + float (*dady)[4] ); + + + + +/* At this stage, for a given variant key, we create a + * draw_vertex_info struct telling the draw module how to format the + * vertices, and an llvm-generated function which calculates the + * attribute interpolants (a0, dadx, dady) from three of those + * vertices. + */ +struct lp_setup_variant { + struct lp_setup_variant_key key; + + struct lp_setup_variant_list_item list_item_global; + + /* XXX: this is a pointer to the LLVM IR. Once jit_function is + * generated, we never need to use the IR again - need to find a + * way to release this data without destroying the generated + * assembly. + */ + LLVMValueRef function; + + /* The actual generated setup function: + */ + lp_jit_setup_triangle jit_function; + + unsigned no; +}; + +void lp_setup_tri_fallback( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front_facing, + float (*a0)[4], + float (*dadx)[4], + float (*dady)[4], + const struct lp_setup_variant_key *key ); + +void lp_delete_setup_variants(struct llvmpipe_context *lp); + +void +lp_dump_setup_coef( const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]); + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index 57b0ee5776..816518e508 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -75,10 +75,7 @@ add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); @@ -100,9 +97,10 @@ printv(char* string, v4sf value) f[0], f[1], f[2], f[3]); } -static void +static boolean compare(v4sf x, v4sf y) { + boolean success = TRUE; float *xp = (float *) &x; float *yp = (float *) &y; if (xp[0] != yp[0] || @@ -110,7 +108,9 @@ compare(v4sf x, v4sf y) xp[2] != yp[2] || xp[3] != yp[3]) { printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n"); + success = FALSE; } + return success; } @@ -171,9 +171,12 @@ test_round(unsigned verbose, FILE *fp) LLVMDumpModule(module); for (i = 0; i < 3; i++) { + /* NOTE: There are several acceptable rules for x.5 rounding: ceiling, + * nearest even, etc. So we avoid testing such corner cases here. + */ v4sf xvals[3] = { {-10.0, -1, 0, 12.0}, - {-1.5, -0.25, 1.25, 2.5}, + {-1.49, -0.25, 1.25, 2.51}, {-0.99, -0.01, 0.01, 0.99} }; v4sf x = xvals[i]; @@ -191,7 +194,7 @@ test_round(unsigned verbose, FILE *fp) y = round_func(x); printv("C round(x) ", ref); printv("LLVM round(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = trunc(xp[0]); refp[1] = trunc(xp[1]); @@ -200,7 +203,7 @@ test_round(unsigned verbose, FILE *fp) y = trunc_func(x); printv("C trunc(x) ", ref); printv("LLVM trunc(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = floor(xp[0]); refp[1] = floor(xp[1]); @@ -209,7 +212,7 @@ test_round(unsigned verbose, FILE *fp) y = floor_func(x); printv("C floor(x) ", ref); printv("LLVM floor(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = ceil(xp[0]); refp[1] = ceil(xp[1]); @@ -218,7 +221,7 @@ test_round(unsigned verbose, FILE *fp) y = ceil_func(x); printv("C ceil(x) ", ref); printv("LLVM ceil(x) ", y); - compare(ref, y); + success = success && compare(ref, y); } LLVMFreeMachineCodeForFunction(engine, test_round); @@ -247,11 +250,7 @@ test_round(unsigned verbose, FILE *fp) boolean test_all(unsigned verbose, FILE *fp) { - boolean success = TRUE; - - test_round(verbose, fp); - - return success; + return test_round(verbose, fp); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 7ab357f162..79939b1a39 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -72,10 +72,7 @@ add_sincos_test(LLVMModuleRef module, boolean sin) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 2ba39052ab..e49f9c62fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -289,172 +289,141 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): print -def generate_ssse3(): +def generate_sse2(): print ''' #if defined(PIPE_ARCH_SSE) #include "util/u_sse.h" -static void -lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, - const uint8_t *src, unsigned src_stride, - unsigned x0, unsigned y0) +static ALWAYS_INLINE void +swz4( const __m128i * restrict x, + const __m128i * restrict y, + const __m128i * restrict z, + const __m128i * restrict w, + __m128i * restrict a, + __m128i * restrict b, + __m128i * restrict c, + __m128i * restrict d) { + __m128i i, j, k, l; + __m128i m, n, o, p; + __m128i e, f, g, h; + + m = _mm_unpacklo_epi8(*x,*y); + n = _mm_unpackhi_epi8(*x,*y); + o = _mm_unpacklo_epi8(*z,*w); + p = _mm_unpackhi_epi8(*z,*w); + + i = _mm_unpacklo_epi16(m,n); + j = _mm_unpackhi_epi16(m,n); + k = _mm_unpacklo_epi16(o,p); + l = _mm_unpackhi_epi16(o,p); + + e = _mm_unpacklo_epi8(i,j); + f = _mm_unpackhi_epi8(i,j); + g = _mm_unpacklo_epi8(k,l); + h = _mm_unpackhi_epi8(k,l); + + *a = _mm_unpacklo_epi64(e,g); + *b = _mm_unpackhi_epi64(e,g); + *c = _mm_unpacklo_epi64(f,h); + *d = _mm_unpackhi_epi64(f,h); +} + +static ALWAYS_INLINE void +unswz4( const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict x, + __m128i * restrict y, + __m128i * restrict z, + __m128i * restrict w) +{ + __m128i i, j, k, l; + __m128i m, n, o, p; + + i = _mm_unpacklo_epi8(*a,*b); + j = _mm_unpackhi_epi8(*a,*b); + k = _mm_unpacklo_epi8(*c,*d); + l = _mm_unpackhi_epi8(*c,*d); + + m = _mm_unpacklo_epi16(i,k); + n = _mm_unpackhi_epi16(i,k); + o = _mm_unpacklo_epi16(j,l); + p = _mm_unpackhi_epi16(j,l); + + *x = _mm_unpacklo_epi64(m,n); + *y = _mm_unpackhi_epi64(m,n); + *z = _mm_unpacklo_epi64(o,p); + *w = _mm_unpackhi_epi64(o,p); +} +static void +lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst, + const uint8_t * restrict src, unsigned src_stride, + unsigned x0, unsigned y0) +{ + __m128i *dst128 = (__m128i *) dst; unsigned x, y; - __m128i *pdst = (__m128i*) dst; - const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t); - unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH; - unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT; - - const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i line0 = *(__m128i*)ysrc0; - const uint8_t *ysrc = ysrc0 + src_stride; - ysrc0 += tile_stridey; - - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - __m128i r, g, b, a, line1; - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_shuffle_epi8(line0, shuffle00); - g = _mm_shuffle_epi8(line0, shuffle01); - b = _mm_shuffle_epi8(line0, shuffle02); - a = _mm_shuffle_epi8(line0, shuffle03); - - line0 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13)); - - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc -= tile_stridex; - r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23)); - - if (x + 1 < TILE_SIZE) { - line0 = *(__m128i*)ysrc; - ysrc += src_stride; - } - - PIPE_READ_WRITE_BARRIER(); - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33)); - - *pdst++ = r; - *pdst++ = g; - *pdst++ = b; - *pdst++ = a; + + src += y0 * src_stride; + src += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *src_row = src; + + for (x = 0; x < TILE_SIZE; x += 4) { + swz4((const __m128i *) (src_row + 0 * src_stride), + (const __m128i *) (src_row + 1 * src_stride), + (const __m128i *) (src_row + 2 * src_stride), + (const __m128i *) (src_row + 3 * src_stride), + dst128 + 2, /* b */ + dst128 + 1, /* g */ + dst128 + 0, /* r */ + dst128 + 3); /* a */ + + dst128 += 4; + src_row += sizeof(__m128i); } - } + src += 4 * src_stride; + } } static void -lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src, - uint8_t *dst, unsigned dst_stride, +lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, + uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0) { unsigned int x, y; - const __m128i *psrc = (__m128i*) src; - const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t)); - uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t); - __m128i c0 = *psrc++; - __m128i c1; - - const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i *tile = (__m128i*) pdst; - pdst += dst_stride * TILE_VECTOR_HEIGHT; - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - uint8_t *linep = (uint8_t*) (tile++); - __m128i line0, line1, line2, line3; - - c1 = *psrc++; /* r */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_shuffle_epi8(c0, shuffle00); - line1 = _mm_shuffle_epi8(c0, shuffle01); - line2 = _mm_shuffle_epi8(c0, shuffle02); - line3 = _mm_shuffle_epi8(c0, shuffle03); - - c0 = *psrc++; /* g */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13)); - - c1 = *psrc++; /* b */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23)); - - if (psrc != end) - c0 = *psrc++; /* a */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33)); - - *(__m128i*) (linep) = line0; - *(__m128i*) (((char*)linep) + dst_stride) = line1; - *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2; - *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3; + const __m128i *src128 = (const __m128i *) src; + + dst += y0 * dst_stride; + dst += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *dst_row = dst; + + for (x = 0; x < TILE_SIZE; x += 4) { + unswz4( &src128[2], /* b */ + &src128[1], /* g */ + &src128[0], /* r */ + &src128[3], /* a */ + (__m128i *) (dst_row + 0 * dst_stride), + (__m128i *) (dst_row + 1 * dst_stride), + (__m128i *) (dst_row + 2 * dst_stride), + (__m128i *) (dst_row + 3 * dst_stride)); + + src128 += 4; + dst_row += sizeof(__m128i);; } + + dst += 4 * dst_stride; } } -#endif /* PIPE_ARCH_SSSE3 */ +#endif /* PIPE_ARCH_SSE */ ''' @@ -479,7 +448,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -517,7 +486,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -577,7 +546,7 @@ def main(): print '};' print - generate_ssse3() + generate_sse2() channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index ebb21a6e5a..a9426df686 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -236,7 +236,7 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) int ret; ret = nouveau_channel_alloc(dev, 0xbeef0201, 0xbeef0202, - &screen->channel); + 512*1024, &screen->channel); if (ret) return ret; screen->device = dev; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index ac69c7848e..bf6a577188 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -26,6 +26,9 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +#define nouveau_bo_tile_layout(nvbo) \ + ((nvbo)->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) + /* Constant buffer assignment */ #define NV50_CB_PMISC 0 #define NV50_CB_PVP 1 diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 921ed15691..27eb3817bf 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -452,7 +452,7 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) if (nvi->opcode == NV_OP_SAT) { mi = nvi->src[0]->value->insn; - if (mi->opcode != NV_OP_ADD || mi->opcode != NV_OP_MAD) + if (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD) continue; if (mi->flags_def || mi->def[0]->refc > 1) continue; diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 3f3166261b..f70c138fe1 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -92,7 +92,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) return 1; } - if (!bo->tile_flags) { + if (!nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index f973cf24b9..0cc2f4a837 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -45,7 +45,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, WAIT_RING (chan, 14); - if (!src_bo->tile_flags) { + if (!nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); @@ -64,7 +64,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, sz); /* copying only 1 zslice per call */ } - if (!dst_bo->tile_flags) { + if (!nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); @@ -95,14 +95,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); - if (src_bo->tile_flags) { + if (nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); OUT_RING (chan, (sy << 16) | (sx * cpp)); } else { src_offset += (line_count * src_pitch); } - if (dst_bo->tile_flags) { + if (nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); OUT_RING (chan, (dy << 16) | (dx * cpp)); @@ -280,7 +280,7 @@ nv50_upload_sifc(struct nv50_context *nv50, MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */ - if (bo->tile_flags) { + if (nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); OUT_RING (chan, dst_format); OUT_RING (chan, 0); diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 23fdb0820a..13e8beed47 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1558,7 +1558,7 @@ nvfx_fragprog_destroy(struct nvfx_context *nvfx, struct nvfx_fragment_program_bo* next = fpbo->next; nouveau_bo_unmap(fpbo->bo); nouveau_bo_ref(0, &fpbo->bo); - free(fpbo); + os_free_aligned(fpbo); fpbo = next; } while(fpbo != fp->fpbo); diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 145a7985da..f78fe34790 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -29,6 +29,7 @@ static const struct debug_named_value debug_options[] = { { "fp", DBG_FP, "Log fragment program compilation" }, { "vp", DBG_VP, "Log vertex program compilation" }, + { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" }, { "draw", DBG_DRAW, "Log draw calls" }, { "swtcl", DBG_SWTCL, "Log SWTCL-specific info" }, { "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" }, diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index d9d4a9304d..c91532eb7b 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -378,7 +378,8 @@ static void r300_translate_fragment_shader( /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = DBG_ON(r300, DBG_FP); + DBG_ON(r300, DBG_FP) ? compiler.Base.Debug |= RC_DBG_LOG : 0; + DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0; compiler.code = &shader->code; compiler.state = shader->compare_state; @@ -395,7 +396,7 @@ static void r300_translate_fragment_shader( find_output_registers(&compiler, shader); - if (compiler.Base.Debug) { + if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_FP, "r300: Initial fragment program\n"); tgsi_dump(tokens, 0); } diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 5b0121ce9e..5f34fcb274 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -158,7 +158,7 @@ static void r300_render_condition(struct pipe_context *pipe, uint mode) { struct r300_context *r300 = r300_context(pipe); - uint64_t result; + uint64_t result = 0; boolean wait; if (query) { @@ -167,9 +167,9 @@ static void r300_render_condition(struct pipe_context *pipe, if (!r300_get_query_result(pipe, query, wait, &result)) { r300->skip_rendering = FALSE; + } else { + r300->skip_rendering = result == 0; } - - r300->skip_rendering = result == 0; } else { r300->skip_rendering = FALSE; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 7f41ff0e2e..b448924f85 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -124,6 +124,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; /* Texturing. */ diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index dc2bc7e827..8b7f1fab61 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -102,6 +102,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_NO_CBZB (1 << 21) /* Statistics. */ #define DBG_STATS (1 << 24) +#define DBG_P_STAT (1 << 25) /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 03ec127ff7..7e501221b1 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -364,6 +364,7 @@ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { uint32_t result = 0; const struct util_format_description *desc; + unsigned i; desc = util_format_description(format); @@ -371,10 +372,17 @@ r300_translate_vertex_data_type(enum pipe_format format) { return R300_INVALID_FORMAT; } - switch (desc->channel[0].type) { + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { /* Half-floats, floats, doubles */ case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 16: /* Supported only on RV350 and later. */ if (desc->nr_channels > 2) { @@ -394,7 +402,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { case UTIL_FORMAT_TYPE_UNSIGNED: /* Signed ints */ case UTIL_FORMAT_TYPE_SIGNED: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 8: result = R300_DATA_TYPE_BYTE; break; @@ -413,10 +421,10 @@ r300_translate_vertex_data_type(enum pipe_format format) { return R300_INVALID_FORMAT; } - if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { result |= R300_SIGNED; } - if (desc->channel[0].normalized) { + if (desc->channel[i].normalized) { result |= R300_NORMALIZE; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index a7911c6fcc..cee56bccdc 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -260,16 +260,26 @@ uint32_t r300_translate_texformat(enum pipe_format format, return ~0; /* Unsupported/unknown. */ } + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + return ~0; /* Unsupported/unknown. */ + /* And finally, uniform formats. */ - switch (desc->channel[0].type) { + switch (desc->channel[i].type) { case UTIL_FORMAT_TYPE_UNSIGNED: case UTIL_FORMAT_TYPE_SIGNED: - if (!desc->channel[0].normalized && + if (!desc->channel[i].normalized && desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { return ~0; } - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 4: switch (desc->nr_channels) { case 2: @@ -303,7 +313,7 @@ uint32_t r300_translate_texformat(enum pipe_format format, return ~0; case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 16: switch (desc->nr_channels) { case 1: @@ -359,6 +369,11 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) return R300_COLOR_FORMAT_I8; /* 16-bit buffers. */ + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8_SNORM: + return R300_COLOR_FORMAT_UV88; + case PIPE_FORMAT_B5G6R5_UNORM: return R300_COLOR_FORMAT_RGB565; @@ -443,15 +458,25 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) desc = util_format_description(format); + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + return ~0; /* Unsupported/unknown. */ + /* Specifies how the shader output is written to the fog unit. */ - if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { - if (desc->channel[0].size == 32) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { + if (desc->channel[i].size == 32) { modifier |= R300_US_OUT_FMT_C4_32_FP; } else { modifier |= R300_US_OUT_FMT_C4_16_FP; } } else { - if (desc->channel[0].size == 16) { + if (desc->channel[i].size == 16) { modifier |= R300_US_OUT_FMT_C4_16; } else { /* C4_8 seems to be used for the formats whose pixel size @@ -468,7 +493,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* Add swizzles and return. */ switch (format) { - /* 8-bit outputs. + /* 8-bit outputs, one channel. * COLORFORMAT_I8 stores the C2 component. */ case PIPE_FORMAT_A8_UNORM: return modifier | R300_C2_SEL_A; @@ -478,6 +503,14 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return modifier | R300_C2_SEL_R; + /* 16-bit outputs, two channels. + * COLORFORMAT_UV88 stores C2 and C0. */ + case PIPE_FORMAT_L8A8_UNORM: + return modifier | R300_C0_SEL_A | R300_C2_SEL_R; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8_SNORM: + return modifier | R300_C0_SEL_G | R300_C2_SEL_R; + /* BGRA outputs. */ case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_B5G5R5A1_UNORM: diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index a49029e1e9..543d0fdc15 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -44,7 +44,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 4, 1}, { 2, 2}, { 0, 0}}, /* 64 bits per pixel */ {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ }, { @@ -53,7 +53,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ - {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 32, 8}, {16, 16}, { 0, 0}}, /* 64 bits per pixel */ {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ } }; @@ -368,11 +368,11 @@ static void r300_setup_tiling(struct r300_screen *screen, switch (util_format_get_blocksize(format)) { case 1: case 4: + case 8: desc->microtile = R300_BUFFER_TILED; break; case 2: - case 8: if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { desc->microtile = R300_BUFFER_SQUARETILED; } diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index e2b9af9d01..65696555ac 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -202,7 +202,8 @@ void r300_translate_vertex_shader(struct r300_context *r300, memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = DBG_ON(r300, DBG_VP); + DBG_ON(r300, DBG_VP) ? compiler.Base.Debug |= RC_DBG_LOG : 0; + DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0; compiler.code = &vs->code; compiler.UserData = vs; compiler.Base.is_r500 = r300->screen->caps.is_r500; @@ -214,7 +215,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256; compiler.Base.remove_unused_constants = TRUE; - if (compiler.Base.Debug) { + if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_VP, "r300: Initial vertex program\n"); tgsi_dump(vs->state.tokens, 0); } diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index 433b7044e5..ede0bb2ec4 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -7,23 +7,18 @@ LIBRARY_INCLUDES = \ $(shell pkg-config libdrm --cflags-only-I) C_SOURCES = \ - r600_buffer.c \ - r600_state2.c \ - evergreen_state.c \ - r600_context.c \ - r600_shader.c \ - r600_draw.c \ + r600_asm.c \ r600_blit.c \ + r600_buffer.c \ r600_helper.c \ + r600_pipe.c \ r600_query.c \ r600_resource.c \ - r600_screen.c \ + r600_shader.c \ r600_state.c \ r600_texture.c \ - r600_asm.c \ r700_asm.c \ - r600_hw_states.c \ - eg_asm.c \ - eg_hw_states.c + evergreen_state.c \ + eg_asm.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 99c8644e02..bf0ad8571b 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -16,19 +16,19 @@ env.Append(CPPPATH = [ r600 = env.ConvenienceLibrary( target = 'r600', source = [ + 'r600_asm.c', 'r600_buffer.c', - 'r600_context.c', - 'r600_draw.c', 'r600_blit.c', 'r600_helper.c', + 'r600_pipe.c', 'r600_query.c', 'r600_resource.c', - 'r600_screen.c', + 'r600_shader.c', 'r600_state.c', 'r600_texture.c', - 'r600_shader.c', - 'r600_asm.c', 'r700_asm.c', + 'evergreen_state.c', + 'eg_asm.c', ]) Export('r600') diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 769f550874..52b7189e9e 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -20,14 +20,13 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "radeon.h" -#include "r600_asm.h" -#include "r600_context.h" +#include <stdio.h> +#include <errno.h> #include "util/u_memory.h" +#include "r600_pipe.h" +#include "r600_asm.h" #include "eg_sq.h" #include "r600_opcodes.h" -#include <stdio.h> -#include <errno.h> int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { @@ -73,8 +72,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COND(cf->cond) | - S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); + S_SQ_CF_WORD1_COND(cf->cond) | + S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); break; default: diff --git a/src/gallium/drivers/r600/eg_hw_states.c b/src/gallium/drivers/r600/eg_hw_states.c deleted file mode 100644 index ebbc9c3f37..0000000000 --- a/src/gallium/drivers/r600/eg_hw_states.c +++ /dev/null @@ -1,1088 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Dave Airlie - */ -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_blitter.h> -#include "util/u_pack_color.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" -#include "eg_state_inlines.h" -#include "evergreend.h" - -#include "eg_states_inc.h" - -static void eg_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - int i; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); - rstate->states[EG_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); - rstate->states[EG_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); - rstate->states[EG_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); - rstate->states[EG_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]); - rstate->states[EG_BLEND__CB_BLEND0_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND1_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND2_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND3_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND4_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND5_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND6_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND7_CONTROL] = 0x00000000; - - for (i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; - uint32_t bc = 0; - - if (!state->rt[i].blend_enable) - continue; - - bc |= S_028780_BLEND_CONTROL_ENABLE(1); - - bc |= S_028780_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); - bc |= S_028780_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); - bc |= S_028780_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= S_028780_SEPARATE_ALPHA_BLEND(1); - bc |= S_028780_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); - bc |= S_028780_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); - bc |= S_028780_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); - } - - rstate->states[EG_BLEND__CB_BLEND0_CONTROL + i] = bc; - } - - radeon_state_pm4(rstate); -} - -static void eg_ucp(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_clip_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0); - - for (int i = 0; i < state->nr; i++) { - rstate->states[i * 4 + 0] = fui(state->ucp[i][0]); - rstate->states[i * 4 + 1] = fui(state->ucp[i][1]); - rstate->states[i * 4 + 2] = fui(state->ucp[i][2]); - rstate->states[i * 4 + 3] = fui(state->ucp[i][3]); - } - radeon_state_pm4(rstate); -} - -static void eg_cb(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state, int cb) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0, cb, 0); - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - rstate->nbo = 1; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_028C70_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - - color_info = S_028C70_FORMAT(format) | - S_028C70_COMP_SWAP(swap) | - S_028C70_BLEND_CLAMP(1) | - S_028C70_SOURCE_FORMAT(1) | - S_028C70_NUMBER_TYPE(ntype); - - rstate->states[EG_CB__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8; - rstate->states[EG_CB__CB_COLOR0_INFO] = color_info; - rstate->states[EG_CB__CB_COLOR0_PITCH] = S_028C64_PITCH_TILE_MAX(pitch); - rstate->states[EG_CB__CB_COLOR0_SLICE] = S_028C68_SLICE_TILE_MAX(slice); - rstate->states[EG_CB__CB_COLOR0_VIEW] = 0x00000000; - rstate->states[EG_CB__CB_COLOR0_ATTRIB] = S_028C74_NON_DISP_TILING_ORDER(1); - - radeon_state_pm4(rstate); -} - -static void eg_db(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - if (state->zsbuf == NULL) - return; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - rstate->states[EG_DB__DB_HTILE_DATA_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_Z_READ_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_Z_WRITE_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_STENCIL_READ_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_STENCIL_WRITE_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_Z_INFO] = S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format); - rstate->states[EG_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[EG_DB__DB_DEPTH_SIZE] = S_028058_PITCH_TILE_MAX(pitch); - rstate->states[EG_DB__DB_DEPTH_SLICE] = S_02805C_SLICE_TILE_MAX(slice); - radeon_state_pm4(rstate); -} - -static void eg_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - const struct pipe_clip_state *clip = NULL; - struct r600_screen *rscreen = rctx->screen; - float offset_units = 0, offset_scale = 0; - char depth = 0; - unsigned offset_db_fmt_cntl = 0; - unsigned tmp; - unsigned prov_vtx = 1; - unsigned polygon_dual_mode; - - if (rctx->clip) - clip = &rctx->clip->state.clip; - if (fb->zsbuf) { - offset_units = state->offset_units; - offset_scale = state->offset_scale * 12.0f; - switch (fb->zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return; - } - } - offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - - if (state->flatshade_first) - prov_vtx = 0; - - rctx->flat_shade = state->flatshade; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000000; - if (rctx->flat_shade) - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= S_0286D4_FLAT_SHADE_ENA(1); - if (state->sprite_coord_enable) { - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] = 0; - if (clip) { - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1); - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp); - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); - } - polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || - state->fill_back != PIPE_POLYGON_MODE_FILL); - - rstate->states[EG_RASTERIZER__PA_SU_SC_MODE_CNTL] = - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | - S_028814_POLY_MODE(polygon_dual_mode) | - S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)); - rstate->states[EG_RASTERIZER__PA_CL_VS_OUT_CNTL] = - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); - rstate->states[EG_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - rstate->states[EG_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); - rstate->states[EG_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; - rstate->states[EG_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; - rstate->states[EG_RASTERIZER__PA_SU_VTX_CNTL] = 0x00000005; - - rstate->states[EG_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; - rstate->states[EG_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; - rstate->states[EG_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - radeon_state_pm4(rstate); -} - -static void eg_scissor(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - struct r600_screen *rscreen = rctx->screen; - unsigned minx, maxx, miny, maxy; - u32 tl, br; - - if (state == NULL) { - minx = 0; - miny = 0; - maxx = fb->cbufs[0]->width; - maxy = fb->cbufs[0]->height; - } else { - minx = state->minx; - miny = state->miny; - maxx = state->maxx; - maxy = state->maxy; - } - tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny); - br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy); - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - /* screen scissor has no WINDOW OFFSET */ - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl | S_028204_WINDOW_OFFSET_DISABLE(1); - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl | S_028240_WINDOW_OFFSET_DISABLE(1); - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl | S_028240_WINDOW_OFFSET_DISABLE(1); - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - radeon_state_pm4(rstate); -} - -static void eg_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; - rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[EG_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - radeon_state_pm4(rstate); -} - -static void eg_dsa(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; - const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; - struct r600_screen *rscreen = rctx->screen; - unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; - unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - unsigned db_count_control = 0; - struct r600_shader *rshader; - struct r600_query *rquery = NULL; - boolean query_running; - int i; - - if (rctx->ps_shader == NULL) { - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - - db_shader_control = 0; - db_shader_control |= S_02880C_DUAL_EXPORT_ENABLE(1); - db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); - - rshader = &rctx->ps_shader->shader; - if (rshader->uses_kill) - db_shader_control |= S_02880C_KILL_ENABLE(1); - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); - } - stencil_ref_mask = 0; - stencil_ref_mask_bf = 0; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ - - if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - - stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]); - if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]); - } - } - - alpha_test_control = 0; - alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); - } - - db_render_control = 0; -/// db_render_control = S_028D0C_STENCIL_COMPRESS_DISABLE(1) | -/// S_028D0C_DEPTH_COMPRESS_DISABLE(1); - db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) | - S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | - S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); - - query_running = FALSE; - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = TRUE; - } - } - - if (query_running) { - db_render_override |= S_02800C_NOOP_CULL_DISABLE(1); - db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1); - } - - rstate->states[EG_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[EG_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[EG_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; - rstate->states[EG_DSA__DB_STENCILREFMASK] = stencil_ref_mask; - rstate->states[EG_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; - rstate->states[EG_DSA__SX_ALPHA_REF] = alpha_ref; - // rstate->states[EG_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; - // rstate->states[EG_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; - rstate->states[EG_DSA__SPI_FOG_CNTL] = 0x00000000; - rstate->states[EG_DSA__DB_DEPTH_CONTROL] = db_depth_control; - rstate->states[EG_DSA__DB_SHADER_CONTROL] = db_shader_control; - rstate->states[EG_DSA__DB_RENDER_CONTROL] = db_render_control; - rstate->states[EG_DSA__DB_RENDER_OVERRIDE] = db_render_override; - rstate->states[EG_DSA__DB_COUNT_CONTROL] = db_count_control; - rstate->states[EG_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; - rstate->states[EG_DSA__DB_PRELOAD_CONTROL] = 0x00000000; - rstate->states[EG_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - radeon_state_pm4(rstate); -} - - -static INLINE u32 S_FIXED(float value, u32 frac_bits) -{ - return value * (1 << frac_bits); -} - -static void eg_sampler_border(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS); - if (uc.ui) { - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX] = id; - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]); - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]); - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]); - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA] = fui(state->border_color[3]); - } - radeon_state_pm4(rstate); -} - -static void eg_sampler(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS); - rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0); - /* FIXME LOD it depends on texture base level ... */ - rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)); - - rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = - S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) | -S_03C008_TYPE(1); - radeon_state_pm4(rstate); - -} - - -static void eg_resource(struct pipe_context *ctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_screen *rscreen = rctx->screen; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; - struct r600_resource *rbuffer; - unsigned format; - uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4]; - - rstate->cpm4 = 0; - swizzle[0] = view->swizzle_r; - swizzle[1] = view->swizzle_g; - swizzle[2] = view->swizzle_b; - swizzle[3] = view->swizzle_a; - format = r600_translate_texformat(view->texture->format, - swizzle, - &word4, &yuv_format); - if (format == ~0) { - return; - } - desc = util_format_description(view->texture->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", view->texture->format); - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS); - tmp = (struct r600_resource_texture*)view->texture; - rbuffer = &tmp->resource; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); - - rstate->nbo = 2; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[1] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - - pitch = align(tmp->pitch[0] / tmp->bpt, 8); - - /* FIXME properly handle first level != 0 */ - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD0] = - S_030000_DIM(r600_tex_dim(view->texture->target)) | - S_030000_PITCH((pitch / 8) - 1) | - S_030000_TEX_WIDTH(view->texture->width0 - 1); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD1] = - S_030004_TEX_HEIGHT(view->texture->height0 - 1) | - S_030004_TEX_DEPTH(view->texture->depth0 - 1); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8; - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD4] = - word4 | - S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | - S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | - S_030010_REQUEST_SIZE(1) | - S_030010_BASE_LEVEL(view->first_level); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD5] = - S_030014_LAST_LEVEL(view->last_level) | - S_030014_BASE_ARRAY(0) | - S_030014_LAST_ARRAY(0); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD6] = 0; - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD7] = - S_03001C_DATA_FORMAT(format) | - S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE); - radeon_state_pm4(rstate); -} - -static void eg_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; - uint32_t color_control, target_mask, shader_mask; - int i; - - target_mask = 0; - shader_mask = 0; - color_control = S_028808_MODE(1); - - for (i = 0; i < nr_cbufs; i++) { - shader_mask |= 0xf << (i * 4); - } - - if (pbs->logicop_enable) { - color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20); - } else { - color_control |= (0xcc << 16); - } - - if (pbs->independent_blend_enable) { - for (i = 0; i < nr_cbufs; i++) { - target_mask |= (pbs->rt[i].colormask << (4 * i)); - } - } else { - for (i = 0; i < nr_cbufs; i++) { - target_mask |= (pbs->rt[0].colormask << (4 * i)); - } - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); - rstate->states[EG_CB_CNTL__CB_SHADER_MASK] = shader_mask; - rstate->states[EG_CB_CNTL__CB_TARGET_MASK] = target_mask; - rstate->states[EG_CB_CNTL__CB_COLOR_CONTROL] = color_control; - rstate->states[EG_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rstate->states[EG_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rstate->states[EG_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rstate); -} - - -static void eg_init_config(struct r600_context *rctx) -{ - int ps_prio; - int vs_prio; - int gs_prio; - int es_prio; - int hs_prio, cs_prio, ls_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_hs_gprs; - int num_ls_gprs; - int num_temp_gprs; - int num_ps_threads; - int num_vs_threads; - int num_gs_threads; - int num_es_threads; - int num_hs_threads; - int num_ls_threads; - int num_ps_stack_entries; - int num_vs_stack_entries; - int num_gs_stack_entries; - int num_es_stack_entries; - int num_hs_stack_entries; - int num_ls_stack_entries; - enum radeon_family family; - - family = radeon_get_family(rctx->rw); - ps_prio = 0; - vs_prio = 1; - gs_prio = 2; - es_prio = 3; - hs_prio = 0; - ls_prio = 0; - cs_prio = 0; - - switch (family) { - case CHIP_CEDAR: - default: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 96; - num_vs_threads = 16; - num_gs_threads = 16; - num_es_threads = 16; - num_hs_threads = 16; - num_ls_threads = 16; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_REDWOOD: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_JUNIPER: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - } - - radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0); - - rctx->config.states[EG_CONFIG__SQ_CONFIG] = 0x00000000; - switch (family) { - case CHIP_CEDAR: - break; - default: - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); - break; - } - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_EXPORT_SRC_C(1); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_CS_PRIO(cs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_LS_PRIO(ls_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_HS_PRIO(hs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_ES_GPRS(num_es_gprs); - - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] = 0; - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); - - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] = 0; - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_PS_THREADS(num_ps_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_VS_THREADS(num_vs_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_GS_THREADS(num_gs_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_ES_THREADS(num_es_threads); - - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] = 0; - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] |= S_008C1C_NUM_HS_THREADS(num_hs_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] |= S_008C1C_NUM_LS_THREADS(num_ls_threads); - - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] = 0; - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); - - rctx->config.states[EG_CONFIG__SPI_CONFIG_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__SPI_CONFIG_CNTL_1] = S_00913C_VTX_DONE_DELAY(4); - - rctx->config.states[EG_CONFIG__SX_MISC] = 0x00000000; - - rctx->config.states[EG_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000; - rctx->config.states[EG_CONFIG__PA_SC_MODE_CNTL_0] = 0x0; - rctx->config.states[EG_CONFIG__PA_SC_MODE_CNTL_1] = 0x0; - - rctx->config.states[EG_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_1] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_2] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_3] = 0x00000000; - - rctx->config.states[EG_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_STRMOUT_CONFIG] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_STRMOUT_BUFFER_CONFIG] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config.states[EG_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; -// rctx->config.states[EG_CONFIG__VGT_CACHE_INVALIDATION] = 0x2; -// rctx->config.states[EG_CONFIG__VGT_GS_VERTEX_REUSE] = 0x16; - rctx->config.states[EG_CONFIG__PA_CL_ENHANCE] = (3 << 1) | 1; - - radeon_state_pm4(&rctx->config); -} - -static int eg_vs_resource(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset, - uint32_t stride, uint32_t src_format) -{ - struct radeon_state *vs_resource = &rctx->vs_resource[id]; - struct r600_screen *rscreen = rctx->screen; - unsigned format, num_format = 0, format_comp = 0; - - format = r600_translate_colorformat(src_format); - - r600_translate_vertex_num_format(src_format, &num_format, &format_comp); - format = S_030008_DATA_FORMAT(format) | S_030008_NUM_FORMAT_ALL(num_format) | - S_030008_FORMAT_COMP_ALL(format_comp); - - radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_VS); - - radeon_ws_bo_reference(rscreen->rw, &vs_resource->bo[0], rbuffer->bo); - vs_resource->nbo = 1; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->size - offset - 1; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD2] = S_030008_STRIDE(stride) | format; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD3] = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W); - - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD6] = 0x00000000; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD7] = 0xC0000000; - vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(vs_resource); -} - -static int eg_draw_vgt_init(struct r600_draw *draw, - int vgt_draw_initiator) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - struct r600_resource *rbuffer = (struct r600_resource *)draw->index_buffer; - radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0); - draw->draw.states[EG_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw.states[EG_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; - draw->draw.states[EG_DRAW__VGT_DMA_BASE] = draw->index_buffer_offset; - if (rbuffer) { - radeon_ws_bo_reference(rscreen->rw, &draw->draw.bo[0], rbuffer->bo); - draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw.nbo = 1; - } - return radeon_state_pm4(&draw->draw); -} - -static int eg_draw_vgt_prim(struct r600_draw *draw, - uint32_t prim, uint32_t vgt_dma_index_type) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0); - draw->vgt.states[EG_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt.states[EG_VGT__VGT_MAX_VTX_INDX] = draw->max_index; - draw->vgt.states[EG_VGT__VGT_MIN_VTX_INDX] = draw->min_index; - draw->vgt.states[EG_VGT__VGT_INDX_OFFSET] = draw->index_bias; - draw->vgt.states[EG_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt.states[EG_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt.states[EG_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt.states[EG_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt.states[EG_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt.states[EG_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - return radeon_state_pm4(&draw->vgt); -} - - -static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_rasterizer_state *rasterizer; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE, have_face = FALSE; - - rasterizer = &rctx->rasterizer->state.rasterizer; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(rctx, rshader, i)); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - have_face = TRUE; - - if (rasterizer->sprite_coord_enable & (1 << i)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - state->states[EG_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; - } - } - exports_ps |= (1 << num_cout); - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - if (have_pos) { - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1); - state->states[EG_PS_SHADER__SPI_INPUT_Z] |= 1; - } - - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] |= S_0286D0_FRONT_FACE_ENA(have_face); - - state->states[EG_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | - S_028844_STACK_SIZE(rshader->bc.nstack); - state->states[EG_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; - state->states[EG_PS_SHADER__SPI_BARYC_CNTL] = S_0286E0_PERSP_CENTROID_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(1); - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - state->nbo = 1; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static int eg_vs_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); - for (i = 0; i < 10; i++) { - state->states[EG_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; - } - /* so far never got proper semantic id from tgsi */ - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - state->states[EG_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; - } - state->states[EG_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[EG_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028860_NUM_GPRS(rshader->bc.ngpr) | - S_028860_STACK_SIZE(rshader->bc.nstack); - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo); - state->nbo = 2; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - state->placement[2] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); - -} - -struct r600_context_hw_state_vtbl eg_hw_state_vtbl = { - .blend = eg_blend, - .ucp = eg_ucp, - .cb = eg_cb, - .db = eg_db, - .rasterizer = eg_rasterizer, - .scissor = eg_scissor, - .viewport = eg_viewport, - .dsa = eg_dsa, - .sampler_border = eg_sampler_border, - .sampler = eg_sampler, - .resource = eg_resource, - .cb_cntl = eg_cb_cntl, - .vs_resource = eg_vs_resource, - .vgt_init = eg_draw_vgt_init, - .vgt_prim = eg_draw_vgt_prim, - .vs_shader = eg_vs_shader, - .ps_shader = eg_ps_shader, - .init_config = eg_init_config, -}; - -void eg_set_constant_buffer(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, type, shader_class, size; - struct radeon_state *rstate, *rstates; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - type = R600_STATE_CBUF; - - switch (shader) { - case PIPE_SHADER_VERTEX: - shader_class = R600_SHADER_VS; - rstates = rctx->vs_constant; - break; - case PIPE_SHADER_FRAGMENT: - shader_class = R600_SHADER_PS; - rstates = rctx->ps_constant; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - - rstate = &rstates[0]; - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - nconstant = buffer->width0 / 16; - size = ALIGN_DIVUP(nconstant, 16); - - radeon_state_init(rstate, rscreen->rw, type, 0, shader_class); - rstate->states[EG_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size; - rstate->states[EG_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - if (radeon_state_pm4(rstate)) - return; - radeon_draw_bind(&rctx->draw, rstate); -} diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 497865a66d..be81c28b43 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "evergreend.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -276,6 +277,14 @@ static inline uint32_t r600_translate_dbformat(enum pipe_format format) } } +static inline uint32_t r600_translate_stencilformat(enum pipe_format format) +{ + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + return 1; + else + return 0; +} + static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { @@ -301,6 +310,12 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_SWAP_STD; /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -338,6 +353,9 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_028C70_SWAP_STD_REV; + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_SWAP_STD; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -382,6 +400,12 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028C70_COLOR_16; + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -419,6 +443,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_16_16_FLOAT; case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: return V_028C70_COLOR_16_16; /* 64-bit buffers. */ @@ -453,25 +478,6 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE void r600_translate_vertex_num_format(enum pipe_format format, uint32_t *num_format_p, - uint32_t *format_comp_p) -{ - uint32_t num_format = 0, format_comp = 0; - switch (format) { - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R32G32_SSCALED: - num_format = V_030008_SQ_NUM_FORMAT_SCALED; - format_comp = 1; - break; - default: - break; - } - *num_format_p = num_format; - *format_comp_p = format_comp; -} - static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) { return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; @@ -493,4 +499,173 @@ static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) return r600_translate_colorformat(format) != ~0; } +static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) +{ + uint32_t result = 0; + const struct util_format_description *desc; + unsigned i; + + desc = util_format_description(format); + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + goto out_unknown; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { + /* Half-floats, floats, doubles */ + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[i].size) { + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16_FLOAT; + break; + case 2: + result = FMT_16_16_FLOAT; + break; + case 3: + result = FMT_16_16_16_FLOAT; + break; + case 4: + result = FMT_16_16_16_16_FLOAT; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32_FLOAT; + break; + case 2: + result = FMT_32_32_FLOAT; + break; + case 3: + result = FMT_32_32_32_FLOAT; + break; + case 4: + result = FMT_32_32_32_32_FLOAT; + break; + } + break; + default: + goto out_unknown; + } + break; + /* Unsigned ints */ + case UTIL_FORMAT_TYPE_UNSIGNED: + /* Signed ints */ + case UTIL_FORMAT_TYPE_SIGNED: + switch (desc->channel[i].size) { + case 8: + switch (desc->nr_channels) { + case 1: + result = FMT_8; + break; + case 2: + result = FMT_8_8; + break; + case 3: +// result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */ +// break; + case 4: + result = FMT_8_8_8_8; + break; + } + break; + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16; + break; + case 2: + result = FMT_16_16; + break; + case 3: +// result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */ +// break; + case 4: + result = FMT_16_16_16_16; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + break; + case 2: + result = FMT_32_32; + break; + case 3: + result = FMT_32_32_32; + break; + case 4: + result = FMT_32_32_32_32; + break; + } + break; + default: + goto out_unknown; + } + break; + default: + goto out_unknown; + } + + result = S_030008_DATA_FORMAT(result); + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= S_030008_FORMAT_COMP_ALL(1); + } + if (desc->channel[i].normalized) { + result |= S_030008_NUM_FORMAT_ALL(0); + } else { + result |= S_030008_NUM_FORMAT_ALL(2); + } + return result; +out_unknown: + R600_ERR("unsupported vertex format %s\n", util_format_name(format)); + return ~0; +} + +static INLINE uint32_t r600_translate_vertex_data_swizzle(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + uint32_t word3; + + assert(format); + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + fprintf(stderr, "r600: Bad format %s in %s:%d\n", + util_format_short_name(format), __FUNCTION__, __LINE__); + return 0; + } + + word3 = 0; + for (i = 0; i < desc->nr_channels; i++) { + switch (i) { + case 0: + word3 |= S_03000C_DST_SEL_X(desc->swizzle[0]); + break; + case 1: + word3 |= S_03000C_DST_SEL_Y(desc->swizzle[1]); + break; + case 2: + word3 |= S_03000C_DST_SEL_Z(desc->swizzle[2]); + break; + case 3: + word3 |= S_03000C_DST_SEL_W(desc->swizzle[3]); + break; + } + } + return word3; +} + #endif diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 0803a5768c..ce34ed4ad3 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -39,12 +39,10 @@ #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> +#include <util/u_framebuffer.h> #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "evergreend.h" -struct radeon_state { - unsigned dummy; -}; #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" @@ -60,10 +58,10 @@ static void evergreen_set_blend_color(struct pipe_context *ctx, return; rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; @@ -104,9 +102,9 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } } blend->cb_target_mask = target_mask; - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028808_CB_COLOR_CONTROL, + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, color_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); for (int i = 0; i < 8; i++) { unsigned eqRGB = state->rt[i].rgb_func; @@ -133,7 +131,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } } for (int i = 0; i < 8; i++) { - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL); } return rstate; @@ -214,25 +212,25 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, stencil_ref_mask, 0xFFFFFFFF & C_028430_STENCILREF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); return rstate; } @@ -244,6 +242,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, struct r600_pipe_state *rstate; unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; if (rs == NULL) { return NULL; @@ -253,6 +252,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + /* offset */ rs->offset_units = state->offset_units; rs->offset_scale = state->offset_scale * 12.0f; @@ -271,11 +272,11 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); } } - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028814_PA_SU_SC_MODE_CNTL, + r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL, S_028814_PROVOKING_VTX_LAST(prov_vtx) | S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | @@ -286,22 +287,23 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, S_028814_POLY_MODE(polygon_dual_mode) | S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02881C_PA_CL_VS_OUT_CNTL, + r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); /* point size 12.4 fixed point */ tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C08_PA_SU_VTX_CNTL, 0x00000005, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, 0x00000005, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); return rstate; } @@ -347,7 +349,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_SAMPLER, R_03C000_SQ_TEX_SAMPLER_WORD0_0, + r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | @@ -357,17 +359,20 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); /* FIXME LOD it depends on texture base level ... */ - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_SAMPLER, R_03C004_SQ_TEX_SAMPLER_WORD1_0, + r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_SAMPLER, R_03C008_SQ_TEX_SAMPLER_WORD2_0, + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) | S_03C008_TYPE(1), 0xFFFFFFFF, NULL); if (uc.ui) { - /* TODO border color */ + r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); } return rstate; } @@ -406,7 +411,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte unsigned format; uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4]; - struct radeon_ws_bo *bo[2]; + struct r600_bo *bo[2]; if (resource == NULL) return NULL; @@ -424,15 +429,15 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(texture->format, + format = r600_translate_texformat(state->format, swizzle, &word4, &yuv_format); if (format == ~0) { format = 0; } - desc = util_format_description(texture->format); + desc = util_format_description(state->format); if (desc == NULL) { - R600_ERR("unknow format %d\n", texture->format); + R600_ERR("unknow format %d\n", state->format); } tmp = (struct r600_resource_texture*)texture; rbuffer = &tmp->resource; @@ -440,41 +445,37 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte bo[1] = rbuffer->bo; /* FIXME depth texture decompression */ if (tmp->depth) { -#if 0 - r = evergreen_texture_from_depth(ctx, tmp, view->first_level); - if (r) { - return; - } - bo[0] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed); - bo[1] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed); -#endif + r600_texture_depth_flush(ctx, texture); + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->flushed_depth_texture->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; } - pitch = align(tmp->pitch[0] / tmp->bpt, 8); + pitch = align(tmp->pitch_in_pixels[0], 8); /* FIXME properly handle first level != 0 */ - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030000_RESOURCE0_WORD0, + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, S_030000_DIM(r600_tex_dim(texture->target)) | S_030000_PITCH((pitch / 8) - 1) | S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030004_RESOURCE0_WORD1, + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, S_030004_TEX_HEIGHT(texture->height0 - 1) | S_030004_TEX_DEPTH(texture->depth0 - 1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030008_RESOURCE0_WORD2, - tmp->offset[0] >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_03000C_RESOURCE0_WORD3, - tmp->offset[1] >> 8, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030010_RESOURCE0_WORD4, + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, + (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, + (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | - S_030010_REQUEST_SIZE(1) | S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030014_RESOURCE0_WORD5, + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, S_030014_LAST_LEVEL(state->last_level) | S_030014_BASE_ARRAY(0) | S_030014_LAST_ARRAY(0), 0xffffffff, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_03001C_RESOURCE0_WORD7, + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, S_03001C_DATA_FORMAT(format) | S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); @@ -484,8 +485,14 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { - /* TODO */ - assert(1); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + + for (int i = 0; i < count; i++) { + if (resource[i]) { + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + } + } } static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, @@ -493,12 +500,27 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - - for (int i = 0; i < count; i++) { - if (resource[i]) { - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + int i; + + for (i = 0; i < count; i++) { + if (&rctx->ps_samplers.views[i]->base != views[i]) { + if (resource[i]) + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + else + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + + pipe_sampler_view_reference( + (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], + views[i]); + } + } + for (i = count; i < NUM_TEX_UNITS; i++) { + if (rctx->ps_samplers.views[i]) { + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } + rctx->ps_samplers.n_views = count; } static void evergreen_bind_state(struct pipe_context *ctx, void *state) @@ -517,6 +539,10 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + + memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); + rctx->ps_samplers.n_samplers = count; + for (int i = 0; i < count; i++) { evergreen_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); } @@ -527,7 +553,6 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - /* TODO implement */ for (int i = 0; i < count; i++) { evergreen_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); } @@ -542,7 +567,7 @@ static void evergreen_delete_state(struct pipe_context *ctx, void *state) rctx->states[rstate->id] = NULL; } for (int i = 0; i < rstate->nregs; i++) { - radeon_ws_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); } free(rstate); } @@ -570,20 +595,20 @@ static void evergreen_set_clip_state(struct pipe_context *ctx, rctx->clip = *state; rstate->id = R600_PIPE_STATE_CLIP; for (int i = 0; i < state->nr; i++) { - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_0285BC_PA_CL_UCP0_X + i * 4, fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_0285C0_PA_CL_UCP0_Y + i * 4, fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_0285C4_PA_CL_UCP0_Z + i * 4, fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_0285C8_PA_CL_UCP0_W + i * 4, fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); } - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028810_PA_CL_CLIP_CNTL, + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); @@ -628,51 +653,30 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_SCISSOR; tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny); br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, - R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, - R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, - R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, - R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028214_PA_SC_CLIPRECT_0_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028218_PA_SC_CLIPRECT_1_TL, tl, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_02821C_PA_SC_CLIPRECT_1_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028220_PA_SC_CLIPRECT_2_TL, tl, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028224_PA_SC_CLIPRECT_2_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028228_PA_SC_CLIPRECT_3_TL, tl, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, - R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, - R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -692,11 +696,11 @@ static void evergreen_set_stencil_ref(struct pipe_context *ctx, rctx->stencil_ref = *state; rstate->id = R600_PIPE_STATE_STENCIL_REF; tmp = S_028430_STENCILREF(state->ref_value[0]); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, tmp, ~C_028430_STENCILREF, NULL); tmp = S_028434_STENCILREF_BF(state->ref_value[1]); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, tmp, ~C_028434_STENCILREF_BF, NULL); @@ -716,15 +720,15 @@ static void evergreen_set_viewport_state(struct pipe_context *ctx, rctx->viewport = *state; rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); free(rctx->states[R600_PIPE_STATE_VIEWPORT]); rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; @@ -741,7 +745,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned color_info; unsigned format, swap, ntype; const struct util_format_description *desc; - struct radeon_ws_bo *bo[3]; + struct r600_bo *bo[3]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; @@ -749,8 +753,8 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -766,30 +770,30 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state color_info |= S_028C70_SOURCE_FORMAT(1); /* FIXME handle enabling of CB beyond BASE8 which has different offset */ - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - state->cbufs[cb]->offset >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028C64_CB_COLOR0_PITCH + cb * 0x3C, S_028C64_PITCH_TILE_MAX(pitch), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028C68_CB_COLOR0_SLICE + cb * 0x3C, S_028C68_SLICE_TILE_MAX(slice), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, S_028C74_NON_DISP_TILING_ORDER(1), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, bo[0]); } static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, @@ -798,7 +802,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state struct r600_resource_texture *rtex; struct r600_resource *rbuffer; unsigned level; - unsigned pitch, slice, format; + unsigned pitch, slice, format, stencil_format; if (state->zsbuf == NULL) return; @@ -811,23 +815,37 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state rbuffer = &rtex->resource; level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); + stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); + + r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, + (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, + (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + + if (stencil_format) { + uint32_t stencil_offset; + + stencil_offset = ((state->zsbuf->height * rtex->pitch_in_bytes[level]) + 255) & ~255; + r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, + (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, + (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + } + + r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, + S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028048_DB_Z_READ_BASE, - state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028050_DB_Z_WRITE_BASE, - state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo); -// r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028014_DB_HTILE_DATA_BASE, state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028040_DB_Z_INFO, + r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format), 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028058_DB_DEPTH_SIZE, + r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02805C_DB_DEPTH_SLICE, + r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice), 0xFFFFFFFF, NULL); } @@ -844,14 +862,10 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL); - } - for (int i = 0; i < state->nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); - } - pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); - rctx->framebuffer = *state; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + + rctx->pframebuffer = &rctx->framebuffer; /* build states */ for (int i = 0; i < state->nr_cbufs; i++) { @@ -871,26 +885,44 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, tl = S_028240_TL_X(0) | S_028240_TL_Y(0); br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028244_PA_SC_GENERIC_SCISSOR_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, + r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, 0x00000000, target_mask, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02823C_CB_SHADER_MASK, + r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, shader_mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C04_PA_SC_AA_CONFIG, + r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, + r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 0x00000000, 0xFFFFFFFF, NULL); free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); @@ -941,24 +973,24 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, switch (shader) { case PIPE_SHADER_VERTEX: rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, ALIGN_DIVUP(buffer->width0 >> 4, 16), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - 0, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); break; case PIPE_SHADER_FRAGMENT: rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, ALIGN_DIVUP(buffer->width0 >> 4, 16), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - 0, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); break; default: @@ -973,8 +1005,7 @@ static void *evergreen_create_shader_state(struct pipe_context *ctx, struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); int r; - shader->shader.use_mem_constant = TRUE; - r = r600_pipe_shader_create2(ctx, shader, state->tokens); + r = r600_pipe_shader_create(ctx, shader, state->tokens); if (r) { return NULL; } @@ -1021,7 +1052,7 @@ static void evergreen_delete_vs_shader(struct pipe_context *ctx, void *state) free(shader); } -void evergreen_init_state_functions2(struct r600_pipe_context *rctx) +void evergreen_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; @@ -1062,7 +1093,7 @@ void evergreen_init_state_functions2(struct r600_pipe_context *rctx) rctx->context.sampler_view_destroy = evergreen_sampler_view_destroy; } -void evergreen_init_config2(struct r600_pipe_context *rctx) +void evergreen_init_config(struct r600_pipe_context *rctx) { struct r600_pipe_state *rstate = &rctx->config; int ps_prio; @@ -1206,125 +1237,125 @@ void evergreen_init_config2(struct r600_pipe_context *rctx) tmp |= S_008C00_VS_PRIO(vs_prio); tmp |= S_008C00_GS_PRIO(gs_prio); tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); tmp = 0; tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); tmp = 0; tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); tmp = 0; tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); tmp |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); tmp = 0; tmp |= S_008C18_NUM_PS_THREADS(num_ps_threads); tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads); tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads); tmp |= S_008C18_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); tmp = 0; tmp |= S_008C1C_NUM_HS_THREADS(num_hs_threads); tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); tmp = 0; tmp |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); tmp = 0; tmp |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); tmp = 0; tmp |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); - -// r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL); - -// r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONFIG, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); - -r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028810_PA_CL_CLIP_CNTL, + r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); + +// r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL); + +// r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); + +r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); r600_context_pipe_state_set(&rctx->ctx, rstate); @@ -1336,21 +1367,20 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate; struct r600_resource *rbuffer; - unsigned i, j, offset, format, prim; + unsigned i, j, offset, prim; u32 vgt_dma_index_type, vgt_draw_initiator, mask; struct pipe_vertex_buffer *vertex_buffer; struct r600_draw rdraw; struct r600_pipe_state vgt; struct r600_drawl draw; - assert(info->index_bias == 0); - if (rctx->any_user_vbs) { - r600_upload_user_buffers2(rctx); + r600_upload_user_buffers(rctx); rctx->any_user_vbs = FALSE; } memset(&draw, 0, sizeof(struct r600_drawl)); + draw.ctx = ctx; draw.mode = info->mode; draw.start = info->start; draw.count = info->count; @@ -1360,16 +1390,16 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) draw.max_index = info->max_index; draw.index_bias = info->index_bias; - r600_translate_index_buffer2(rctx, &rctx->index_buffer.buffer, + r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, &rctx->index_buffer.index_size, &draw.start, info->count); draw.index_size = rctx->index_buffer.index_size; - draw.index_buffer = rctx->index_buffer.buffer; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); draw.index_buffer_offset = draw.start * draw.index_size; draw.start = 0; - r600_upload_index_buffer2(rctx, &draw); + r600_upload_index_buffer(rctx, &draw); } else { draw.index_size = 0; draw.index_buffer = NULL; @@ -1399,44 +1429,40 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) return; /* rebuild vertex shader if input format changed */ - if (r600_pipe_shader_update2(&rctx->context, rctx->vs_shader)) + if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) return; - if (r600_pipe_shader_update2(&rctx->context, rctx->ps_shader)) + if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) return; for (i = 0 ; i < rctx->vertex_elements->count; i++) { - unsigned num_format = 0, format_comp = 0; - + uint32_t word3, word2; + uint32_t format; rstate = &rctx->vs_resource[i]; + + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + j = rctx->vertex_elements->elements[i].vertex_buffer_index; vertex_buffer = &rctx->vertex_buffer[j]; rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; + offset = rctx->vertex_elements->elements[i].src_offset + + vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); - r600_translate_vertex_num_format(rctx->vertex_elements->elements[i].src_format, &num_format, &format_comp); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, - R_030008_RESOURCE0_WORD2, - S_030008_STRIDE(vertex_buffer->stride) | - S_030008_DATA_FORMAT(format) | - S_030008_NUM_FORMAT_ALL(num_format) | - S_030008_FORMAT_COMP_ALL(format_comp), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, - R_03000C_RESOURCE0_WORD3, - S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_RESOURCE, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL); + format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + + word2 = format | S_030008_STRIDE(vertex_buffer->stride); + + word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->elements[i].src_format); + + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, word3, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL); evergreen_vs_resource_set(&rctx->ctx, rstate, i); } @@ -1447,11 +1473,13 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) vgt.id = R600_PIPE_STATE_VGT; vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONFIG, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); if (rctx->rasterizer && rctx->framebuffer.zsbuf) { float offset_units = rctx->rasterizer->offset_units; @@ -1476,19 +1504,19 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) return; } offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(&vgt, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(&vgt, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(&vgt, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(&vgt, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(&vgt, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, offset_db_fmt_cntl, 0xFFFFFFFF, NULL); } @@ -1505,6 +1533,8 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.indices_bo_offset = draw.index_buffer_offset; } evergreen_context_draw(&rctx->ctx, &rdraw); + + pipe_resource_reference(&draw.index_buffer, NULL); } void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -1512,40 +1542,63 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; - boolean have_pos = FALSE, have_face = FALSE; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; + int ninterp = 0; + boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; + unsigned spi_baryc_cntl; /* clear previous register */ rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index2(&rctx->vs_shader->shader, rshader, i)); + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + /* evergreen NUM_INTERP only contains values interpolated into the LDS, + POSITION goes via GPRs from the SC so isn't counted */ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; + pos_index = i; + else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + else { + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR || + rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + ninterp++; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + have_linear = TRUE; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + have_perspective = TRUE; + if (rshader->input[i].centroid) + have_centroid = TRUE; + } if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - have_face = TRUE; if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { tmp |= S_028644_PT_SPRITE_TEX(1); } - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } for (i = 0; i < rshader->noutput; i++) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02880C_DB_SHADER_CONTROL, - S_02880C_Z_EXPORT_ENABLE(1), - S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_Z_EXPORT_ENABLE(1), + S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_EXPORT_ENABLE(1), + S_02880C_STENCIL_EXPORT_ENABLE(1), NULL); } exports_ps = 0; num_cout = 0; for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || + rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { num_cout++; @@ -1557,46 +1610,75 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader exports_ps = 2; } - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); + if (ninterp == 0) { + ninterp = 1; + have_perspective = TRUE; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) | + S_0286CC_PERSP_GRADIENT_ENA(have_perspective) | + S_0286CC_LINEAR_GRADIENT_ENA(have_linear); spi_input_z = 0; - if (have_pos) { - spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1); + if (pos_index != -1) { + spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); spi_input_z |= 1; } - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286CC_SPI_PS_IN_CONTROL_0, + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + spi_baryc_cntl = 0; + if (have_perspective) + spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) | + S_0286E0_PERSP_CENTROID_ENA(have_centroid); + if (have_linear) + spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | + S_0286E0_LINEAR_CENTROID_ENA(have_centroid); + + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286D0_SPI_PS_IN_CONTROL_1, - S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, + spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, + 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0286E0_SPI_BARYC_CNTL, + spi_baryc_cntl, + 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - 0x00000000, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, R_028844_SQ_PGM_RESOURCES_PS, S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | S_028844_STACK_SIZE(rshader->bc.nstack), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028848_SQ_PGM_RESOURCES_2_PS, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, - R_0286E0_SPI_BARYC_CNTL, - S_0286E0_PERSP_CENTROID_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(1), - 0xFFFFFFFF, NULL); if (rshader->uses_kill) { /* only set some bits here, the other bits are set in the dsa state */ - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, S_02880C_KILL_ENABLE(1), S_02880C_KILL_ENABLE(1), NULL); } + + r600_pipe_state_add_reg(rstate, + R_03A200_SQ_LOOP_CONST_0, 0x01000FFF, + 0xFFFFFFFF, NULL); } void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -1618,30 +1700,57 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader spi_vs_out_id[i / 4] |= tmp; } for (i = 0; i < 10; i++) { - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_02861C_SPI_VS_OUT_ID_0 + i * 4, spi_vs_out_id[i], 0xFFFFFFFF, NULL); } - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028860_SQ_PGM_RESOURCES_VS, S_028860_NUM_GPRS(rshader->bc.ngpr) | S_028860_STACK_SIZE(rshader->bc.nstack), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_028864_SQ_PGM_RESOURCES_2_VS, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, - 0x00000000, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, EVERGREEN_GROUP_CONTEXT, + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - 0x00000000, 0xFFFFFFFF, shader->bo); + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + + r600_pipe_state_add_reg(rstate, + R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) +{ + struct pipe_depth_stencil_alpha_state dsa; + struct r600_pipe_state *rstate; + + memset(&dsa, 0, sizeof(dsa)); + + rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + 0x0, + S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + r600_pipe_state_add_reg(rstate, + R_028000_DB_RENDER_CONTROL, + S_028000_DEPTH_COPY_ENABLE(1) | + S_028000_STENCIL_COPY_ENABLE(1) | + S_028000_COPY_CENTROID(1), + S_028000_DEPTH_COPY_ENABLE(1) | + S_028000_STENCIL_COPY_ENABLE(1) | + S_028000_COPY_CENTROID(1), NULL); + return rstate; } diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 486cb29005..8e96f9355e 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -40,6 +40,9 @@ #define EVERGREEN_SAMPLER_OFFSET 0X0003C000 #define EVERGREEN_SAMPLER_END 0X0003CFF0 +#define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0 +#define EVERGREEN_CTL_CONST_END 0x0003E200 + #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 @@ -683,6 +686,9 @@ #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) +#define C_02880C_STENCIL_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF @@ -981,9 +987,6 @@ #define S_030010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) #define G_030010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) #define C_030010_ENDIAN_SWAP 0xFFFFCFFF -#define S_030010_REQUEST_SIZE(x) (((x) & 0x3) << 14) -#define G_030010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) -#define C_030010_REQUEST_SIZE 0xFFFF3FFF #define S_030010_DST_SEL_X(x) (((x) & 0x7) << 16) #define G_030010_DST_SEL_X(x) (((x) >> 16) & 0x7) #define C_030010_DST_SEL_X 0xFFF8FFFF @@ -1047,40 +1050,6 @@ #define S_030008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_030008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_030008_DATA_FORMAT 0xFC0FFFFF -#define V_030008_COLOR_INVALID 0x00000000 -#define V_030008_COLOR_8 0x00000001 -#define V_030008_COLOR_4_4 0x00000002 -#define V_030008_COLOR_3_3_2 0x00000003 -#define V_030008_COLOR_16 0x00000005 -#define V_030008_COLOR_16_FLOAT 0x00000006 -#define V_030008_COLOR_8_8 0x00000007 -#define V_030008_COLOR_5_6_5 0x00000008 -#define V_030008_COLOR_6_5_5 0x00000009 -#define V_030008_COLOR_1_5_5_5 0x0000000A -#define V_030008_COLOR_4_4_4_4 0x0000000B -#define V_030008_COLOR_5_5_5_1 0x0000000C -#define V_030008_COLOR_32 0x0000000D -#define V_030008_COLOR_32_FLOAT 0x0000000E -#define V_030008_COLOR_16_16 0x0000000F -#define V_030008_COLOR_16_16_FLOAT 0x00000010 -#define V_030008_COLOR_8_24 0x00000011 -#define V_030008_COLOR_8_24_FLOAT 0x00000012 -#define V_030008_COLOR_24_8 0x00000013 -#define V_030008_COLOR_24_8_FLOAT 0x00000014 -#define V_030008_COLOR_10_11_11 0x00000015 -#define V_030008_COLOR_10_11_11_FLOAT 0x00000016 -#define V_030008_COLOR_11_11_10 0x00000017 -#define V_030008_COLOR_11_11_10_FLOAT 0x00000018 -#define V_030008_COLOR_2_10_10_10 0x00000019 -#define V_030008_COLOR_8_8_8_8 0x0000001A -#define V_030008_COLOR_10_10_10_2 0x0000001B -#define V_030008_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_030008_COLOR_32_32 0x0000001D -#define V_030008_COLOR_32_32_FLOAT 0x0000001E -#define V_030008_COLOR_16_16_16_16 0x0000001F -#define V_030008_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_030008_COLOR_32_32_32_32 0x00000022 -#define V_030008_COLOR_32_32_32_32_FLOAT 0x00000023 #define S_030008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_030008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_030008_NUM_FORMAT_ALL 0xF3FFFFFF @@ -1424,8 +1393,16 @@ #define R_008C0C_SQ_THREAD_RESOURCE_MGMT 0x00008C0C #define R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x00008D8C #define R_028000_DB_RENDER_CONTROL 0x00028000 +#define S_028000_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0) +#define S_028000_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1) +#define S_028000_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2) +#define S_028000_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3) +#define S_028000_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4) #define S_028000_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) #define S_028000_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) +#define S_028000_COPY_CENTROID(x) (((x) & 0x1) << 7) +#define S_028000_COPY_SAMPLE(x) (((x) & 0x7) << 8) +#define S_028000_COLOR_DISABLE(x) (((x) & 0x1) << 12) #define R_028004_DB_COUNT_CONTROL 0x00028004 #define S_028004_ZPASS_INCREMENT_DISABLE (((x) & 0x1) << 0) #define S_028004_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 1) @@ -1724,7 +1701,7 @@ #define R_028CA8_CB_COLOR1_VIEW 0x00028CA8 #define R_028CAC_CB_COLOR1_INFO 0x00028CAC #define R_028CB0_CB_COLOR1_ATTRIB 0x00028CB0 -#define R_028CB8_CB_COLOR1_DIM 0x00028CB8 +#define R_028CB4_CB_COLOR1_DIM 0x00028CB4 #define R_028CD8_CB_COLOR2_BASE 0x00028CD8 #define R_028CDC_CB_COLOR2_PITCH 0x00028CDC #define R_028CE0_CB_COLOR2_SLICE 0x00028CE0 @@ -1849,9 +1826,18 @@ #define S_0085F0_DB_DEST_BASE_ENA(x) (((x) & 0x1) << 14) #define G_0085F0_DB_DEST_BASE_ENA(x) (((x) >> 14) & 0x1) #define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF -#define S_0085F0_CR_DEST_BASE_ENA(x) (((x) & 0x1) << 15) -#define G_0085F0_CR_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) -#define C_0085F0_CR_DEST_BASE_ENA 0xFFFF7FFF +#define S_0085F0_CB8_DEST_BASE_ENA(x) (((x) & 0x1) << 15) +#define G_0085F0_CB8_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) + +#define S_0085F0_CB9_DEST_BASE_ENA(x) (((x) & 0x1) << 16) +#define G_0085F0_CB9_DEST_BASE_ENA(x) (((x) >> 16) & 0x1) + +#define S_0085F0_CB10_DEST_BASE_ENA(x) (((x) & 0x1) << 17) +#define G_0085F0_CB10_DEST_BASE_ENA(x) (((x) >> 17) & 0x1) + +#define S_0085F0_CB11_DEST_BASE_ENA(x) (((x) & 0x1) << 18) +#define G_0085F0_CB11_DEST_BASE_ENA(x) (((x) >> 18) & 0x1) + #define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) #define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) #define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF @@ -1882,4 +1868,8 @@ #define R_008970_VGT_NUM_INDICES 0x008970 #define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 +#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 +#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 + +#define R_03A200_SQ_LOOP_CONST_0 0x3A200 #endif diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index b8c74675e6..62d983269f 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -26,6 +26,7 @@ #ifndef R600_H #define R600_H +#include <assert.h> #include <stdint.h> #include <stdio.h> #include <util/u_double_list.h> @@ -98,55 +99,42 @@ enum chip_class { EVERGREEN, }; +struct r600_tiling_info { + unsigned num_channels; + unsigned num_banks; + unsigned group_bytes; +}; + enum radeon_family r600_get_family(struct radeon *rw); enum chip_class r600_get_family_class(struct radeon *radeon); +struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); -/* lowlevel WS bo */ -struct radeon_ws_bo; -struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon, +/* r600_bo.c */ +struct r600_bo; +struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned usage); -struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon, - unsigned handle); -void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx); -void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); -void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, - struct radeon_ws_bo *src); +struct r600_bo *r600_bo_handle(struct radeon *radeon, + unsigned handle, unsigned *array_mode); +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); +void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); +void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, + struct r600_bo *src); +static INLINE unsigned r600_bo_offset(struct r600_bo *bo) +{ + return 0; +} + /* R600/R700 STATES */ #define R600_GROUP_MAX 16 #define R600_BLOCK_MAX_BO 32 #define R600_BLOCK_MAX_REG 128 -enum r600_group_id { - R600_GROUP_CONFIG = 0, - R600_GROUP_CONTEXT, - R600_GROUP_ALU_CONST, - R600_GROUP_RESOURCE, - R600_GROUP_SAMPLER, - R600_GROUP_CTL_CONST, - R600_GROUP_LOOP_CONST, - R600_GROUP_BOOL_CONST, - R600_NGROUPS -}; - -enum evergreen_group_id { - EVERGREEN_GROUP_CONFIG = 0, - EVERGREEN_GROUP_CONTEXT, - EVERGREEN_GROUP_RESOURCE, - EVERGREEN_GROUP_SAMPLER, - EVERGREEN_GROUP_CTL_CONST, - EVERGREEN_GROUP_LOOP_CONST, - EVERGREEN_GROUP_BOOL_CONST, - EVERGREEN_GROUP_SAMPLER_BORDER, - EVERGREEN_NGROUPS -}; - struct r600_pipe_reg { - unsigned group_id; u32 offset; u32 mask; u32 value; - struct radeon_ws_bo *bo; + struct r600_bo *bo; }; struct r600_pipe_state { @@ -156,11 +144,9 @@ struct r600_pipe_state { }; static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state, - unsigned group_id, u32 offset, - u32 value, u32 mask, - struct radeon_ws_bo *bo) + u32 offset, u32 value, u32 mask, + struct r600_bo *bo) { - state->regs[state->nregs].group_id = group_id; state->regs[state->nregs].offset = offset; state->regs[state->nregs].value = value; state->regs[state->nregs].mask = mask; @@ -173,15 +159,18 @@ static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state, #define R600_BLOCK_STATUS_DIRTY (1 << 1) struct r600_block_reloc { - struct radeon_ws_bo *bo; - unsigned nreloc; - unsigned bo_pm4_index[R600_BLOCK_MAX_BO]; + struct r600_bo *bo; + unsigned flush_flags; + unsigned flush_mask; + unsigned bo_pm4_index; }; -struct r600_group_block { +struct r600_block { + struct list_head list; unsigned status; unsigned start_offset; unsigned pm4_ndwords; + unsigned pm4_flush_ndwords; unsigned nbo; unsigned nreg; u32 *reg; @@ -190,12 +179,10 @@ struct r600_group_block { struct r600_block_reloc reloc[R600_BLOCK_MAX_BO]; }; -struct r600_group { +struct r600_range { unsigned start_offset; unsigned end_offset; - unsigned nblocks; - struct r600_group_block *blocks; - unsigned *offset_block_id; + struct r600_block **blocks; }; /* @@ -223,7 +210,7 @@ struct r600_query { /* if we've flushed the query */ unsigned state; /* The buffer where query results are stored. */ - struct radeon_ws_bo *buffer; + struct r600_bo *buffer; unsigned buffer_size; /* linked list of queries */ struct list_head list; @@ -236,8 +223,12 @@ struct r600_query { struct r600_context { struct radeon *radeon; - unsigned ngroups; - struct r600_group groups[R600_GROUP_MAX]; + unsigned hash_size; + unsigned hash_shift; + struct r600_range range[256]; + unsigned nblocks; + struct r600_block **blocks; + struct list_head dirty; unsigned pm4_ndwords; unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; @@ -249,6 +240,10 @@ struct r600_context { u32 *pm4; struct list_head query_list; unsigned num_query_running; + unsigned fence; + struct list_head fenced_bo; + unsigned *cfence; + struct r600_bo *fence_bo; }; struct r600_draw { @@ -257,7 +252,7 @@ struct r600_draw { u32 vgt_index_type; u32 vgt_draw_initiator; u32 indices_bo_offset; - struct radeon_ws_bo *indices; + struct r600_bo *indices; }; int r600_context_init(struct r600_context *ctx, struct radeon *radeon); @@ -278,6 +273,8 @@ boolean r600_context_query_result(struct r600_context *ctx, boolean wait, void *vresult); void r600_query_begin(struct r600_context *ctx, struct r600_query *query); void r600_query_end(struct r600_context *ctx, struct r600_query *query); +void r600_context_queries_suspend(struct r600_context *ctx); +void r600_context_queries_resume(struct r600_context *ctx); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index fcdcad3edf..d13da0ef63 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -20,14 +20,13 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "radeon.h" -#include "r600_context.h" +#include <stdio.h> +#include <errno.h> #include "util/u_memory.h" +#include "r600_pipe.h" #include "r600_sq.h" #include "r600_opcodes.h" #include "r600_asm.h" -#include <stdio.h> -#include <errno.h> static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) { @@ -466,8 +465,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->cf_last->ndw += 2; bc->ndw += 2; - if (bc->use_mem_constant) - bc->cf_last->kcache0_mode = 2; + bc->cf_last->kcache0_mode = 2; /* process cur ALU instructions for bank swizzle */ if (alu->last) { @@ -602,7 +600,11 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | + S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) | + S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) | + S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) | + S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | + S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 6aadf72957..bebc7c15b0 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -58,7 +58,7 @@ struct r600_bc_alu { unsigned bank_swizzle; unsigned bank_swizzle_force; u32 value[4]; - int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; }; struct r600_bc_tex { @@ -101,6 +101,11 @@ struct r600_bc_vtx { unsigned dst_sel_y; unsigned dst_sel_z; unsigned dst_sel_w; + unsigned use_const_fields; + unsigned data_format; + unsigned num_format_all; + unsigned format_comp_all; + unsigned srf_mode_all; }; struct r600_bc_output { @@ -160,7 +165,6 @@ struct r600_cf_callstack { struct r600_bc { enum radeon_family family; int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ - unsigned use_mem_constant; struct list_head cf; struct r600_bc_cf *cf_last; unsigned ndw; @@ -176,6 +180,10 @@ struct r600_bc { struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH]; }; +/* eg_asm.c */ +int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); + +/* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); @@ -186,4 +194,7 @@ int r600_bc_build(struct r600_bc *bc); int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); +/* r700_asm.c */ +int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); + #endif diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 357776c55e..50d47060c1 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Marek Olšák <maraeo@gmail.com> + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,68 +19,105 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Marek Olšák */ -#include <errno.h> -#include <pipe/p_screen.h> +#include <util/u_surface.h> #include <util/u_blitter.h> -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include "util/u_surface.h" -#include "r600_screen.h" -#include "r600_context.h" +#include <util/u_format.h> +#include "r600_pipe.h" + +enum r600_blitter_op /* bitmask */ +{ + R600_CLEAR = 1, + R600_CLEAR_SURFACE = 2, + R600_COPY = 4 +}; -static void r600_blitter_save_states(struct pipe_context *ctx) +static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) { - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - util_blitter_save_blend(rctx->blitter, rctx->blend); - util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa); - if (rctx->stencil_ref) { - util_blitter_save_stencil_ref(rctx->blitter, - &rctx->stencil_ref->state.stencil_ref); + r600_context_queries_suspend(&rctx->ctx); + + util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); + util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]); + if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) { + util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); } - util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer); + util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]); util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); - if (rctx->viewport) { - util_blitter_save_viewport(rctx->blitter, &rctx->viewport->state.viewport); + if (rctx->states[R600_PIPE_STATE_VIEWPORT]) { + util_blitter_save_viewport(rctx->blitter, &rctx->viewport); } - if (rctx->clip) { - util_blitter_save_clip(rctx->blitter, &rctx->clip->state.clip); + if (rctx->states[R600_PIPE_STATE_CLIP]) { + util_blitter_save_clip(rctx->blitter, &rctx->clip); } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, - rctx->vertex_buffer); - - /* remove ptr so they don't get deleted */ - rctx->blend = NULL; - rctx->clip = NULL; - rctx->vs_shader = NULL; - rctx->ps_shader = NULL; - rctx->rasterizer = NULL; - rctx->dsa = NULL; + util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); + rctx->vertex_elements = NULL; - /* suspend queries */ - r600_queries_suspend(ctx); + if (op & (R600_CLEAR_SURFACE | R600_COPY)) + util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); + + if (op & R600_COPY) { + util_blitter_save_fragment_sampler_states( + rctx->blitter, rctx->ps_samplers.n_samplers, + (void**)rctx->ps_samplers.samplers); + + util_blitter_save_fragment_sampler_views( + rctx->blitter, rctx->ps_samplers.n_views, + (struct pipe_sampler_view**)rctx->ps_samplers.views); + } + +} + +static void r600_blitter_end(struct pipe_context *ctx) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_context_queries_resume(&rctx->ctx); +} + +int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_surface *zsurf, *cbsurf; + int level = 0; + float depth = 1.0f; + + zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0, + PIPE_BIND_DEPTH_STENCIL); + + cbsurf = ctx->screen->get_tex_surface(ctx->screen, + (struct pipe_resource*)texture->flushed_depth_texture, + 0, level, 0, PIPE_BIND_RENDER_TARGET); + + if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || + rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) + depth = 0.0f; + + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); + util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); + r600_blitter_end(ctx); + + pipe_surface_reference(&zsurf, NULL); + pipe_surface_reference(&cbsurf, NULL); + + + return 0; } static void r600_clear(struct pipe_context *ctx, unsigned buffers, const float *rgba, double depth, unsigned stencil) { - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_framebuffer_state *fb = &rctx->framebuffer; - r600_blitter_save_states(ctx); + r600_blitter_begin(ctx, R600_CLEAR); util_blitter_clear(rctx->blitter, fb->width, fb->height, fb->nr_cbufs, buffers, rgba, depth, stencil); - /* resume queries */ - r600_queries_resume(ctx); + r600_blitter_end(ctx); } static void r600_clear_render_target(struct pipe_context *ctx, @@ -89,16 +126,12 @@ static void r600_clear_render_target(struct pipe_context *ctx, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - - r600_blitter_save_states(ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); - /* resume queries */ - r600_queries_resume(ctx); + r600_blitter_end(ctx); } static void r600_clear_depth_stencil(struct pipe_context *ctx, @@ -109,19 +142,35 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - - r600_blitter_save_states(ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); - /* resume queries */ - r600_queries_resume(ctx); + r600_blitter_end(ctx); } + +/* Copy a block of pixels from one surface to another using HW. */ +static void r600_hw_copy_region(struct pipe_context *ctx, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + struct pipe_subresource subsrc, + unsigned srcx, unsigned srcy, unsigned srcz, + unsigned width, unsigned height) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_blitter_begin(ctx, R600_COPY); + util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height, + TRUE); + r600_blitter_end(ctx); +} + static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, struct pipe_subresource subdst, @@ -131,79 +180,22 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned srcx, unsigned srcy, unsigned srcz, unsigned width, unsigned height) { - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); -} + boolean is_depth; + /* there is something wrong with depth resource copies at the moment so avoid them for now */ + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) + util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); + else + r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); -static void *r600_create_db_flush_dsa(struct r600_context *rctx) -{ - struct r600_screen *rscreen = rctx->screen; - struct pipe_depth_stencil_alpha_state dsa; - struct r600_context_state *state; - boolean quirk = false; - enum radeon_family family; - - family = radeon_get_family(rscreen->rw); - if (family == CHIP_RV610 || family == CHIP_RV630 || family == CHIP_RV620 || - family == CHIP_RV635) - quirk = true; - - memset(&dsa, 0, sizeof(dsa)); - - if (quirk) { - dsa.depth.enabled = 1; - dsa.depth.func = PIPE_FUNC_LEQUAL; - dsa.stencil[0].enabled = 1; - dsa.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_INCR; - dsa.stencil[0].writemask = 0xff; - } - - state = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); - state->flags |= R600_STATE_FLAG_DSA_FLUSH; - return state; - } -void r600_init_blit_functions(struct r600_context *rctx) +void r600_init_blit_functions(struct r600_pipe_context *rctx) { rctx->context.clear = r600_clear; rctx->context.clear_render_target = r600_clear_render_target; rctx->context.clear_depth_stencil = r600_clear_depth_stencil; rctx->context.resource_copy_region = r600_resource_copy_region; - - /* create a custom depth stencil for DB flush */ - rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); -} - -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) -{ - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = rctx->pframebuffer; - struct pipe_surface *zsurf, *cbsurf; - int level = 0; - float depth = 1.0f; - - zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0, - PIPE_BIND_DEPTH_STENCIL); - - cbsurf = ctx->screen->get_tex_surface(ctx->screen, texture->flushed_depth_texture, 0, level, 0, - PIPE_BIND_RENDER_TARGET); - - r600_blitter_save_states(ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); - - if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || - rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) - depth = 0.0f; - - util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); - - pipe_surface_reference(&zsurf, NULL); - pipe_surface_reference(&cbsurf, NULL); - - /* resume queries */ - r600_queries_resume(ctx); - return 0; } diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 1621b2ab63..455aa2e81f 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -31,9 +31,10 @@ #include <util/u_memory.h> #include <util/u_upload_mgr.h> #include "state_tracker/drm_driver.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" +#include <xf86drm.h> +#include "radeon_drm.h" +#include "r600.h" +#include "r600_pipe.h" extern struct u_resource_vtbl r600_buffer_vtbl; @@ -42,23 +43,23 @@ u32 r600_domain_from_usage(unsigned usage) u32 domain = RADEON_GEM_DOMAIN_GTT; if (usage & PIPE_BIND_RENDER_TARGET) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } if (usage & PIPE_BIND_DEPTH_STENCIL) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } if (usage & PIPE_BIND_SAMPLER_VIEW) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } /* also need BIND_BLIT_SOURCE/DESTINATION ? */ if (usage & PIPE_BIND_VERTEX_BUFFER) { - domain |= RADEON_GEM_DOMAIN_GTT; + domain |= RADEON_GEM_DOMAIN_GTT; } if (usage & PIPE_BIND_INDEX_BUFFER) { - domain |= RADEON_GEM_DOMAIN_GTT; + domain |= RADEON_GEM_DOMAIN_GTT; } if (usage & PIPE_BIND_CONSTANT_BUFFER) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } return domain; @@ -67,9 +68,8 @@ u32 r600_domain_from_usage(unsigned usage) struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { - struct r600_screen *rscreen = r600_screen(screen); struct r600_resource_buffer *rbuffer; - struct radeon_ws_bo *bo; + struct r600_bo *bo; /* XXX We probably want a different alignment for buffers and textures. */ unsigned alignment = 4096; @@ -86,7 +86,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, rbuffer->r.base.vtbl = &r600_buffer_vtbl; rbuffer->r.size = rbuffer->r.base.b.width0; rbuffer->r.domain = r600_domain_from_usage(rbuffer->r.base.b.bind); - bo = radeon_ws_bo(rscreen->rw, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind); + bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind); if (bo == NULL) { FREE(rbuffer); return NULL; @@ -127,10 +127,9 @@ static void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { struct r600_resource_buffer *rbuffer = r600_buffer(buf); - struct r600_screen *rscreen = r600_screen(screen); if (rbuffer->r.bo) { - radeon_ws_bo_reference(rscreen->rw, &rbuffer->r.bo, NULL); + r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); } FREE(rbuffer); } @@ -139,7 +138,6 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - struct r600_screen *rscreen = r600_screen(pipe->screen); int write = 0; uint8_t *data; int i; @@ -155,9 +153,9 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, flush = TRUE; if (flush) { - radeon_ws_bo_reference(rscreen->rw, &rbuffer->r.bo, NULL); + r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL); rbuffer->num_ranges = 0; - rbuffer->r.bo = radeon_ws_bo(rscreen->rw, + rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys, rbuffer->r.base.b.width0, 0, rbuffer->r.base.b.bind); break; @@ -170,7 +168,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, if (transfer->usage & PIPE_TRANSFER_WRITE) { write = 1; } - data = radeon_ws_bo_map(rscreen->rw, rbuffer->r.bo, transfer->usage, pipe); + data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe); if (!data) return NULL; @@ -181,10 +179,9 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - struct r600_screen *rscreen = r600_screen(pipe->screen); if (rbuffer->r.bo) - radeon_ws_bo_unmap(rscreen->rw, rbuffer->r.bo); + r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, @@ -228,16 +225,16 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, { struct radeon *rw = (struct radeon*)screen->winsys; struct r600_resource *rbuffer; - struct radeon_ws_bo *bo = NULL; + struct r600_bo *bo = NULL; - bo = radeon_ws_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle, NULL); if (bo == NULL) { return NULL; } rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) { - radeon_ws_bo_reference(rw, &bo, NULL); + r600_bo_reference(rw, &bo, NULL); return NULL; } @@ -262,8 +259,7 @@ struct u_resource_vtbl r600_buffer_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -int r600_upload_index_buffer(struct r600_context *rctx, - struct r600_draw *draw) +int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { struct pipe_resource *upload_buffer = NULL; unsigned index_offset = draw->index_buffer_offset; @@ -280,14 +276,17 @@ int r600_upload_index_buffer(struct r600_context *rctx, goto done; } draw->index_buffer_offset = index_offset; - draw->index_buffer = upload_buffer; + + /* Transfer ownership. */ + pipe_resource_reference(&draw->index_buffer, upload_buffer); + pipe_resource_reference(&upload_buffer, NULL); } done: return ret; } -int r600_upload_user_buffers(struct r600_context *rctx) +int r600_upload_user_buffers(struct r600_pipe_context *rctx) { enum pipe_error ret = PIPE_OK; int i, nr; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c deleted file mode 100644 index 091751e93a..0000000000 --- a/src/gallium/drivers/r600/r600_context.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson - */ -#include <stdio.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_upload_mgr.h> -#include <util/u_blitter.h> -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" - -static void r600_destroy_context(struct pipe_context *context) -{ - struct r600_context *rctx = r600_context(context); - - rctx->rasterizer = r600_context_state_decref(rctx->rasterizer); - rctx->poly_stipple = r600_context_state_decref(rctx->poly_stipple); - rctx->scissor = r600_context_state_decref(rctx->scissor); - rctx->clip = r600_context_state_decref(rctx->clip); - rctx->ps_shader = r600_context_state_decref(rctx->ps_shader); - rctx->vs_shader = r600_context_state_decref(rctx->vs_shader); - rctx->depth = r600_context_state_decref(rctx->depth); - rctx->stencil = r600_context_state_decref(rctx->stencil); - rctx->alpha = r600_context_state_decref(rctx->alpha); - rctx->dsa = r600_context_state_decref(rctx->dsa); - rctx->blend = r600_context_state_decref(rctx->blend); - rctx->stencil_ref = r600_context_state_decref(rctx->stencil_ref); - rctx->viewport = r600_context_state_decref(rctx->viewport); - rctx->framebuffer = r600_context_state_decref(rctx->framebuffer); - - free(rctx->ps_constant); - free(rctx->vs_constant); - free(rctx->vs_resource); - - util_blitter_destroy(rctx->blitter); - - u_upload_destroy(rctx->upload_vb); - u_upload_destroy(rctx->upload_ib); - - radeon_ctx_fini(rctx->ctx); - FREE(rctx); -} - -void r600_flush(struct pipe_context *ctx, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery = NULL; -#if 0 - static int dc = 0; - char dname[256]; -#endif - - /* flush upload buffers */ - u_upload_flush(rctx->upload_vb); - u_upload_flush(rctx->upload_ib); - - /* suspend queries */ - r600_queries_suspend(ctx); - - -#if 0 - sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 20) { - radeon_ctx_dump_bof(rctx->ctx, dname); - R600_ERR("dumped %s\n", dname); - } - dc++; -#endif - - radeon_ctx_submit(rctx->ctx); - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - rquery->flushed = TRUE; - } - - radeon_ctx_clear(rctx->ctx); - /* resume queries */ - r600_queries_resume(ctx); -} - -struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) -{ - struct r600_context *rctx = CALLOC_STRUCT(r600_context); - struct r600_screen* rscreen = r600_screen(screen); - - if (rctx == NULL) - return NULL; - rctx->context.winsys = rscreen->screen.winsys; - rctx->context.screen = screen; - rctx->context.priv = priv; - rctx->context.destroy = r600_destroy_context; - rctx->context.draw_vbo = r600_draw_vbo; - rctx->context.flush = r600_flush; - - /* Easy accessing of screen/winsys. */ - rctx->screen = rscreen; - rctx->rw = rscreen->rw; - - if (radeon_get_family_class(rscreen->rw) == EVERGREEN) - rctx->vtbl = &eg_hw_state_vtbl; - else - rctx->vtbl = &r600_hw_state_vtbl; - - r600_init_query_functions(rctx); - r600_init_state_functions(rctx); - r600_init_context_resource_functions(rctx); - - r600_init_blit_functions(rctx); - - rctx->blitter = util_blitter_create(&rctx->context); - if (rctx->blitter == NULL) { - FREE(rctx); - return NULL; - } - - rctx->vtbl->init_config(rctx); - - rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_ib == NULL) { - goto out_free; - } - - rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (rctx->upload_vb == NULL) { - goto out_free; - } - - rctx->vs_constant = (struct radeon_state *)calloc(R600_MAX_CONSTANT, sizeof(struct radeon_state)); - if (!rctx->vs_constant) { - goto out_free; - } - - rctx->ps_constant = (struct radeon_state *)calloc(R600_MAX_CONSTANT, sizeof(struct radeon_state)); - if (!rctx->ps_constant) { - goto out_free; - } - - rctx->vs_resource = (struct radeon_state *)calloc(R600_MAX_RESOURCE, sizeof(struct radeon_state)); - if (!rctx->vs_resource) { - goto out_free; - } - - rctx->ctx = radeon_ctx_init(rscreen->rw); - radeon_draw_init(&rctx->draw, rscreen->rw); - r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth; - return &rctx->context; - out_free: - FREE(rctx); - return NULL; -} diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h deleted file mode 100644 index d104531d36..0000000000 --- a/src/gallium/drivers/r600/r600_context.h +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_CONTEXT_H -#define R600_CONTEXT_H - -#include <stdio.h> -#include <pipe/p_state.h> -#include <pipe/p_context.h> -#include <tgsi/tgsi_scan.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_util.h> -#include <util/u_blitter.h> -#include <util/u_double_list.h> -#include "radeon.h" -#include "r600_shader.h" - -struct u_upload_mgr; - -#define R600_QUERY_STATE_STARTED (1 << 0) -#define R600_QUERY_STATE_ENDED (1 << 1) -#define R600_QUERY_STATE_SUSPENDED (1 << 2) - -struct r600_query { - u64 result; - /* The kind of query. Currently only OQ is supported. */ - unsigned type; - /* How many results have been written, in dwords. It's incremented - * after end_query and flush. */ - unsigned num_results; - /* if we've flushed the query */ - boolean flushed; - unsigned state; - /* The buffer where query results are stored. */ - struct radeon_ws_bo *buffer; - unsigned buffer_size; - /* linked list of queries */ - struct list_head list; - struct radeon_state rstate; -}; - -/* XXX move this to a more appropriate place */ -union pipe_states { - struct pipe_rasterizer_state rasterizer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct pipe_clip_state clip; - struct pipe_shader_state shader; - struct pipe_depth_state depth; - struct pipe_stencil_state stencil; - struct pipe_alpha_state alpha; - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - struct pipe_stencil_ref stencil_ref; - struct pipe_framebuffer_state framebuffer; - struct pipe_sampler_state sampler; - struct pipe_sampler_view sampler_view; - struct pipe_viewport_state viewport; -}; - -enum pipe_state_type { - pipe_rasterizer_type = 1, - pipe_poly_stipple_type, - pipe_scissor_type, - pipe_clip_type, - pipe_shader_type, - pipe_depth_type, - pipe_stencil_type, - pipe_alpha_type, - pipe_dsa_type, - pipe_blend_type, - pipe_stencil_ref_type, - pipe_framebuffer_type, - pipe_sampler_type, - pipe_sampler_view_type, - pipe_viewport_type, - pipe_type_count -}; - -#define R600_MAX_RSTATE 16 -#define R600_STATE_FLAG_DSA_FLUSH 1 - -struct r600_context_state { - union pipe_states state; - unsigned refcount; - unsigned type; - struct radeon_state rstate[R600_MAX_RSTATE]; - struct r600_shader shader; - struct radeon_ws_bo *bo; - unsigned nrstate; - unsigned flags; -}; - -struct r600_vertex_element -{ - unsigned refcount; - unsigned count; - struct pipe_vertex_element elements[32]; -}; - -struct r600_draw { - struct pipe_context *ctx; - struct radeon_state draw; - struct radeon_state vgt; - unsigned mode; - unsigned start; - unsigned count; - unsigned index_size; - struct pipe_resource *index_buffer; - unsigned index_buffer_offset; - unsigned min_index, max_index; - unsigned index_bias; -}; - -struct r600_context_hw_states { - struct radeon_state rasterizer; - struct radeon_state scissor; - struct radeon_state dsa; - struct radeon_state cb_cntl; - - struct radeon_state db_flush; - struct radeon_state cb_flush; -}; - -#define R600_MAX_CONSTANT 256 /* magic */ -#define R600_MAX_RESOURCE 160 /* magic */ - -struct r600_shader_sampler_states { - unsigned nsampler; - unsigned nview; - unsigned nborder; - struct radeon_state *sampler[PIPE_MAX_ATTRIBS]; - struct radeon_state *view[PIPE_MAX_ATTRIBS]; - struct radeon_state *border[PIPE_MAX_ATTRIBS]; -}; - -struct r600_context; -struct r600_screen; -struct r600_resource; -struct r600_resource_texture; - -struct r600_context_hw_state_vtbl { - void (*blend)(struct r600_context *rctx, - struct radeon_state *rstate, - const struct pipe_blend_state *state); - void (*ucp)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_clip_state *state); - void (*cb)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state, int cb); - void (*db)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state); - void (*rasterizer)(struct r600_context *rctx, struct radeon_state *rstate); - void (*scissor)(struct r600_context *rctx, struct radeon_state *rstate); - void (*viewport)(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state); - void (*dsa)(struct r600_context *rctx, struct radeon_state *rstate); - void (*sampler_border)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id); - void (*sampler)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id); - void (*resource)(struct pipe_context *ctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id); - void (*cb_cntl)(struct r600_context *rctx, struct radeon_state *rstate); - int (*vs_resource)(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset, - uint32_t stride, uint32_t format); - int (*vgt_init)(struct r600_draw *draw, - int vgt_draw_initiator); - int (*vgt_prim)(struct r600_draw *draw, - uint32_t prim, uint32_t vgt_dma_index_type); - - int (*ps_shader)(struct r600_context *rctx, struct r600_context_state *rshader, - struct radeon_state *state); - int (*vs_shader)(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state); - void (*init_config)(struct r600_context *rctx); -}; -extern struct r600_context_hw_state_vtbl r600_hw_state_vtbl; -extern struct r600_context_hw_state_vtbl eg_hw_state_vtbl; - -struct r600_context { - struct pipe_context context; - struct blitter_context *blitter; - struct pipe_framebuffer_state *pframebuffer; - unsigned family; - void *custom_dsa_flush; - struct list_head query_list; - struct r600_screen *screen; - struct radeon *rw; - struct radeon_ctx *ctx; - struct radeon_draw draw; - struct r600_context_hw_state_vtbl *vtbl; - struct radeon_state config; - boolean use_mem_constant; - /* FIXME get rid of those vs_resource,vs/ps_constant */ - struct radeon_state *vs_resource; - unsigned vs_nresource; - struct radeon_state *vs_constant; - struct radeon_state *ps_constant; - /* hw states */ - struct r600_context_hw_states hw_states; - /* pipe states */ - unsigned flat_shade; - - unsigned nvertex_buffer; - struct r600_context_state *rasterizer; - struct r600_context_state *poly_stipple; - struct r600_context_state *scissor; - struct r600_context_state *clip; - struct r600_context_state *ps_shader; - struct r600_context_state *vs_shader; - struct r600_context_state *depth; - struct r600_context_state *stencil; - struct r600_context_state *alpha; - struct r600_context_state *dsa; - struct r600_context_state *blend; - struct r600_context_state *stencil_ref; - struct r600_context_state *viewport; - struct r600_context_state *framebuffer; - struct r600_shader_sampler_states vs_sampler; - struct r600_shader_sampler_states ps_sampler; - /* can add gs later */ - struct r600_vertex_element *vertex_elements; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_index_buffer index_buffer; - struct pipe_blend_color blend_color; - - /* upload managers */ - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; - bool any_user_vbs; -}; - -/* Convenience cast wrapper. */ -static INLINE struct r600_context *r600_context(struct pipe_context *pipe) -{ - return (struct r600_context*)pipe; -} - -static INLINE struct r600_query* r600_query(struct pipe_query* q) -{ - return (struct r600_query*)q; -} - -struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate); -struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate); -void r600_flush(struct pipe_context *ctx, unsigned flags, - struct pipe_fence_handle **fence); - -int r600_context_hw_states(struct pipe_context *ctx); - -void r600_draw_vbo(struct pipe_context *ctx, - const struct pipe_draw_info *info); - -void r600_init_blit_functions(struct r600_context *rctx); -void r600_init_state_functions(struct r600_context *rctx); -void r600_init_query_functions(struct r600_context* rctx); -struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv); - -extern int r600_pipe_shader_create(struct pipe_context *ctx, - struct r600_context_state *rstate, - const struct tgsi_token *tokens); -extern int r600_pipe_shader_update(struct pipe_context *ctx, - struct r600_context_state *rstate); -extern int r600_find_vs_semantic_index(struct r600_context *rctx, struct r600_shader *rshader, int id); - -#define R600_ERR(fmt, args...) \ - fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) - -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, - uint32_t *word4_p, uint32_t *yuv_format_p); - -/* query */ -extern void r600_queries_resume(struct pipe_context *ctx); -extern void r600_queries_suspend(struct pipe_context *ctx); - -int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); - -void r600_set_constant_buffer_file(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer); -void r600_set_constant_buffer_mem(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer); -void eg_set_constant_buffer(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer); - -int r600_upload_index_buffer(struct r600_context *rctx, - struct r600_draw *draw); -int r600_upload_user_buffers(struct r600_context *rctx); - -#endif diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c deleted file mode 100644 index afc3b7bba1..0000000000 --- a/src/gallium/drivers/r600/r600_draw.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson - */ -#include <stdio.h> -#include <errno.h> -#include <pipe/p_screen.h> -#include <util/u_format.h> -#include <util/u_math.h> -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include <util/u_index_modify.h> -#include "radeon.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" -#include "r600_state_inlines.h" - -static void r600_translate_index_buffer(struct r600_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count) -{ - switch (*index_size) { - case 1: - util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); - *index_size = 2; - *start = 0; - break; - - case 2: - if (*start % 2 != 0) { - util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); - *start = 0; - } - break; - - case 4: - break; - } -} - -static int r600_draw_common(struct r600_draw *draw) -{ - struct r600_context *rctx = r600_context(draw->ctx); - /* FIXME vs_resource */ - struct radeon_state *vs_resource; - struct r600_resource *rbuffer; - unsigned i, j, offset, prim; - u32 vgt_dma_index_type, vgt_draw_initiator; - struct pipe_vertex_buffer *vertex_buffer; - int r; - - r = r600_context_hw_states(draw->ctx); - if (r) - return r; - switch (draw->index_size) { - case 2: - vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_DMA); - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_DMA); - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_AUTO_INDEX); - vgt_dma_index_type = 0; - break; - default: - fprintf(stderr, "%s %d unsupported index size %d\n", __func__, __LINE__, draw->index_size); - return -EINVAL; - } - r = r600_conv_pipe_prim(draw->mode, &prim); - if (r) - return r; - - /* rebuild vertex shader if input format changed */ - r = r600_pipe_shader_update(draw->ctx, rctx->vs_shader); - if (r) - return r; - r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); - if (r) - return r; - radeon_draw_bind(&rctx->draw, &rctx->vs_shader->rstate[0]); - radeon_draw_bind(&rctx->draw, &rctx->ps_shader->rstate[0]); - - for (i = 0 ; i < rctx->vs_nresource; i++) { - radeon_state_fini(&rctx->vs_resource[i]); - } - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - vs_resource = &rctx->vs_resource[i]; - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - - rctx->vtbl->vs_resource(rctx, i, rbuffer, offset, vertex_buffer->stride, rctx->vertex_elements->elements[i].src_format); - radeon_draw_bind(&rctx->draw, vs_resource); - } - rctx->vs_nresource = rctx->vertex_elements->count; - /* FIXME start need to change winsys */ - rctx->vtbl->vgt_init(draw, vgt_draw_initiator); - radeon_draw_bind(&rctx->draw, &draw->draw); - - rctx->vtbl->vgt_prim(draw, prim, vgt_dma_index_type); - radeon_draw_bind(&rctx->draw, &draw->vgt); - - r = radeon_ctx_set_draw(rctx->ctx, &rctx->draw); - if (r == -EBUSY) { - r600_flush(draw->ctx, 0, NULL); - r = radeon_ctx_set_draw(rctx->ctx, &rctx->draw); - } - - radeon_state_fini(&draw->draw); - - return r; -} - -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_draw draw; - int r; - - memset(&draw, 0, sizeof(draw)); - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; - - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - draw.index_buffer = rctx->index_buffer.buffer; - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; - r600_upload_index_buffer(rctx, &draw); - } - else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = 0; - draw.max_index = 0xffffff; - draw.index_buffer_offset = 0; - draw.index_bias = draw.start; - } - - r = r600_draw_common(&draw); - if (r) - fprintf(stderr,"draw common failed %d\n", r); -} diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h new file mode 100644 index 0000000000..0c91a21238 --- /dev/null +++ b/src/gallium/drivers/r600/r600_formats.h @@ -0,0 +1,56 @@ +#ifndef R600_FORMATS_H +#define R600_FORMATS_H + +/* list of formats from R700 ISA document - apply across GPUs in different registers */ +#define FMT_INVALID 0x00000000 +#define FMT_8 0x00000001 +#define FMT_4_4 0x00000002 +#define FMT_3_3_2 0x00000003 +#define FMT_16 0x00000005 +#define FMT_16_FLOAT 0x00000006 +#define FMT_8_8 0x00000007 +#define FMT_5_6_5 0x00000008 +#define FMT_6_5_5 0x00000009 +#define FMT_1_5_5_5 0x0000000A +#define FMT_4_4_4_4 0x0000000B +#define FMT_5_5_5_1 0x0000000C +#define FMT_32 0x0000000D +#define FMT_32_FLOAT 0x0000000E +#define FMT_16_16 0x0000000F +#define FMT_16_16_FLOAT 0x00000010 +#define FMT_8_24 0x00000011 +#define FMT_8_24_FLOAT 0x00000012 +#define FMT_24_8 0x00000013 +#define FMT_24_8_FLOAT 0x00000014 +#define FMT_10_11_11 0x00000015 +#define FMT_10_11_11_FLOAT 0x00000016 +#define FMT_11_11_10 0x00000017 +#define FMT_11_11_10_FLOAT 0x00000018 +#define FMT_2_10_10_10 0x00000019 +#define FMT_8_8_8_8 0x0000001A +#define FMT_10_10_10_2 0x0000001B +#define FMT_X24_8_32_FLOAT 0x0000001C +#define FMT_32_32 0x0000001D +#define FMT_32_32_FLOAT 0x0000001E +#define FMT_16_16_16_16 0x0000001F +#define FMT_16_16_16_16_FLOAT 0x00000020 +#define FMT_32_32_32_32 0x00000022 +#define FMT_32_32_32_32_FLOAT 0x00000023 +#define FMT_1 0x00000025 +#define FMT_GB_GR 0x00000027 +#define FMT_BG_RG 0x00000028 +#define FMT_32_AS_8 0x00000029 +#define FMT_32_AS_8_8 0x0000002a +#define FMT_5_9_9_9_SHAREDEXP 0x0000002b +#define FMT_8_8_8 0x0000002c +#define FMT_16_16_16 0x0000002d +#define FMT_16_16_16_FLOAT 0x0000002e +#define FMT_32_32_32 0x0000002f +#define FMT_32_32_32_FLOAT 0x00000030 +#define FMT_BC1 0x00000031 +#define FMT_BC2 0x00000032 +#define FMT_BC3 0x00000033 +#define FMT_BC4 0x00000034 +#define FMT_BC5 0x00000035 + +#endif diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index 5e0e0aab57..7e13109306 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -26,8 +26,7 @@ #include <stdio.h> #include <errno.h> #include <util/u_inlines.h> -#include "r600_screen.h" -#include "r600_context.h" +#include "r600_pipe.h" #include "r600d.h" int r600_conv_pipe_prim(unsigned pprim, unsigned *prim) diff --git a/src/gallium/drivers/r600/r600_hw_states.c b/src/gallium/drivers/r600/r600_hw_states.c deleted file mode 100644 index b4d73a0fb1..0000000000 --- a/src/gallium/drivers/r600/r600_hw_states.c +++ /dev/null @@ -1,1215 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Dave Airlie - */ - -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_blitter.h> -#include "util/u_pack_color.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" -#include "r600_state_inlines.h" -#include "r600d.h" - -static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - int i; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); - rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); - rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); - rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); - rstate->states[R600_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]); - rstate->states[R600_BLEND__CB_BLEND0_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND1_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND2_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND3_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND4_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND5_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND6_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND7_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND_CONTROL] = 0x00000000; - - for (i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; - uint32_t bc = 0; - - if (!state->rt[i].blend_enable) - continue; - - bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); - bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); - bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= S_028804_SEPARATE_ALPHA_BLEND(1); - bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); - bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); - bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); - } - - rstate->states[R600_BLEND__CB_BLEND0_CONTROL + i] = bc; - if (i == 0) - rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; - } - - radeon_state_pm4(rstate); -} - -static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_clip_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0); - - for (int i = 0; i < state->nr; i++) { - rstate->states[i * 4 + 0] = fui(state->ucp[i][0]); - rstate->states[i * 4 + 1] = fui(state->ucp[i][1]); - rstate->states[i * 4 + 2] = fui(state->ucp[i][2]); - rstate->states[i * 4 + 3] = fui(state->ucp[i][3]); - } - radeon_state_pm4(rstate); -} - -static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state, int cb) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0); - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - - - color_info = S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_NUMBER_TYPE(ntype); - - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_0280A0_SOURCE_FORMAT(1); - rstate->states[R600_CB0__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8; - rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; - rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | - S_028060_SLICE_TILE_MAX(slice); - rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; - radeon_state_pm4(rstate); -} - -static void r600_db(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - if (state->zsbuf == NULL) - return; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - rstate->states[R600_DB__DB_DEPTH_BASE] = state->zsbuf->offset >> 8; - rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) | - S_028010_FORMAT(format); - rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; - rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | - S_028000_SLICE_TILE_MAX(slice); - radeon_state_pm4(rstate); -} - -static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - const struct pipe_clip_state *clip = NULL; - struct r600_screen *rscreen = rctx->screen; - float offset_units = 0, offset_scale = 0; - char depth = 0; - unsigned offset_db_fmt_cntl = 0; - unsigned point_size; - unsigned prov_vtx = 1; - unsigned polygon_dual_mode; - - if (rctx->clip) - clip = &rctx->clip->state.clip; - if (fb->zsbuf) { - offset_units = state->offset_units; - offset_scale = state->offset_scale * 12.0f; - switch (fb->zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return; - } - } - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - - if (state->flatshade_first) - prov_vtx = 0; - - rctx->flat_shade = state->flatshade; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = - S_0286D4_FLAT_SHADE_ENA(1); - if (state->sprite_coord_enable) { - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0; - if (clip) { - /* Clip plane enable bits are stashed in the lower six bits of - * PA_CL_CLIP_CNTL, so just set all of the corresponding bits with a - * pinch of bit twiddling. - * - * PS_UCP_MODE 3 is "expand and clip as trifan," which is the same - * setting that we use on r300-r500. I believe that fglrx always uses - * this mode as well. */ - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = - ((1 << clip->nr) - 1) | - S_028810_PS_UCP_MODE(3) | - S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); - } - polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || - state->fill_back != PIPE_POLYGON_MODE_FILL); - - rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | - S_028814_POLY_MODE(polygon_dual_mode) | - S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)); - rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); - rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - /* Point size for PA_SU_POINT_SIZE and PA_SU_POINT_MINMAX is fixed-point, - * 12.4. - * - * For some reason, maximum point size is set to 0x8000 (2048.0) instead - * of the maximum value 0xFFF0 (4095.0). */ - point_size = (unsigned)(state->point_size * 8.0); - rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = - S_028A00_HEIGHT(point_size) | S_028A00_WIDTH(point_size); - rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = - S_028A04_MIN_SIZE(0) | S_028A04_MAX_SIZE(0x8000); - rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = S_028A08_WIDTH(8); - rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; - rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = S_028C00_LAST_PIXEL(1); - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = fui(1); - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = fui(1); - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = fui(1); - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = fui(1); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - radeon_state_pm4(rstate); -} - -static void r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - struct r600_screen *rscreen = rctx->screen; - enum radeon_family family; - unsigned minx, maxx, miny, maxy; - u32 tl, br; - - family = radeon_get_family(rctx->rw); - - if (state == NULL) { - minx = 0; - miny = 0; - maxx = fb->cbufs[0]->width; - maxy = fb->cbufs[0]->height; - } else { - minx = state->minx; - miny = state->miny; - maxx = state->maxx; - maxy = state->maxy; - } - tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy); - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = - S_02820C_CLIP_RULE(0xFFFF); - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; - - if (family >= CHIP_RV770) - rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - radeon_state_pm4(rstate); -} - -static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = fui(0); - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = fui(1); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = - S_028818_VPORT_X_SCALE_ENA(1) | - S_028818_VPORT_X_OFFSET_ENA(1) | - S_028818_VPORT_Y_SCALE_ENA(1) | - S_028818_VPORT_Y_OFFSET_ENA(1) | - S_028818_VPORT_Z_SCALE_ENA(1) | - S_028818_VPORT_Z_OFFSET_ENA(1) | - S_028818_VTX_W0_FMT(1); - radeon_state_pm4(rstate); -} - -static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; - const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; - struct r600_screen *rscreen = rctx->screen; - unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; - unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - struct r600_shader *rshader; - struct r600_query *rquery = NULL; - boolean query_running; - int i; - bool flush_db = FALSE; - - if (rctx->ps_shader == NULL) { - return; - } - if (rctx->dsa->flags & R600_STATE_FLAG_DSA_FLUSH) - flush_db = TRUE; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - - db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); - - if (!flush_db) - db_shader_control = S_02880C_DUAL_EXPORT_ENABLE(1); - - rshader = &rctx->ps_shader->shader; - if (rshader->uses_kill) - db_shader_control |= S_02880C_KILL_ENABLE(1); - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); - } - stencil_ref_mask = 0; - stencil_ref_mask_bf = 0; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); - - /* set stencil enable */ - if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1) | - S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)) | - S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)) | - S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)) | - S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - - stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask) | - S_028430_STENCILREF(stencil_ref->ref_value[0]); - - if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1) | - S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)) | - S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)) | - S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)) | - S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = - S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask) | - S_028430_STENCILREF(stencil_ref->ref_value[1]); - } - } - - alpha_test_control = 0; - alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func) | - S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); - } - - db_render_control = 0; - - if (flush_db) - db_render_control = S_028D0C_DEPTH_COPY_ENABLE(1) | - S_028D0C_STENCIL_COPY_ENABLE(1) | - S_028D0C_COPY_CENTROID(1); - - db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - - query_running = FALSE; - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = TRUE; - } - } - - if (query_running) { - db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); - if (radeon_get_family_class(rscreen->rw) == R700) - db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); - } - - rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CLEAR] = fui(1); - rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; - rstate->states[R600_DSA__DB_STENCILREFMASK] = stencil_ref_mask; - rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; - rstate->states[R600_DSA__SX_ALPHA_REF] = alpha_ref; - rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control; - rstate->states[R600_DSA__DB_SHADER_CONTROL] = db_shader_control; - rstate->states[R600_DSA__DB_RENDER_CONTROL] = db_render_control; - rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = db_render_override; - - rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; - rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; - rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - radeon_state_pm4(rstate); -} - - -static INLINE u32 S_FIXED(float value, u32 frac_bits) -{ - return value * (1 << frac_bits); -} - -static void r600_sampler_border(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS); - if (uc.ui) { - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]); - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]); - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]); - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA] = fui(state->border_color[3]); - } - radeon_state_pm4(rstate); -} - -static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS); - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0); - /* FIXME LOD it depends on texture base level ... */ - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - radeon_state_pm4(rstate); - -} - - -static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_screen *rscreen = rctx->screen; - const struct util_format_description *desc; - struct r600_resource_texture *texture; - struct r600_resource *rbuffer; - unsigned format; - uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4], array_mode = 0, tile_type = 0; - - rstate->cpm4 = 0; - swizzle[0] = view->swizzle_r; - swizzle[1] = view->swizzle_g; - swizzle[2] = view->swizzle_b; - swizzle[3] = view->swizzle_a; - format = r600_translate_texformat(view->texture->format, - swizzle, - &word4, &yuv_format); - if (format == ~0) { - return; - } - desc = util_format_description(view->texture->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", view->texture->format); - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS); - texture = (struct r600_resource_texture*)view->texture; - rbuffer = &texture->resource; - - if (texture->depth) { - r600_texture_depth_flush(ctx, view->texture); - rbuffer = &texture->flushed_depth_texture->resource; - } - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); - - rstate->nbo = 2; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[1] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - - pitch = align(texture->pitch[0] / texture->bpt, 8); - - /* FIXME properly handle first level != 0 */ - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = - S_038000_DIM(r600_tex_dim(view->texture->target)) | - S_038000_TILE_MODE(array_mode) | - S_038000_TILE_TYPE(tile_type) | - S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(view->texture->width0 - 1); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = - S_038004_TEX_HEIGHT(view->texture->height0 - 1) | - S_038004_TEX_DEPTH(view->texture->depth0 - 1) | - S_038004_DATA_FORMAT(format); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = texture->offset[0] >> 8; - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = texture->offset[1] >> 8; - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = - word4 | - S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | - S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | - S_038010_REQUEST_SIZE(1) | - S_038010_BASE_LEVEL(view->first_level); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = - S_038014_LAST_LEVEL(view->last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = - S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - radeon_state_pm4(rstate); -} - -static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; - uint32_t color_control, target_mask, shader_mask, shader_control; - int i; - - target_mask = 0; - shader_mask = 0; - shader_control = 0; - color_control = S_028808_PER_MRT_BLEND(1); - - for (i = 0; i < nr_cbufs; i++) { - shader_mask |= 0xf << (i * 4); - shader_control |= (1 << i); - } - - if (pbs->logicop_enable) { - color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20); - } else { - color_control |= (0xcc << 16); - } - - if (pbs->independent_blend_enable) { - for (i = 0; i < nr_cbufs; i++) { - if (pbs->rt[i].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (pbs->rt[i].colormask << (4 * i)); - } - } else { - for (i = 0; i < nr_cbufs; i++) { - if (pbs->rt[0].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (pbs->rt[0].colormask << (4 * i)); - } - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); - rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; - rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; - rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; - if (radeon_get_family_class(rscreen->rw) == R700) - rstate->states[R600_CB_CNTL__CB_SHADER_CONTROL] = shader_control; - rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; - rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; - rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rstate); -} - -static void r600_init_config(struct r600_context *rctx) -{ - int ps_prio; - int vs_prio; - int gs_prio; - int es_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_temp_gprs; - int num_ps_threads; - int num_vs_threads; - int num_gs_threads; - int num_es_threads; - int num_ps_stack_entries; - int num_vs_stack_entries; - int num_gs_stack_entries; - int num_es_stack_entries; - enum radeon_family family; - - family = radeon_get_family(rctx->rw); - ps_prio = 0; - vs_prio = 1; - gs_prio = 2; - es_prio = 3; - switch (family) { - case CHIP_R600: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV630: - case CHIP_RV635: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 40; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - default: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV670: - num_ps_gprs = 144; - num_vs_gprs = 40; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV770: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 256; - num_vs_stack_entries = 256; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV730: - case CHIP_RV740: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV710: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 48; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - } - radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0); - - rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000; - switch (family) { - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV710: - break; - default: - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); - break; - } - - if (!rctx->screen->use_mem_constant) - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); - - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - - rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; - - if (family >= CHIP_RV770) { - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = - S_008D8C_VS_PC_LIMIT_ENABLE(1); - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = - S_009838_DEPTH_FREE(4) | - S_009838_DEPTH_FLUSH(16) | - S_009838_DEPTH_PENDING_FREE(4) | - S_009838_DEPTH_CACHELINE_FREE(4); - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000000; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00500000 | - S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | - S_028A4C_FORCE_EOV_REZ_ENABLE(1); - } else { - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000; - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002 | - S_009508_DISABLE_CUBE_WRAP(1); - rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x82000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = - S_009838_DEPTH_FREE(4) | - S_009838_DEPTH_FLUSH(16) | - S_009838_DEPTH_PENDING_FREE(4) | - S_009838_DEPTH_CACHELINE_FREE(16); - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = - S_0286C8_PS_GROUPING(1); - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = - S_028A4C_WALK_ORDER_ENABLE(1) | - S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1); - } - rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = S_028AB4_REUSE_OFF(1); - rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(&rctx->config); -} - -static int r600_vs_resource(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset, - uint32_t stride, uint32_t src_format) -{ - struct radeon_state *vs_resource = &rctx->vs_resource[id]; - struct r600_screen *rscreen = rctx->screen; - unsigned format, num_format = 0, format_comp = 0; - - format = r600_translate_colorformat(src_format); - - r600_translate_vertex_num_format(src_format, &num_format, &format_comp); - - format = S_038008_DATA_FORMAT(format) | S_038008_NUM_FORMAT_ALL(num_format) | S_038008_FORMAT_COMP_ALL(format_comp); - - radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_VS); - radeon_ws_bo_reference(rscreen->rw, &vs_resource->bo[0], rbuffer->bo); - vs_resource->nbo = 1; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->size - offset - 1; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(stride) | format; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; - vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(vs_resource); -} - -static int r600_draw_vgt_init(struct r600_draw *draw, - int vgt_draw_initiator) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - struct r600_resource *rbuffer = (struct r600_resource *)draw->index_buffer; - radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0); - draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; - draw->draw.states[R600_DRAW__VGT_DMA_BASE] = draw->index_buffer_offset; - if (rbuffer) { - radeon_ws_bo_reference(rscreen->rw, &draw->draw.bo[0], rbuffer->bo); - draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw.nbo = 1; - } - return radeon_state_pm4(&draw->draw); -} - -static int r600_draw_vgt_prim(struct r600_draw *draw, - uint32_t prim, uint32_t vgt_dma_index_type) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0); - draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = draw->max_index; - draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = draw->min_index; - draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->index_bias; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - return radeon_state_pm4(&draw->vgt); -} - -static int r600_ps_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_rasterizer_state *rasterizer; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE, have_face = FALSE; - - rasterizer = &rctx->rasterizer->state.rasterizer; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(rctx, rshader, i)) | S_028644_SEL_CENTROID(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - have_face = TRUE; - - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rasterizer->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= S_028854_EXPORT_Z(1); - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; - } - } - exports_ps |= S_028854_EXPORT_COLORS(num_cout); - if (exports_ps == 0) { - /* Always at least export 1 color component per pixel. */ - exports_ps = S_028854_EXPORT_COLORS(1); - } - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = - S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - - if (have_pos) { - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= - S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); - state->states[R600_PS_SHADER__SPI_INPUT_Z] |= - S_0286D8_PROVIDE_Z_TO_SPI(1); - } - - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = - S_0286D0_FRONT_FACE_ENA(have_face); - - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack); - state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - state->nbo = 1; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static int r600_vs_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); - for (i = 0; i < 10; i++) { - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; - } - /* so far never got proper semantic id from tgsi */ - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; - } - state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack); - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo); - state->nbo = 2; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - state->placement[2] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -struct r600_context_hw_state_vtbl r600_hw_state_vtbl = { - .blend = r600_blend, - .ucp = r600_ucp, - .cb = r600_cb, - .db = r600_db, - .rasterizer = r600_rasterizer, - .scissor = r600_scissor, - .viewport = r600_viewport, - .dsa = r600_dsa, - .sampler_border = r600_sampler_border, - .sampler = r600_sampler, - .resource = r600_resource, - .cb_cntl = r600_cb_cntl, - .vs_resource = r600_vs_resource, - .vgt_init = r600_draw_vgt_init, - .vgt_prim = r600_draw_vgt_prim, - .vs_shader = r600_vs_shader, - .ps_shader = r600_ps_shader, - .init_config = r600_init_config, -}; - -void r600_set_constant_buffer_file(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, i, type, shader_class; - struct radeon_state *rstate, *rstates; - struct pipe_transfer *transfer; - u32 *ptr; - - type = R600_STATE_CONSTANT; - - switch (shader) { - case PIPE_SHADER_VERTEX: - shader_class = R600_SHADER_VS; - rstates = rctx->vs_constant; - break; - case PIPE_SHADER_FRAGMENT: - shader_class = R600_SHADER_PS; - rstates = rctx->ps_constant; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - if (buffer && buffer->width0 > 0) { - nconstant = buffer->width0 / 16; - ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); - if (ptr == NULL) - return; - for (i = 0; i < nconstant; i++) { - rstate = &rstates[i]; - radeon_state_init(rstate, rscreen->rw, type, i, shader_class); - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; - if (radeon_state_pm4(rstate)) - return; - radeon_draw_bind(&rctx->draw, rstate); - } - pipe_buffer_unmap(ctx, buffer, transfer); - } -} - -void r600_set_constant_buffer_mem(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, type, shader_class, size; - struct radeon_state *rstate, *rstates; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - type = R600_STATE_CBUF; - - switch (shader) { - case PIPE_SHADER_VERTEX: - shader_class = R600_SHADER_VS; - rstates = rctx->vs_constant; - break; - case PIPE_SHADER_FRAGMENT: - shader_class = R600_SHADER_PS; - rstates = rctx->ps_constant; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - - rstate = &rstates[0]; - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - - nconstant = buffer->width0 / 16; - size = ALIGN_DIVUP(nconstant, 16); - - radeon_state_init(rstate, rscreen->rw, type, 0, shader_class); - rstate->states[R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size; - rstate->states[R600_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - if (radeon_state_pm4(rstate)) - return; - radeon_draw_bind(&rctx->draw, rstate); -} - diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 0cf9c1c401..4f9b39a7fd 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -233,12 +233,6 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL 0x00000012 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE 0x00000013 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR 0x00000014 -/* same up to here */ -/* -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA 0x00000015 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 0x00000016 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x00000018 -*/ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT 0x00000015 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT 0x00000016 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT 0x00000017 @@ -336,9 +330,11 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099 /* TODO Fill in more ALU */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW 0x000000D7 diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_pipe.c index be8a78ca3d..dd8fa4fcd7 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -19,37 +19,221 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson */ #include <stdio.h> -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_memory.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_public.h" +#include <errno.h> +#include <pipe/p_defines.h> +#include <pipe/p_state.h> +#include <pipe/p_context.h> +#include <tgsi/tgsi_scan.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_util.h> +#include <util/u_blitter.h> +#include <util/u_double_list.h> +#include <util/u_transfer.h> +#include <util/u_surface.h> +#include <util/u_pack_color.h> +#include <util/u_memory.h> +#include <util/u_inlines.h> +#include <util/u_upload_mgr.h> +#include <pipebuffer/pb_buffer.h> +#include "r600.h" +#include "r600d.h" #include "r600_resource.h" +#include "r600_shader.h" +#include "r600_pipe.h" #include "r600_state_inlines.h" +/* + * pipe_context + */ +static void r600_flush(struct pipe_context *ctx, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; +#if 0 + static int dc = 0; + char dname[256]; +#endif + + if (!rctx->ctx.pm4_cdwords) + return; + + u_upload_flush(rctx->upload_vb); + u_upload_flush(rctx->upload_ib); + +#if 0 + sprintf(dname, "gallium-%08d.bof", dc); + if (dc < 20) { + r600_context_dump_bof(&rctx->ctx, dname); + R600_ERR("dumped %s\n", dname); + } + dc++; +#endif + r600_context_flush(&rctx->ctx); +} + +static void r600_destroy_context(struct pipe_context *context) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)context; + + r600_context_fini(&rctx->ctx); + for (int i = 0; i < R600_PIPE_NSTATES; i++) { + free(rctx->states[i]); + } + + u_upload_destroy(rctx->upload_vb); + u_upload_destroy(rctx->upload_ib); + + FREE(rctx->ps_resource); + FREE(rctx->vs_resource); + FREE(rctx); +} + +static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) +{ + struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); + struct r600_screen* rscreen = (struct r600_screen *)screen; + enum chip_class class; + + if (rctx == NULL) + return NULL; + rctx->context.winsys = rscreen->screen.winsys; + rctx->context.screen = screen; + rctx->context.priv = priv; + rctx->context.destroy = r600_destroy_context; + rctx->context.flush = r600_flush; + + /* Easy accessing of screen/winsys. */ + rctx->screen = rscreen; + rctx->radeon = rscreen->radeon; + rctx->family = r600_get_family(rctx->radeon); + + r600_init_blit_functions(rctx); + r600_init_query_functions(rctx); + r600_init_context_resource_functions(rctx); + + switch (r600_get_family(rctx->radeon)) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + rctx->context.draw_vbo = r600_draw_vbo; + r600_init_state_functions(rctx); + if (r600_context_init(&rctx->ctx, rctx->radeon)) { + r600_destroy_context(&rctx->context); + return NULL; + } + r600_init_config(rctx); + break; + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + rctx->context.draw_vbo = evergreen_draw; + evergreen_init_state_functions(rctx); + if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { + r600_destroy_context(&rctx->context); + return NULL; + } + evergreen_init_config(rctx); + break; + default: + R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon)); + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, + PIPE_BIND_INDEX_BUFFER); + if (rctx->upload_ib == NULL) { + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, + PIPE_BIND_VERTEX_BUFFER); + if (rctx->upload_vb == NULL) { + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->blitter = util_blitter_create(&rctx->context); + if (rctx->blitter == NULL) { + FREE(rctx); + return NULL; + } + + rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->vs_resource) { + FREE(rctx); + return NULL; + } + + rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->ps_resource) { + FREE(rctx); + return NULL; + } + + class = r600_get_family_class(rctx->radeon); + if (class == R600 || class == R700) + rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); + else + rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); + + r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth; + + return &rctx->context; +} + +/* + * pipe_screen + */ static const char* r600_get_vendor(struct pipe_screen* pscreen) { return "X.Org"; } +static const char *r600_get_family_name(enum radeon_family family) +{ + switch(family) { + case CHIP_R600: return "R600"; + case CHIP_RV610: return "RV610"; + case CHIP_RV630: return "RV630"; + case CHIP_RV670: return "RV670"; + case CHIP_RV620: return "RV620"; + case CHIP_RV635: return "RV635"; + case CHIP_RS780: return "RS780"; + case CHIP_RS880: return "RS880"; + case CHIP_RV770: return "RV770"; + case CHIP_RV730: return "RV730"; + case CHIP_RV710: return "RV710"; + case CHIP_RV740: return "RV740"; + case CHIP_CEDAR: return "CEDAR"; + case CHIP_REDWOOD: return "REDWOOD"; + case CHIP_JUNIPER: return "JUNIPER"; + case CHIP_CYPRESS: return "CYPRESS"; + case CHIP_HEMLOCK: return "HEMLOCK"; + default: return "unknown"; + } +} + static const char* r600_get_name(struct pipe_screen* pscreen) { - struct r600_screen *screen = r600_screen(pscreen); - enum radeon_family family = radeon_get_family(screen->rw); + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + enum radeon_family family = r600_get_family(rscreen->radeon); - if (family >= CHIP_R600 && family < CHIP_RV770) - return "R600 (HD2XXX,HD3XXX)"; - else if (family < CHIP_CEDAR) - return "R700 (HD4XXX)"; - else - return "EVERGREEN"; + return r600_get_family_name(family); } static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) @@ -72,6 +256,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_SHADER_STENCIL_EXPORT: return 1; /* Unsupported features (boolean caps). */ @@ -87,7 +272,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 14; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: /* FIXME allow this once infrastructure is there */ - return 0; + return 16; case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16; @@ -104,15 +289,35 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + default: R600_ERR("r600: unknown param %d\n", param); return 0; } } +static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 8192.0f; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + default: + R600_ERR("r600: unsupported paramf %d\n", param); + return 0.0f; + } +} + static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) { - switch(shader) { + switch(shader) + { case PIPE_SHADER_FRAGMENT: case PIPE_SHADER_VERTEX: break; @@ -155,24 +360,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e } } -static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - default: - R600_ERR("r600: unsupported paramf %d\n", param); - return 0.0f; - } -} - static boolean r600_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, @@ -226,31 +413,25 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, static void r600_destroy_screen(struct pipe_screen* pscreen) { - struct r600_screen* rscreen = r600_screen(pscreen); + struct r600_screen *rscreen = (struct r600_screen *)pscreen; if (rscreen == NULL) return; FREE(rscreen); } -struct pipe_screen *r600_screen_create(struct radeon *rw) + +struct pipe_screen *r600_screen_create(struct radeon *radeon) { - struct r600_screen* rscreen; + struct r600_screen *rscreen; rscreen = CALLOC_STRUCT(r600_screen); if (rscreen == NULL) { return NULL; } - /* don't enable mem constant for r600 yet */ - rscreen->use_mem_constant = FALSE; - if (radeon_get_family_class(rw) == EVERGREEN) { - rscreen->use_mem_constant = TRUE; - } - - radeon_set_mem_constant(rw, rscreen->use_mem_constant); - rscreen->rw = rw; - rscreen->screen.winsys = (struct pipe_winsys*)rw; + rscreen->radeon = radeon; + rscreen->screen.winsys = (struct pipe_winsys*)radeon; rscreen->screen.destroy = r600_destroy_screen; rscreen->screen.get_name = r600_get_name; rscreen->screen.get_vendor = r600_get_vendor; @@ -261,5 +442,8 @@ struct pipe_screen *r600_screen_create(struct radeon *rw) rscreen->screen.context_create = r600_create_context; r600_init_screen_texture_functions(&rscreen->screen); r600_init_screen_resource_functions(&rscreen->screen); + + rscreen->tiling_info = r600_get_tiling_info(radeon); + return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 5abf910c81..e7c4b60d00 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -26,6 +26,15 @@ #ifndef R600_PIPE_H #define R600_PIPE_H +#include <pipe/p_state.h> +#include <pipe/p_screen.h> +#include <pipe/p_context.h> +#include <util/u_math.h> +#include "r600.h" +#include "r600_public.h" +#include "r600_shader.h" +#include "r600_resource.h" + enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, @@ -49,6 +58,7 @@ enum r600_pipe_state_id { struct r600_screen { struct pipe_screen screen; struct radeon *radeon; + struct r600_tiling_info *tiling_info; }; struct r600_pipe_sampler_view { @@ -69,12 +79,6 @@ struct r600_pipe_blend { unsigned cb_target_mask; }; -struct r600_pipe_shader { - struct r600_shader shader; - struct r600_pipe_state rstate; - struct radeon_ws_bo *bo; -}; - struct r600_vertex_element { unsigned count; @@ -82,13 +86,32 @@ struct r600_vertex_element struct pipe_vertex_element elements[32]; }; +struct r600_pipe_shader { + struct r600_shader shader; + struct r600_pipe_state rstate; + struct r600_bo *bo; + struct r600_vertex_element vertex_elements; +}; + +/* needed for blitter save */ +#define NUM_TEX_UNITS 16 + +struct r600_textures_info { + struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; + unsigned n_views; + void *samplers[NUM_TEX_UNITS]; + unsigned n_samplers; +}; + +#define R600_CONSTANT_ARRAY_SIZE 256 +#define R600_RESOURCE_ARRAY_SIZE 160 + struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; struct pipe_framebuffer_state *pframebuffer; unsigned family; void *custom_dsa_flush; - struct list_head query_list; /* fake member for depth remove once merged */ struct r600_screen *screen; struct radeon *radeon; struct r600_pipe_state *states[R600_PIPE_NSTATES]; @@ -103,12 +126,8 @@ struct r600_pipe_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; - unsigned vs_nconst; - unsigned ps_nconst; - struct r600_pipe_state vs_const[256]; - struct r600_pipe_state ps_const[256]; - struct r600_pipe_state vs_resource[160]; - struct r600_pipe_state ps_resource[160]; + struct r600_pipe_state *vs_resource; + struct r600_pipe_state *ps_resource; struct r600_pipe_state config; struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; @@ -121,6 +140,8 @@ struct r600_pipe_context { struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; unsigned any_user_vbs; + struct r600_textures_info ps_samplers; + }; struct r600_drawl { @@ -136,29 +157,65 @@ struct r600_drawl { struct pipe_resource *index_buffer; }; -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, - uint32_t *word4_p, uint32_t *yuv_format_p); +/* evergreen_state.c */ +void evergreen_init_state_functions(struct r600_pipe_context *rctx); +void evergreen_init_config(struct r600_pipe_context *rctx); +void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info); +void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); + +/* r600_blit.c */ +void r600_init_blit_functions(struct r600_pipe_context *rctx); +int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); + +/* r600_buffer.c */ +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ); +struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, + void *ptr, unsigned bytes, + unsigned bind); +unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, + struct pipe_resource *buf, + unsigned face, unsigned level); +struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, + struct winsys_handle *whandle); +int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); +int r600_upload_user_buffers(struct r600_pipe_context *rctx); -/* r600_state2.c */ -int r600_pipe_shader_update2(struct pipe_context *ctx, struct r600_pipe_shader *shader); -int r600_pipe_shader_create2(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); -int r600_upload_index_buffer2(struct r600_pipe_context *rctx, struct r600_drawl *draw); -int r600_upload_user_buffers2(struct r600_pipe_context *rctx); -void r600_translate_index_buffer2(struct r600_pipe_context *r600, +/* r600_query.c */ +void r600_init_query_functions(struct r600_pipe_context *rctx); + +/* r600_resource.c */ +void r600_init_context_resource_functions(struct r600_pipe_context *r600); + +/* r600_shader.c */ +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader); +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); +int r600_find_vs_semantic_index(struct r600_shader *vs, + struct r600_shader *ps, int id); + +/* r600_state.c */ +void r600_init_state_functions(struct r600_pipe_context *rctx); +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); +void r600_init_config(struct r600_pipe_context *rctx); +void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, unsigned *index_size, unsigned *start, unsigned count); -int r600_find_vs_semantic_index2(struct r600_shader *vs, - struct r600_shader *ps, int id); +void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); +/* r600_helper.h */ +int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -/* evergreen_state.c */ -void evergreen_init_state_functions2(struct r600_pipe_context *rctx); -void evergreen_init_config2(struct r600_pipe_context *rctx); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info); -void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); -void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); +/* r600_texture.c */ +void r600_init_screen_texture_functions(struct pipe_screen *screen); +uint32_t r600_translate_texformat(enum pipe_format format, + const unsigned char *swizzle_view, + uint32_t *word4_p, uint32_t *yuv_format_p); +/* + * common helpers + */ static INLINE u32 S_FIXED(float value, u32 frac_bits) { return value * (1 << frac_bits); diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h index 1d89c9f9f6..f1970201e8 100644 --- a/src/gallium/drivers/r600/r600_public.h +++ b/src/gallium/drivers/r600/r600_public.h @@ -1,9 +1,28 @@ - +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ #ifndef R600_PUBLIC_H #define R600_PUBLIC_H -struct radeon; - -struct pipe_screen* r600_screen_create(struct radeon *rw); +struct pipe_screen *r600_screen_create(struct radeon *radeon); #endif diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 6e50701de6..726668260c 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -19,230 +19,55 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson */ -#include <errno.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include "r600_screen.h" -#include "r600_context.h" - -static void r600_query_begin(struct r600_context *rctx, struct r600_query *rquery) -{ - struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate = &rquery->rstate; - - radeon_state_fini(rstate); - radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_BEGIN, 0, 0); - rstate->states[R600_QUERY__OFFSET] = rquery->num_results; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rquery->buffer); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - } -} - -static void r600_query_end(struct r600_context *rctx, struct r600_query *rquery) -{ - struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate = &rquery->rstate; - - radeon_state_fini(rstate); - radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_END, 0, 0); - rstate->states[R600_QUERY__OFFSET] = rquery->num_results + 8; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rquery->buffer); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - } -} +#include "r600_pipe.h" static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct r600_query *q; - - if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) - return NULL; - - q = CALLOC_STRUCT(r600_query); - if (!q) - return NULL; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - q->type = query_type; - q->buffer_size = 4096; - - q->buffer = radeon_ws_bo(rscreen->rw, q->buffer_size, 1, 0); - if (!q->buffer) { - FREE(q); - return NULL; - } - - LIST_ADDTAIL(&q->list, &rctx->query_list); - - return (struct pipe_query *)q; -} - -static void r600_destroy_query(struct pipe_context *ctx, - struct pipe_query *query) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_query *q = r600_query(query); - - radeon_ws_bo_reference(rscreen->rw, &q->buffer, NULL); - LIST_DEL(&q->list); - FREE(query); + return (struct pipe_query*)r600_context_query_create(&rctx->ctx, query_type); } -static void r600_query_result(struct pipe_context *ctx, struct r600_query *rquery) +static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - u64 start, end; - u32 *results; - int i; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - results = radeon_ws_bo_map(rscreen->rw, rquery->buffer, 0, ctx); - for (i = 0; i < rquery->num_results; i += 4) { - start = (u64)results[i] | (u64)results[i + 1] << 32; - end = (u64)results[i + 2] | (u64)results[i + 3] << 32; - if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) { - rquery->result += end - start; - } - } - radeon_ws_bo_unmap(rscreen->rw, rquery->buffer); - rquery->num_results = 0; -} - -static void r600_query_resume(struct pipe_context *ctx, struct r600_query *rquery) -{ - struct r600_context *rctx = r600_context(ctx); - - if (rquery->num_results >= ((rquery->buffer_size >> 2) - 2)) { - /* running out of space */ - if (!rquery->flushed) { - ctx->flush(ctx, 0, NULL); - } - r600_query_result(ctx, rquery); - } - r600_query_begin(rctx, rquery); - rquery->flushed = FALSE; -} - -static void r600_query_suspend(struct pipe_context *ctx, struct r600_query *rquery) -{ - struct r600_context *rctx = r600_context(ctx); - - r600_query_end(rctx, rquery); - rquery->num_results += 16; + r600_context_query_destroy(&rctx->ctx, (struct r600_query *)query); } static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery = r600_query(query); - int r; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; - rquery->state = R600_QUERY_STATE_STARTED; + rquery->result = 0; rquery->num_results = 0; - rquery->flushed = FALSE; - r600_query_resume(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } + r600_query_begin(&rctx->ctx, (struct r600_query *)query); } static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery = r600_query(query); - int r; - - rquery->state &= ~R600_QUERY_STATE_STARTED; - rquery->state |= R600_QUERY_STATE_ENDED; - r600_query_suspend(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } -} - -void r600_queries_suspend(struct pipe_context *ctx) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery; - int r; - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - r600_query_suspend(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } - } - rquery->state |= R600_QUERY_STATE_SUSPENDED; - } -} - -void r600_queries_resume(struct pipe_context *ctx) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery; - int r; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - r600_query_resume(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } - } - rquery->state &= ~R600_QUERY_STATE_SUSPENDED; - } + r600_query_end(&rctx->ctx, (struct r600_query *)query); } static boolean r600_get_query_result(struct pipe_context *ctx, struct pipe_query *query, boolean wait, void *vresult) { - struct r600_query *rquery = r600_query(query); - uint64_t *result = (uint64_t*)vresult; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; - if (!rquery->flushed) { + if (rquery->num_results) { ctx->flush(ctx, 0, NULL); - rquery->flushed = TRUE; } - r600_query_result(ctx, rquery); - *result = rquery->result; - rquery->result = 0; - return TRUE; + return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); } -void r600_init_query_functions(struct r600_context* rctx) +void r600_init_query_functions(struct r600_pipe_context *rctx) { - LIST_INITHEAD(&rctx->query_list); - rctx->context.create_query = r600_create_query; rctx->context.destroy_query = r600_destroy_query; rctx->context.begin_query = r600_begin_query; diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index 05707740da..207642ccfa 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -21,9 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r600_context.h" -#include "r600_resource.h" -#include "r600_screen.h" +#include "r600_pipe.h" static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) @@ -46,7 +44,16 @@ static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * scre } } -void r600_init_context_resource_functions(struct r600_context *r600) +void r600_init_screen_resource_functions(struct pipe_screen *screen) +{ + screen->resource_create = r600_resource_create; + screen->resource_from_handle = r600_resource_from_handle; + screen->resource_get_handle = u_resource_get_handle_vtbl; + screen->resource_destroy = u_resource_destroy_vtbl; + screen->user_buffer_create = r600_user_buffer_create; +} + +void r600_init_context_resource_functions(struct r600_pipe_context *r600) { r600->context.get_transfer = u_get_transfer_vtbl; r600->context.transfer_map = u_transfer_map_vtbl; @@ -56,12 +63,3 @@ void r600_init_context_resource_functions(struct r600_context *r600) r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; } - -void r600_init_screen_resource_functions(struct pipe_screen *screen) -{ - screen->resource_create = r600_resource_create; - screen->resource_from_handle = r600_resource_from_handle; - screen->resource_get_handle = u_resource_get_handle_vtbl; - screen->resource_destroy = u_resource_destroy_vtbl; - screen->user_buffer_create = r600_user_buffer_create; -} diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index b0026e9578..ef484aba4a 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -25,8 +25,15 @@ #include "util/u_transfer.h" -struct r600_context; -struct r600_screen; +/* Texture transfer. */ +struct r600_transfer { + /* Base class. */ + struct pipe_transfer transfer; + /* Buffer transfer. */ + struct pipe_transfer *buffer_transfer; + unsigned offset; + struct pipe_resource *linear_texture; +}; /* This gets further specialized into either buffer or texture * structures. Use the vtbl struct to choose between the two @@ -34,7 +41,7 @@ struct r600_screen; */ struct r600_resource { struct u_resource base; - struct radeon_ws_bo *bo; + struct r600_bo *bo; u32 domain; u32 flink; u32 size; @@ -42,14 +49,12 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; - unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long width[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long height[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch_override; - unsigned long bpt; - unsigned long size; + unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; + unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_override; + unsigned size; unsigned tiled; unsigned array_mode; unsigned tile_type; @@ -58,7 +63,6 @@ struct r600_resource_texture { struct r600_resource_texture *flushed_depth_texture; }; -void r600_init_context_resource_functions(struct r600_context *r600); void r600_init_screen_resource_functions(struct pipe_screen *screen); /* r600_buffer */ @@ -106,4 +110,18 @@ int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture); extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); + +/* r600_texture.c texture transfer functions. */ +struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, + struct pipe_resource *texture, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box); +void r600_texture_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans); +void* r600_texture_transfer_map(struct pipe_context *ctx, + struct pipe_transfer* transfer); +void r600_texture_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer* transfer); + #endif diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h deleted file mode 100644 index 4105bb7cf6..0000000000 --- a/src/gallium/drivers/r600/r600_screen.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_SCREEN_H -#define R600_SCREEN_H - -#include <pipe/p_state.h> -#include <pipe/p_screen.h> -#include <pipebuffer/pb_buffer.h> -#include <xf86drm.h> -#include <radeon_drm.h> -#include "radeon.h" -#include "util/u_transfer.h" -#include "r600_resource.h" - -/* Texture transfer. */ -struct r600_transfer { - /* Base class. */ - struct pipe_transfer transfer; - /* Buffer transfer. */ - struct pipe_transfer *buffer_transfer; - unsigned offset; - struct pipe_resource *linear_texture; -}; - -struct r600_screen { - struct pipe_screen screen; - struct radeon *rw; - boolean use_mem_constant; -}; - -static INLINE struct r600_screen *r600_screen(struct pipe_screen *screen) -{ - return (struct r600_screen*)screen; -} - -/* Buffer functions. */ -struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ); -struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, - void *ptr, unsigned bytes, - unsigned bind); -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned face, unsigned level); -struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, - struct winsys_handle *whandle); - -/* r600_texture.c texture transfer functions. */ -struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, - struct pipe_resource *texture, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box); -void r600_texture_transfer_destroy(struct pipe_context *ctx, - struct pipe_transfer *trans); -void* r600_texture_transfer_map(struct pipe_context *ctx, - struct pipe_transfer* transfer); -void r600_texture_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer* transfer); - -/* r600_blit.c */ -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); - -/* helpers */ -int r600_conv_pipe_format(unsigned pformat, unsigned *format); -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); - -void r600_init_screen_texture_functions(struct pipe_screen *screen); - -#endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 0c27bb7d87..d1143985ea 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -25,9 +25,7 @@ #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_dump.h" #include "util/u_format.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_shader.h" +#include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" #include "r600_opcodes.h" @@ -35,54 +33,227 @@ #include <stdio.h> #include <errno.h> +static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned spi_vs_out_id[10]; + unsigned i, tmp; + + /* clear previous register */ + rstate->nregs = 0; + + /* so far never got proper semantic id from tgsi */ + for (i = 0; i < 10; i++) { + spi_vs_out_id[i] = 0; + } + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + spi_vs_out_id[i / 4] |= tmp; + } + for (i = 0; i < 10; i++) { + r600_pipe_state_add_reg(rstate, + R_028614_SPI_VS_OUT_ID_0 + i * 4, + spi_vs_out_id[i], 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, + R_0286C4_SPI_VS_OUT_CONFIG, + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028868_SQ_PGM_RESOURCES_VS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288D0_SQ_PGM_CF_OFFSET_VS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028858_SQ_PGM_START_VS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028894_SQ_PGM_START_FS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, + 0xFFFFFFFF, NULL); -struct r600_shader_tgsi_instruction; +} -struct r600_shader_ctx { - struct tgsi_shader_info info; - struct tgsi_parse_context parse; - const struct tgsi_token *tokens; - unsigned type; - unsigned file_offset[TGSI_FILE_COUNT]; - unsigned temp_reg; - struct r600_shader_tgsi_instruction *inst_info; - struct r600_bc *bc; - struct r600_shader *shader; - u32 value[4]; - u32 *literals; - u32 nliterals; - u32 max_driver_temp_used; -}; +int r600_find_vs_semantic_index(struct r600_shader *vs, + struct r600_shader *ps, int id) +{ + struct r600_shader_io *input = &ps->input[id]; -struct r600_shader_tgsi_instruction { - unsigned tgsi_opcode; - unsigned is_op3; - unsigned r600_opcode; - int (*process)(struct r600_shader_ctx *ctx); -}; + for (int i = 0; i < vs->noutput; i++) { + if (input->name == vs->output[i].name && + input->sid == vs->output[i].sid) { + return i - 1; + } + } + return 0; +} -static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; -static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); +static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; + + /* clear previous register */ + rstate->nregs = 0; + + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + pos_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + } + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_Z_EXPORT_ENABLE(1), + S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); + } + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + num_cout++; + } + } + exports_ps |= S_028854_EXPORT_COLORS(num_cout); + if (!exports_ps) { + /* always at least export 1 component per pixel */ + exports_ps = 2; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | + S_0286CC_PERSP_GRADIENT_ENA(1); + spi_input_z = 0; + if (pos_index != -1) { + spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | + S_0286CC_BARYC_SAMPLE_CNTL(1)); + spi_input_z |= 1; + } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028840_SQ_PGM_START_PS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028850_SQ_PGM_RESOURCES_PS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028854_SQ_PGM_EXPORTS_PS, + exports_ps, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288CC_SQ_PGM_CF_OFFSET_PS, + 0x00000000, 0xFFFFFFFF, NULL); + + if (rshader->uses_kill) { + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_KILL_ENABLE(1), + S_02880C_KILL_ENABLE(1), NULL); + } + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, + 0xFFFFFFFF, NULL); +} -/* called from hw states files to find VS->FS mapping */ -int r600_find_vs_semantic_index(struct r600_context *rctx, struct r600_shader *rshader, int id) +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - int i; - struct r600_shader *vs = &rctx->vs_shader->shader; - struct r600_shader_io *input = &rshader->input[id]; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_shader *rshader = &shader->shader; + void *ptr; - for (i = 0; i < vs->noutput; i++) { - if (input->name == vs->output[i].name && - input->sid == vs->output[i].sid) { - return i - 1; + /* copy new shader */ + if (shader->bo == NULL) { + shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0); + if (shader->bo == NULL) { + return -ENOMEM; } + ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); + memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); + r600_bo_unmap(rctx->radeon, shader->bo); } + /* build state */ + rshader->flat_shade = rctx->flatshade; + switch (rshader->processor_type) { + case TGSI_PROCESSOR_VERTEX: + if (rshader->family >= CHIP_CEDAR) { + evergreen_pipe_shader_vs(ctx, shader); + } else { + r600_pipe_shader_vs(ctx, shader); + } + break; + case TGSI_PROCESSOR_FRAGMENT: + if (rshader->family >= CHIP_CEDAR) { + evergreen_pipe_shader_ps(ctx, shader); + } else { + r600_pipe_shader_ps(ctx, shader); + } + break; + default: + return -EINVAL; + } + r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); return 0; } -static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) +static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) { - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_shader *shader = &rshader->shader; const struct util_format_description *desc; enum pipe_format resource_format[160]; unsigned i, nresources = 0; @@ -92,9 +263,16 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shad if (shader->processor_type != TGSI_PROCESSOR_VERTEX) return 0; + /* doing a full memcmp fell over the refcount */ + if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && + (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) { + return 0; + } + rshader->vertex_elements = *rctx->vertex_elements; for (i = 0; i < rctx->vertex_elements->count; i++) { resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; } + r600_bo_reference(rctx->radeon, &rshader->bo, NULL); LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { switch (cf->inst) { case V_SQ_CF_WORD1_SQ_CF_INST_VTX: @@ -118,25 +296,40 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shad return r600_bc_build(&shader->bc); } -int r600_pipe_shader_create(struct pipe_context *ctx, - struct r600_context_state *rpshader, - const struct tgsi_token *tokens) +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + int r; + + if (shader == NULL) + return -EINVAL; + /* there should be enough input */ + if (rctx->vertex_elements->count < shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, shader->shader.bc.nresource); + return -EINVAL; + } + r = r600_shader_update(ctx, shader); + if (r) + return r; + return r600_pipe_shader(ctx, shader); +} + +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; int r; //fprintf(stderr, "--------------------------------------------------------------\n"); //tgsi_dump(tokens, 0); - if (rpshader == NULL) - return -ENOMEM; - rpshader->shader.family = radeon_get_family(rscreen->rw); - rpshader->shader.use_mem_constant = rscreen->use_mem_constant; - r = r600_shader_from_tgsi(tokens, &rpshader->shader); + shader->shader.family = r600_get_family(rctx->radeon); + r = r600_shader_from_tgsi(tokens, &shader->shader); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; } - r = r600_bc_build(&rpshader->shader.bc); + r = r600_bc_build(&shader->shader.bc); if (r) { R600_ERR("building bytecode failed !\n"); return r; @@ -145,81 +338,41 @@ int r600_pipe_shader_create(struct pipe_context *ctx, return 0; } -static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *state; - - state = &rpshader->rstate[0]; - radeon_state_fini(&rpshader->rstate[0]); - - return rctx->vtbl->vs_shader(rctx, rpshader, state); -} - -static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *state; - - state = &rpshader->rstate[0]; - radeon_state_fini(state); - - return rctx->vtbl->ps_shader(rctx, rpshader, state); -} - -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct r600_shader *rshader = &rpshader->shader; - int r; - void *data; +/* + * tgsi -> r600 shader + */ +struct r600_shader_tgsi_instruction; - /* copy new shader */ - radeon_ws_bo_reference(rscreen->rw, &rpshader->bo, NULL); - rpshader->bo = NULL; - rpshader->bo = radeon_ws_bo(rscreen->rw, rshader->bc.ndw * 4, - 4096, 0); - if (rpshader->bo == NULL) { - return -ENOMEM; - } - data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, ctx); - memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4); - radeon_ws_bo_unmap(rscreen->rw, rpshader->bo); - /* build state */ - rshader->flat_shade = rctx->flat_shade; - switch (rshader->processor_type) { - case TGSI_PROCESSOR_VERTEX: - r = r600_pipe_shader_vs(ctx, rpshader); - break; - case TGSI_PROCESSOR_FRAGMENT: - r = r600_pipe_shader_ps(ctx, rpshader); - break; - default: - r = -EINVAL; - break; - } - return r; -} +struct r600_shader_ctx { + struct tgsi_shader_info info; + struct tgsi_parse_context parse; + const struct tgsi_token *tokens; + unsigned type; + unsigned file_offset[TGSI_FILE_COUNT]; + unsigned temp_reg; + struct r600_shader_tgsi_instruction *inst_info; + struct r600_bc *bc; + struct r600_shader *shader; + u32 value[4]; + u32 *literals; + u32 nliterals; + u32 max_driver_temp_used; + /* needed for evergreen interpolation */ + boolean input_centroid; + boolean input_linear; + boolean input_perspective; + int num_interp_gpr; +}; -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_context *rctx = r600_context(ctx); - int r; +struct r600_shader_tgsi_instruction { + unsigned tgsi_opcode; + unsigned is_op3; + unsigned r600_opcode; + int (*process)(struct r600_shader_ctx *ctx); +}; - if (rpshader == NULL) - return -EINVAL; - /* there should be enough input */ - if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rpshader->shader.bc.nresource); - return -EINVAL; - } - r = r600_shader_update(ctx, &rpshader->shader); - if (r) - return r; - return r600_pipe_shader(ctx, rpshader); -} +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; +static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); static int tgsi_is_supported(struct r600_shader_ctx *ctx) { @@ -241,11 +394,9 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) } #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { - if (i->Src[j].Register.Dimension || - i->Src[j].Register.Absolute) { - R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, - i->Src[j].Register.Dimension, - i->Src[j].Register.Absolute); + if (i->Src[j].Register.Dimension) { + R600_ERR("unsupported src %d (dimension %d)\n", j, + i->Src[j].Register.Dimension); return -EINVAL; } } @@ -258,10 +409,33 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) return 0; } -static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) +static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) { int i, r; struct r600_bc_alu alu; + int gpr = 0, base_chan = 0; + int ij_index = 0; + + if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { + ij_index = 0; + if (ctx->shader->input[input].centroid) + ij_index++; + } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { + ij_index = 0; + /* if we have perspective add one */ + if (ctx->input_perspective) { + ij_index++; + /* if we have perspective centroid */ + if (ctx->input_centroid) + ij_index++; + } + if (ctx->shader->input[input].centroid) + ij_index++; + } + + /* work out gpr and base_chan from index */ + gpr = ij_index / 2; + base_chan = (2 * (ij_index % 2)) + 1; for (i = 0; i < 8; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -272,13 +446,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; if ((i > 1) && (i < 6)) { - alu.dst.sel = ctx->shader->input[gpr].gpr; + alu.dst.sel = ctx->shader->input[input].gpr; alu.dst.write = 1; } alu.dst.chan = i % 4; - alu.src[0].chan = (1 - (i % 2)); - alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; + + alu.src[0].sel = gpr; + alu.src[0].chan = (base_chan - (i % 2)); + + alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; alu.bank_swizzle_force = SQ_ALU_VEC_210; if ((i % 4) == 3) @@ -304,6 +481,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; ctx->shader->input[i].interpolate = d->Declaration.Interpolate; + ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; if (ctx->type == TGSI_PROCESSOR_VERTEX) { /* turn input into fetch */ @@ -320,13 +498,19 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) vtx.dst_sel_y = 1; vtx.dst_sel_z = 2; vtx.dst_sel_w = 3; + vtx.use_const_fields = 1; r = r600_bc_add_vtx(ctx->bc, &vtx); if (r) return r; } if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { /* turn input into interpolate on EG */ - evergreen_interp_alu(ctx, i); + if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { + if (ctx->shader->input[i].interpolate > 0) { + ctx->shader->input[i].lds_pos = ctx->shader->nlds++; + evergreen_interp_alu(ctx, i); + } + } } break; case TGSI_FILE_OUTPUT: @@ -353,6 +537,53 @@ static int r600_get_temp(struct r600_shader_ctx *ctx) return ctx->temp_reg + ctx->max_driver_temp_used++; } +/* + * for evergreen we need to scan the shader to find the number of GPRs we need to + * reserve for interpolation. + * + * we need to know if we are going to emit + * any centroid inputs + * if perspective and linear are required +*/ +static int evergreen_gpr_count(struct r600_shader_ctx *ctx) +{ + int i; + int num_baryc; + + ctx->input_linear = FALSE; + ctx->input_perspective = FALSE; + ctx->input_centroid = FALSE; + ctx->num_interp_gpr = 1; + + /* any centroid inputs */ + for (i = 0; i < ctx->info.num_inputs; i++) { + /* skip position/face */ + if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) + continue; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) + ctx->input_linear = TRUE; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) + ctx->input_perspective = TRUE; + if (ctx->info.input_centroid[i]) + ctx->input_centroid = TRUE; + } + + num_baryc = 0; + /* ignoring sample for now */ + if (ctx->input_perspective) + num_baryc++; + if (ctx->input_linear) + num_baryc++; + if (ctx->input_centroid) + num_baryc *= 2; + + ctx->num_interp_gpr += (num_baryc + 1) >> 1; + + /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ + return ctx->num_interp_gpr; +} + int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; @@ -367,7 +598,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = r600_bc_init(ctx.bc, shader->family); if (r) return r; - ctx.bc->use_mem_constant = shader->use_mem_constant; ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -399,14 +629,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; } + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { + ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); + } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_count[TGSI_FILE_INPUT]; ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + ctx.info.file_count[TGSI_FILE_OUTPUT]; - if (ctx.shader->use_mem_constant) - ctx.file_offset[TGSI_FILE_CONSTANT] = 128; - else - ctx.file_offset[TGSI_FILE_CONSTANT] = 256; + + ctx.file_offset[TGSI_FILE_CONSTANT] = 128; ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + @@ -497,7 +728,14 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { output[i].array_base = 61; output[i].swizzle_x = 2; - output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].swizzle_y = 7; + output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { + output[i].array_base = 61; + output[i].swizzle_x = 7; + output[i].swizzle_y = 1; + output[i].swizzle_z = output[i].swizzle_w = 7; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); @@ -603,6 +841,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx, if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; return 0; } @@ -658,13 +897,14 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s } } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { + if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[j].sel; + alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; + alu.src[0].rel = r600_src[i].rel; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -674,7 +914,8 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s if (r) return r; } - r600_src[j].sel = treg; + r600_src[i].sel = treg; + r600_src[i].rel =0; j--; } } @@ -693,13 +934,13 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ nliteral++; } } - for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) { + for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[j].sel; + alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; alu.dst.sel = treg; alu.dst.chan = k; @@ -710,11 +951,11 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); + r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); if (r) return r; - r600_src[j].sel = treg; - j++; + r600_src[i].sel = treg; + j--; } } return 0; @@ -737,6 +978,9 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -807,6 +1051,9 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; r = tgsi_split_literal_constant(ctx, r600_src); if (r) @@ -1230,34 +1477,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) return tgsi_helper_tempx_replicate(ctx); } -static int tgsi_trans(struct r600_shader_ctx *ctx) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, r; - - for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - if (inst->Dst[0].Register.WriteMask & (1 << i)) { - alu.inst = ctx->inst_info->r600_opcode; - for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); - if (r) - return r; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); - } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - } - return 0; -} - static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -1374,6 +1593,9 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { @@ -1470,6 +1692,9 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; /* do it in 2 step as op3 doesn't support writemask */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1502,6 +1727,9 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; @@ -1575,7 +1803,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - + for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -1728,7 +1956,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = src_gpr; - alu.src[0].chan = i; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1748,14 +1976,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; - tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.sampler_id = tex.resource_id; + tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + tex.resource_id = tex.sampler_id; + if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) + tex.resource_id += PIPE_MAX_ATTRIBS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; - tex.dst_sel_x = 0; - tex.dst_sel_y = 1; - tex.dst_sel_z = 2; - tex.dst_sel_w = 3; + tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; + tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; + tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; + tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; tex.src_sel_x = 0; tex.src_sel_y = 1; tex.src_sel_z = 2; @@ -1798,6 +2028,9 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; /* 1 - src0 */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1877,6 +2110,9 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; if (inst->Dst[0].Register.WriteMask != 0xf) use_temp = 1; @@ -1928,7 +2164,10 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; - + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; + for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -2371,7 +2610,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } /* r6/7 only for now */ -static int tgsi_arl(struct r600_shader_ctx *ctx) +static int tgsi_eg_arl(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + alu.last = 1; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + return 0; +} +static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -2716,7 +2988,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) } static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, + {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, @@ -2797,7 +3069,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ @@ -2880,7 +3152,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { }; static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, @@ -2955,7 +3227,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 06dd65038d..f8bc595139 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -31,6 +31,8 @@ struct r600_shader_io { unsigned done; int sid; unsigned interpolate; + boolean centroid; + unsigned lds_pos; /* for evergreen */ }; struct r600_shader { @@ -39,11 +41,11 @@ struct r600_shader { boolean flat_shade; unsigned ninput; unsigned noutput; + unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; boolean uses_kill; - boolean use_mem_constant; }; int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 86f9825b52..81d25b5420 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -19,202 +19,767 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse + */ + +/* TODO: + * - fix mask for depth control & cull for query */ #include <stdio.h> #include <errno.h> -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_memory.h" -#include "util/u_pack_color.h" -#include "r600_screen.h" -#include "r600_context.h" +#include <pipe/p_defines.h> +#include <pipe/p_state.h> +#include <pipe/p_context.h> +#include <tgsi/tgsi_scan.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_util.h> +#include <util/u_double_list.h> +#include <util/u_pack_color.h> +#include <util/u_memory.h> +#include <util/u_inlines.h> +#include <util/u_upload_mgr.h> +#include <util/u_index_modify.h> +#include <util/u_framebuffer.h> +#include <pipebuffer/pb_buffer.h> +#include "r600.h" +#include "r600d.h" #include "r600_resource.h" +#include "r600_shader.h" +#include "r600_pipe.h" +#include "r600_state_inlines.h" + +static void r600_draw_common(struct r600_drawl *draw) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + unsigned i, j, offset, prim; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct pipe_vertex_buffer *vertex_buffer; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + + switch (draw->index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + R600_ERR("unsupported index size %d\n", draw->index_size); + return; + } + if (r600_conv_pipe_prim(draw->mode, &prim)) + return; + + + /* rebuild vertex shader if input format changed */ + if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) + return; + if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) + return; + + for (i = 0 ; i < rctx->vertex_elements->count; i++) { + uint32_t word2, format; + + rstate = &rctx->vs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + j = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[j]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->elements[i].src_offset + + vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); + + format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + + word2 = format | S_038008_STRIDE(vertex_buffer->stride); + + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i); + } -static void clean_flush(struct r600_context *rctx, struct radeon_state *flush); -static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush); -static int setup_db_flush(struct r600_context *rctx, struct radeon_state *flush); + mask = 0; + for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + mask |= (0xF << (i * 4)); + } + + vgt.id = R600_PIPE_STATE_VGT; + vgt.nregs = 0; + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); + /* build late state */ + if (rctx->rasterizer && rctx->framebuffer.zsbuf) { + float offset_units = rctx->rasterizer->offset_units; + unsigned offset_db_fmt_cntl = 0, depth; + + switch (rctx->framebuffer.zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + return; + } + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + r600_pipe_state_add_reg(&vgt, + R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &vgt); + + rdraw.vgt_num_indices = draw->count; + rdraw.vgt_num_instances = 1; + rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.indices = NULL; + if (draw->index_buffer) { + rbuffer = (struct r600_resource*)draw->index_buffer; + rdraw.indices = rbuffer->bo; + rdraw.indices_bo_offset = draw->index_buffer_offset; + } + r600_context_draw(&rctx->ctx, &rdraw); +} + +void r600_translate_index_buffer(struct r600_pipe_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count) +{ + switch (*index_size) { + case 1: + util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); + *index_size = 2; + *start = 0; + break; + + case 2: + if (*start % 2 != 0) { + util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); + *start = 0; + } + break; -static struct r600_context_state *r600_new_context_state(unsigned type) + case 4: + break; + } +} + +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { - struct r600_context_state *rstate = CALLOC_STRUCT(r600_context_state); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_drawl draw; + + if (rctx->any_user_vbs) { + r600_upload_user_buffers(rctx); + rctx->any_user_vbs = FALSE; + } + + memset(&draw, 0, sizeof(struct r600_drawl)); + draw.ctx = ctx; + draw.mode = info->mode; + draw.start = info->start; + draw.count = info->count; + if (info->indexed && rctx->index_buffer.buffer) { + draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->index_bias; + + r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, + &rctx->index_buffer.index_size, + &draw.start, + info->count); + + draw.index_size = rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); + draw.index_buffer_offset = draw.start * draw.index_size; + draw.start = 0; + r600_upload_index_buffer(rctx, &draw); + } else { + draw.index_size = 0; + draw.index_buffer = NULL; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->start; + } + r600_draw_common(&draw); + + pipe_resource_reference(&draw.index_buffer, NULL); +} + +static void r600_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + if (rstate == NULL) - return NULL; - rstate->type = type; - rstate->refcount = 1; - return rstate; + return; + + rstate->id = R600_PIPE_STATE_BLEND_COLOR; + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); + rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } static void *r600_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); + struct r600_pipe_state *rstate; + u32 color_control, target_mask; - rstate = r600_new_context_state(pipe_blend_type); - rstate->state.blend = *state; - rctx->vtbl->blend(rctx, &rstate->rstate[0], &rstate->state.blend); - + if (blend == NULL) { + return NULL; + } + rstate = &blend->rstate; + + rstate->id = R600_PIPE_STATE_BLEND; + + target_mask = 0; + color_control = S_028808_PER_MRT_BLEND(1); + if (state->logicop_enable) { + color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); + } else { + color_control |= (0xcc << 16); + } + /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ + if (state->independent_blend_enable) { + for (int i = 0; i < 8; i++) { + if (state->rt[i].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (state->rt[i].colormask << (4 * i)); + } + } else { + for (int i = 0; i < 8; i++) { + if (state->rt[0].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (state->rt[0].colormask << (4 * i)); + } + } + blend->cb_target_mask = target_mask; + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + color_control, 0xFFFFFFFF, NULL); + + for (int i = 0; i < 8; i++) { + unsigned eqRGB = state->rt[i].rgb_func; + unsigned srcRGB = state->rt[i].rgb_src_factor; + unsigned dstRGB = state->rt[i].rgb_dst_factor; + + unsigned eqA = state->rt[i].alpha_func; + unsigned srcA = state->rt[i].alpha_src_factor; + unsigned dstA = state->rt[i].alpha_dst_factor; + uint32_t bc = 0; + + if (!state->rt[i].blend_enable) + continue; + + bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); + bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); + bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); + + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + bc |= S_028804_SEPARATE_ALPHA_BLEND(1); + bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); + bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); + bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); + } + + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); + if (i == 0) { + r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); + } + } return rstate; } +static void r600_bind_blend_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; + struct r600_pipe_state *rstate; + + if (state == NULL) + return; + rstate = &blend->rstate; + rctx->states[rstate->id] = rstate; + rctx->cb_target_mask = blend->cb_target_mask; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + static void *r600_create_dsa_state(struct pipe_context *ctx, const struct pipe_depth_stencil_alpha_state *state) { - struct r600_context_state *rstate; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; + unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; + + if (rstate == NULL) { + return NULL; + } + + rstate->id = R600_PIPE_STATE_DSA; + /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ + /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be + * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will + * be set if shader use texkill instruction + */ + db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + stencil_ref_mask = 0; + stencil_ref_mask_bf = 0; + db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | + S_028800_Z_WRITE_ENABLE(state->depth.writemask) | + S_028800_ZFUNC(state->depth.func); + + /* stencil */ + if (state->stencil[0].enabled) { + db_depth_control |= S_028800_STENCIL_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); + db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); + db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); + + + stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | + S_028430_STENCILWRITEMASK(state->stencil[0].writemask); + if (state->stencil[1].enabled) { + db_depth_control |= S_028800_BACKFACE_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); + db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); + db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); + stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | + S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); + } + } + + /* alpha */ + alpha_test_control = 0; + alpha_ref = 0; + if (state->alpha.enabled) { + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); + alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); + alpha_ref = fui(state->alpha.ref_value); + } + + /* misc */ + db_render_control = 0; + db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | + S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | + S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); + /* TODO db_render_override depends on query */ + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, stencil_ref_mask, + 0xFFFFFFFF & C_028430_STENCILREF, NULL); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); + r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); - rstate = r600_new_context_state(pipe_dsa_type); - rstate->state.dsa = *state; return rstate; } static void *r600_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state) { - struct r600_context_state *rstate; + struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); + struct r600_pipe_state *rstate; + unsigned tmp; + unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; + + if (rs == NULL) { + return NULL; + } + + rstate = &rs->rstate; + rs->flatshade = state->flatshade; + rs->sprite_coord_enable = state->sprite_coord_enable; + + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + /* offset */ + rs->offset_units = state->offset_units; + rs->offset_scale = state->offset_scale * 12.0f; + + rstate->id = R600_PIPE_STATE_RASTERIZER; + if (state->flatshade_first) + prov_vtx = 0; + tmp = 0x00000001; + if (state->sprite_coord_enable) { + tmp |= S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(2) | + S_0286D4_PNT_SPRITE_OVRD_Y(3) | + S_0286D4_PNT_SPRITE_OVRD_Z(0) | + S_0286D4_PNT_SPRITE_OVRD_W(1); + if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { + tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); + } + } + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + + polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || + state->fill_back != PIPE_POLYGON_MODE_FILL); + r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL, + S_028814_PROVOKING_VTX_LAST(prov_vtx) | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | + S_028814_POLY_MODE(polygon_dual_mode) | + S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, + S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + /* point size 12.4 fixed point */ + tmp = (unsigned)(state->point_size * 8.0); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + + tmp = (unsigned)(state->line_width * 8.0); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); - rstate = r600_new_context_state(pipe_rasterizer_type); - rstate->state.rasterizer = *state; return rstate; } +static void r600_bind_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (state == NULL) + return; + + rctx->flatshade = rs->flatshade; + rctx->sprite_coord_enable = rs->sprite_coord_enable; + rctx->rasterizer = rs; + + rctx->states[rs->rstate.id] = &rs->rstate; + r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); +} + +static void r600_delete_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + + if (rctx->rasterizer == rs) { + rctx->rasterizer = NULL; + } + if (rctx->states[rs->rstate.id] == &rs->rstate) { + rctx->states[rs->rstate.id] = NULL; + } + free(rs); +} + static void *r600_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + union util_color uc; + + if (rstate == NULL) { + return NULL; + } - rstate = r600_new_context_state(pipe_sampler_type); - rstate->state.sampler = *state; - rctx->vtbl->sampler(rctx, &rstate->rstate[0], &rstate->state.sampler, 0); - rctx->vtbl->sampler_border(rctx, &rstate->rstate[1], &rstate->state.sampler, 0); + rstate->id = R600_PIPE_STATE_SAMPLER; + util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, + S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | + S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | + S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | + S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | + S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | + S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | + S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + /* FIXME LOD it depends on texture base level ... */ + r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | + S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + if (uc.ui) { + r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + } return rstate; } -static void r600_remove_sampler_view(struct r600_shader_sampler_states *sampler, - struct r600_context_state *rstate) +static void *r600_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements) { - int i, j; - - for (i = 0; i < sampler->nview; i++) { - for (j = 0; j < rstate->nrstate; j++) { - if (sampler->view[i] == &rstate->rstate[j]) - sampler->view[i] = NULL; - } - } + struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + + assert(count < 32); + v->count = count; + v->refcount = 1; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + return v; } + static void r600_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) { - struct r600_context_state *rstate = (struct r600_context_state *)state; - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - /* need to search list of vs/ps sampler views and remove it from any - uggh */ - r600_remove_sampler_view(&rctx->ps_sampler, rstate); - r600_remove_sampler_view(&rctx->vs_sampler, rstate); - r600_context_state_decref(rstate); + pipe_resource_reference(&state->texture, NULL); + FREE(resource); } static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct r600_context_state *rstate; - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); + struct r600_pipe_state *rstate; + const struct util_format_description *desc; + struct r600_resource_texture *tmp; + struct r600_resource *rbuffer; + unsigned format; + uint32_t word4 = 0, yuv_format = 0, pitch = 0; + unsigned char swizzle[4], array_mode = 0, tile_type = 0; + struct r600_bo *bo[2]; - rstate = r600_new_context_state(pipe_sampler_view_type); - rstate->state.sampler_view = *state; - rstate->state.sampler_view.texture = NULL; + if (resource == NULL) + return NULL; + rstate = &resource->state; + + /* initialize base object */ + resource->base = *state; + resource->base.texture = NULL; pipe_reference(NULL, &texture->reference); - rstate->state.sampler_view.texture = texture; - rstate->state.sampler_view.reference.count = 1; - rstate->state.sampler_view.context = ctx; - rctx->vtbl->resource(ctx, &rstate->rstate[0], &rstate->state.sampler_view, 0); - return &rstate->state.sampler_view; + resource->base.texture = texture; + resource->base.reference.count = 1; + resource->base.context = ctx; + + swizzle[0] = state->swizzle_r; + swizzle[1] = state->swizzle_g; + swizzle[2] = state->swizzle_b; + swizzle[3] = state->swizzle_a; + format = r600_translate_texformat(state->format, + swizzle, + &word4, &yuv_format); + if (format == ~0) { + format = 0; + } + desc = util_format_description(state->format); + if (desc == NULL) { + R600_ERR("unknow format %d\n", state->format); + } + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + /* FIXME depth texture decompression */ + if (tmp->depth) { + r600_texture_depth_flush(ctx, texture); + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->flushed_depth_texture->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + } + pitch = align(tmp->pitch_in_pixels[0], 8); + if (tmp->tiled) { + array_mode = tmp->array_mode; + tile_type = tmp->tile_type; + } + + /* FIXME properly handle first level != 0 */ + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, + S_038000_DIM(r600_tex_dim(texture->target)) | + S_038000_TILE_MODE(array_mode) | + S_038000_TILE_TYPE(tile_type) | + S_038000_PITCH((pitch / 8) - 1) | + S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, + S_038004_TEX_HEIGHT(texture->height0 - 1) | + S_038004_TEX_DEPTH(texture->depth0 - 1) | + S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, + (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, + (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, + word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | + S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | + S_038010_REQUEST_SIZE(1) | + S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, + S_038014_LAST_LEVEL(state->last_level) | + S_038014_BASE_ARRAY(0) | + S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, + S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); + + return &resource->base; } -static void r600_set_sampler_view(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views, - struct r600_shader_sampler_states *sampler, - unsigned shader_id) +static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - unsigned i; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - for (i = 0; i < sampler->nview; i++) { - radeon_draw_unbind(&rctx->draw, sampler->view[i]); + for (int i = 0; i < count; i++) { + if (resource[i]) { + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + } } +} + +static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + int i; for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)views[i]; - if (rstate) { - rstate->nrstate = 0; + if (&rctx->ps_samplers.views[i]->base != views[i]) { + if (resource[i]) + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + else + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + + pipe_sampler_view_reference( + (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], + views[i]); + } } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)views[i]; - if (rstate) { - if (rstate->nrstate >= R600_MAX_RSTATE) - continue; - if (rstate->nrstate) { - memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); - } - radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, shader_id); - sampler->view[i] = &rstate->rstate[rstate->nrstate]; - rstate->nrstate++; + for (i = count; i < NUM_TEX_UNITS; i++) { + if (rctx->ps_samplers.views[i]) { + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } - sampler->nview = count; + rctx->ps_samplers.n_views = count; } -static void r600_set_ps_sampler_view(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) +static void r600_bind_state(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - r600_set_sampler_view(ctx, count, views, &rctx->ps_sampler, R600_SHADER_PS); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + + if (state == NULL) + return; + rctx->states[rstate->id] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_vs_sampler_view(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) +static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) { - struct r600_context *rctx = r600_context(ctx); - r600_set_sampler_view(ctx, count, views, &rctx->vs_sampler, R600_SHADER_VS); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + + memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); + rctx->ps_samplers.n_samplers = count; + + for (int i = 0; i < count; i++) { + r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); + } } -static void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) +static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - int r; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - rstate = r600_new_context_state(pipe_shader_type); - rstate->state.shader = *state; - r = r600_pipe_shader_create(&rctx->context, rstate, rstate->state.shader.tokens); - if (r) { - r600_context_state_decref(rstate); - return NULL; + for (int i = 0; i < count; i++) { + r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); } - return rstate; } -static void *r600_create_vertex_elements(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) +static void r600_delete_state(struct pipe_context *ctx, void *state) { - struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - assert(count < 32); - v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - v->refcount = 1; - return v; + if (rctx->states[rstate->id] == rstate) { + rctx->states[rstate->id] = NULL; + } + for (int i = 0; i < rstate->nregs; i++) { + r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + } + free(rstate); } static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) @@ -228,279 +793,475 @@ static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) free(v); } +static void r600_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + + if (rstate == NULL) + return; + + rctx->clip = *state; + rstate->id = R600_PIPE_STATE_CLIP; + for (int i = 0; i < state->nr; i++) { + r600_pipe_state_add_reg(rstate, + R_028E20_PA_CL_UCP0_X + i * 4, + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028E24_PA_CL_UCP0_Y + i * 4, + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028E28_PA_CL_UCP0_Z + i * 4, + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028E2C_PA_CL_UCP0_W + i * 4, + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + } + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, + S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | + S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_CLIP]); + rctx->states[R600_PIPE_STATE_CLIP] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = (struct r600_vertex_element*)state; r600_delete_vertex_element(ctx, rctx->vertex_elements); rctx->vertex_elements = v; if (v) { v->refcount++; +// rctx->vs_rebuild = TRUE; } } -static void r600_bind_rasterizer_state(struct pipe_context *ctx, void *state) +static void r600_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; - - if (state == NULL) - return; - rctx->rasterizer = r600_context_state_decref(rctx->rasterizer); - rctx->rasterizer = r600_context_state_incref(rstate); } -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) +static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; - - if (state == NULL) - return; - rctx->blend = r600_context_state_decref(rctx->blend); - rctx->blend = r600_context_state_incref(rstate); - } -static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) +static void r600_set_scissor_state(struct pipe_context *ctx, + const struct pipe_scissor_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tl, br; - if (state == NULL) + if (rstate == NULL) return; - rctx->dsa = r600_context_state_decref(rctx->dsa); - rctx->dsa = r600_context_state_incref(rstate); -} - -static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; - - rctx->ps_shader = r600_context_state_decref(rctx->ps_shader); - rctx->ps_shader = r600_context_state_incref(rstate); -} - -static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; - rctx->vs_shader = r600_context_state_decref(rctx->vs_shader); - rctx->vs_shader = r600_context_state_incref(rstate); + rstate->id = R600_PIPE_STATE_SCISSOR; + tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + r600_pipe_state_add_reg(rstate, + R_028210_PA_SC_CLIPRECT_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028214_PA_SC_CLIPRECT_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028218_PA_SC_CLIPRECT_1_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02821C_PA_SC_CLIPRECT_1_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028220_PA_SC_CLIPRECT_2_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028224_PA_SC_CLIPRECT_2_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028228_PA_SC_CLIPRECT_3_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02822C_PA_SC_CLIPRECT_3_BR, br, + 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_SCISSOR]); + rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_bind_sampler_shader(struct pipe_context *ctx, - unsigned count, void **states, - struct r600_shader_sampler_states *sampler, unsigned shader_id) +static void r600_set_stencil_ref(struct pipe_context *ctx, + const struct pipe_stencil_ref *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - unsigned i; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tmp; - for (i = 0; i < sampler->nsampler; i++) { - radeon_draw_unbind(&rctx->draw, sampler->sampler[i]); - } - for (i = 0; i < sampler->nborder; i++) { - radeon_draw_unbind(&rctx->draw, sampler->border[i]); - } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)states[i]; - if (rstate) { - rstate->nrstate = 0; - } - } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)states[i]; - if (rstate) { - if (rstate->nrstate >= R600_MAX_RSTATE) - continue; - if (rstate->nrstate) { - memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); - memcpy(&rstate->rstate[rstate->nrstate+1], &rstate->rstate[1], sizeof(struct radeon_state)); - } - radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, shader_id); - radeon_state_convert(&rstate->rstate[rstate->nrstate + 1], R600_STATE_SAMPLER_BORDER, i, shader_id); - sampler->sampler[i] = &rstate->rstate[rstate->nrstate]; - sampler->border[i] = &rstate->rstate[rstate->nrstate + 1]; - rstate->nrstate += 2; - } - } - sampler->nsampler = count; - sampler->nborder = count; -} + if (rstate == NULL) + return; -static void r600_bind_ps_sampler(struct pipe_context *ctx, - unsigned count, void **states) -{ - struct r600_context *rctx = r600_context(ctx); - r600_bind_sampler_shader(ctx, count, states, &rctx->ps_sampler, R600_SHADER_PS); + rctx->stencil_ref = *state; + rstate->id = R600_PIPE_STATE_STENCIL_REF; + tmp = S_028430_STENCILREF(state->ref_value[0]); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, tmp, + ~C_028430_STENCILREF, NULL); + tmp = S_028434_STENCILREF_BF(state->ref_value[1]); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, tmp, + ~C_028434_STENCILREF_BF, NULL); + + free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); + rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_bind_vs_sampler(struct pipe_context *ctx, - unsigned count, void **states) +static void r600_set_viewport_state(struct pipe_context *ctx, + const struct pipe_viewport_state *state) { - struct r600_context *rctx = r600_context(ctx); - r600_bind_sampler_shader(ctx, count, states, &rctx->vs_sampler, R600_SHADER_VS); -} + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); -static void r600_delete_state(struct pipe_context *ctx, void *state) -{ - struct r600_context_state *rstate = (struct r600_context_state *)state; + if (rstate == NULL) + return; - r600_context_state_decref(rstate); + rctx->viewport = *state; + rstate->id = R600_PIPE_STATE_VIEWPORT; + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_VIEWPORT]); + rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_blend_color(struct pipe_context *ctx, - const struct pipe_blend_color *color) +static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state, int cb) { - struct r600_context *rctx = r600_context(ctx); - - rctx->blend_color = *color; + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level = state->cbufs[cb]->level; + unsigned pitch, slice; + unsigned color_info; + unsigned format, swap, ntype; + const struct util_format_description *desc; + struct r600_bo *bo[3]; + + rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + rbuffer = &rtex->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + bo[2] = rbuffer->bo; + + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; + ntype = 0; + desc = util_format_description(rtex->resource.base.b.format); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = V_0280A0_NUMBER_SRGB; + + format = r600_translate_colorformat(rtex->resource.base.b.format); + swap = r600_translate_colorswap(rtex->resource.base.b.format); + color_info = S_0280A0_FORMAT(format) | + S_0280A0_COMP_SWAP(swap) | + S_0280A0_ARRAY_MODE(rtex->array_mode) | + S_0280A0_BLEND_CLAMP(1) | + S_0280A0_NUMBER_TYPE(ntype); + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + color_info |= S_0280A0_SOURCE_FORMAT(1); + + r600_pipe_state_add_reg(rstate, + R_028040_CB_COLOR0_BASE + cb * 4, + (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_0280A0_CB_COLOR0_INFO + cb * 4, + color_info, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_028060_CB_COLOR0_SIZE + cb * 4, + S_028060_PITCH_TILE_MAX(pitch) | + S_028060_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028080_CB_COLOR0_VIEW + cb * 4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0280E0_CB_COLOR0_FRAG + cb * 4, + r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]); + r600_pipe_state_add_reg(rstate, + R_0280C0_CB_COLOR0_TILE + cb * 4, + r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]); + r600_pipe_state_add_reg(rstate, + R_028100_CB_COLOR0_MASK + cb * 4, + 0x00000000, 0xFFFFFFFF, NULL); } -static void r600_set_clip_state(struct pipe_context *ctx, - const struct pipe_clip_state *state) +static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level; + unsigned pitch, slice, format; - r600_context_state_decref(rctx->clip); + if (state->zsbuf == NULL) + return; - rstate = r600_new_context_state(pipe_clip_type); - rstate->state.clip = *state; - rctx->vtbl->ucp(rctx, &rstate->rstate[0], &rstate->state.clip); - rctx->clip = rstate; + rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rtex->tiled = 1; + rtex->array_mode = 2; + rtex->tile_type = 1; + rtex->depth = 1; + rbuffer = &rtex->resource; + + level = state->zsbuf->level; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; + format = r600_translate_dbformat(state->zsbuf->texture->format); + + r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, + (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, + S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO, + S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format), + 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, + (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL); } static void r600_set_framebuffer_state(struct pipe_context *ctx, const struct pipe_framebuffer_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - int i; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 shader_mask, tl, br, shader_control, target_mask; - if (rctx->framebuffer) { - for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) - radeon_draw_unbind(&rctx->draw, &rctx->framebuffer->rstate[i+1]); - radeon_draw_unbind(&rctx->draw, &rctx->framebuffer->rstate[0]); - } - clean_flush(rctx, &rctx->hw_states.cb_flush); - clean_flush(rctx, &rctx->hw_states.db_flush); - rctx->pframebuffer = NULL; - r600_context_state_decref(rctx->framebuffer); + if (rstate == NULL) + return; - rstate = r600_new_context_state(pipe_framebuffer_type); - rstate->state.framebuffer = *state; - for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { - pipe_reference(NULL, &state->cbufs[i]->reference); - } - pipe_reference(NULL, &state->zsbuf->reference); - rctx->framebuffer = rstate; - rctx->pframebuffer = &rstate->state.framebuffer; - for (i = 0; i < state->nr_cbufs; i++) { - rctx->vtbl->cb(rctx, &rstate->rstate[i+1], state, i); + /* unreference old buffer and reference new one */ + rstate->id = R600_PIPE_STATE_FRAMEBUFFER; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + + rctx->pframebuffer = &rctx->framebuffer; + + /* build states */ + for (int i = 0; i < state->nr_cbufs; i++) { + r600_cb(rctx, rstate, state, i); } if (state->zsbuf) { - rctx->vtbl->db(rctx, &rstate->rstate[0], state); + r600_db(rctx, rstate, state); } - /* setup flush states */ - setup_cb_flush(rctx, &rctx->hw_states.cb_flush); - setup_db_flush(rctx, &rctx->hw_states.db_flush); - - return; -} - -static void r600_set_polygon_stipple(struct pipe_context *ctx, - const struct pipe_poly_stipple *state) -{ -} - -static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) -{ -} -static void r600_set_scissor_state(struct pipe_context *ctx, - const struct pipe_scissor_state *state) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - - r600_context_state_decref(rctx->scissor); + target_mask = 0x00000000; + target_mask = 0xFFFFFFFF; + shader_mask = 0; + shader_control = 0; + for (int i = 0; i < state->nr_cbufs; i++) { + target_mask ^= 0xf << (i * 4); + shader_mask |= 0xf << (i * 4); + shader_control |= 1 << i; + } + tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); + + r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028244_PA_SC_GENERIC_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + if (rctx->family >= CHIP_RV770) { + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); + } - rstate = r600_new_context_state(pipe_scissor_type); - rstate->state.scissor = *state; - rctx->scissor = rstate; + r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL, + shader_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, + 0x00000000, target_mask, NULL); + r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, + shader_mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C30_CB_CLRCMP_CONTROL, + 0x01000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C34_CB_CLRCMP_SRC, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C38_CB_CLRCMP_DST, + 0x000000FF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C3C_CB_CLRCMP_MSK, + 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C48_PA_SC_AA_MASK, + 0xFFFFFFFF, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); + rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_stencil_ref(struct pipe_context *ctx, - const struct pipe_stencil_ref *state) +static void r600_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - r600_context_state_decref(rctx->stencil_ref); + if (ib) { + pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); + memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); + } else { + pipe_resource_reference(&rctx->index_buffer.buffer, NULL); + memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); + } - rstate = r600_new_context_state(pipe_stencil_ref_type); - rstate->state.stencil_ref = *state; - rctx->stencil_ref = rstate; + /* TODO make this more like a state */ } -static void r600_set_vertex_buffers(struct pipe_context *ctx, - unsigned count, +static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_buffer *buffers) { - struct r600_context *rctx = r600_context(ctx); - unsigned i; - boolean any_user_buffers = FALSE; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - for (i = 0; i < rctx->nvertex_buffer; i++) { + for (int i = 0; i < rctx->nvertex_buffer; i++) { pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); } memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - for (i = 0; i < count; i++) { + for (int i = 0; i < count; i++) { rctx->vertex_buffer[i].buffer = NULL; if (r600_buffer_is_user_buffer(buffers[i].buffer)) - any_user_buffers = TRUE; + rctx->any_user_vbs = TRUE; pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); } - rctx->any_user_vbs = any_user_buffers; rctx->nvertex_buffer = count; } -static void r600_set_index_buffer(struct pipe_context *ctx, - const struct pipe_index_buffer *ib) +static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer) { - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer = (struct r600_resource*)buffer; + + switch (shader) { + case PIPE_SHADER_VERTEX: + rctx->vs_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028180_ALU_CONST_BUFFER_SIZE_VS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028980_ALU_CONST_CACHE_VS_0, + r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + break; + case PIPE_SHADER_FRAGMENT: + rctx->ps_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028140_ALU_CONST_BUFFER_SIZE_PS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028940_ALU_CONST_CACHE_PS_0, + r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + break; + default: + R600_ERR("unsupported %d\n", shader); + return; + } +} - if (ib) { - pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); - memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } else { - pipe_resource_reference(&rctx->index_buffer.buffer, NULL); - memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); +static void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); + int r; + + r = r600_pipe_shader_create(ctx, shader, state->tokens); + if (r) { + return NULL; } + return shader; +} - /* TODO make this more like a state */ +static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->ps_shader = (struct r600_pipe_shader *)state; } -static void r600_set_viewport_state(struct pipe_context *ctx, - const struct pipe_viewport_state *state) +static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->vs_shader = (struct r600_pipe_shader *)state; +} - r600_context_state_decref(rctx->viewport); +static void r600_delete_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - rstate = r600_new_context_state(pipe_viewport_type); - rstate->state.viewport = *state; - rctx->vtbl->viewport(rctx, &rstate->rstate[0], &rstate->state.viewport); - rctx->viewport = rstate; + if (rctx->ps_shader == shader) { + rctx->ps_shader = NULL; + } + /* TODO proper delete */ + free(shader); } -void r600_init_state_functions(struct r600_context *rctx) +static void r600_delete_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->vs_shader == shader) { + rctx->vs_shader = NULL; + } + /* TODO proper delete */ + free(shader); +} + +void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; @@ -511,30 +1272,23 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.create_vertex_elements_state = r600_create_vertex_elements; rctx->context.create_vs_state = r600_create_shader_state; rctx->context.bind_blend_state = r600_bind_blend_state; - rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state; + rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler; rctx->context.bind_fs_state = r600_bind_ps_shader; - rctx->context.bind_rasterizer_state = r600_bind_rasterizer_state; + rctx->context.bind_rasterizer_state = r600_bind_rs_state; rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler; rctx->context.bind_vs_state = r600_bind_vs_shader; rctx->context.delete_blend_state = r600_delete_state; rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; - rctx->context.delete_fs_state = r600_delete_state; - rctx->context.delete_rasterizer_state = r600_delete_state; + rctx->context.delete_fs_state = r600_delete_ps_shader; + rctx->context.delete_rasterizer_state = r600_delete_rs_state; rctx->context.delete_sampler_state = r600_delete_state; rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; - rctx->context.delete_vs_state = r600_delete_state; + rctx->context.delete_vs_state = r600_delete_vs_shader; rctx->context.set_blend_color = r600_set_blend_color; rctx->context.set_clip_state = r600_set_clip_state; - - if (radeon_get_family_class(rctx->rw) == EVERGREEN) - rctx->context.set_constant_buffer = eg_set_constant_buffer; - else if (rctx->screen->use_mem_constant) - rctx->context.set_constant_buffer = r600_set_constant_buffer_mem; - else - rctx->context.set_constant_buffer = r600_set_constant_buffer_file; - + rctx->context.set_constant_buffer = r600_set_constant_buffer; rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view; rctx->context.set_framebuffer_state = r600_set_framebuffer_state; rctx->context.set_polygon_stipple = r600_set_polygon_stipple; @@ -548,174 +1302,291 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.sampler_view_destroy = r600_sampler_view_destroy; } -struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate) -{ - if (rstate == NULL) - return NULL; - rstate->refcount++; - return rstate; -} - -struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate) +void r600_init_config(struct r600_pipe_context *rctx) { - unsigned i; - - if (rstate == NULL) - return NULL; - if (--rstate->refcount) - return NULL; - switch (rstate->type) { - case pipe_sampler_view_type: - pipe_resource_reference(&rstate->state.sampler_view.texture, NULL); - break; - case pipe_framebuffer_type: - for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], NULL); - radeon_state_fini(&rstate->rstate[i+1]); - } - pipe_surface_reference(&rstate->state.framebuffer.zsbuf, NULL); + int ps_prio; + int vs_prio; + int gs_prio; + int es_prio; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int num_gs_threads; + int num_es_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; + enum radeon_family family; + struct r600_pipe_state *rstate = &rctx->config; + u32 tmp; + + family = r600_get_family(rctx->radeon); + ps_prio = 0; + vs_prio = 1; + gs_prio = 2; + es_prio = 3; + switch (family) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; break; - case pipe_viewport_type: - case pipe_depth_type: - case pipe_rasterizer_type: - case pipe_poly_stipple_type: - case pipe_scissor_type: - case pipe_clip_type: - case pipe_stencil_type: - case pipe_alpha_type: - case pipe_dsa_type: - case pipe_blend_type: - case pipe_stencil_ref_type: - case pipe_shader_type: - case pipe_sampler_type: + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: default: - R600_ERR("invalid type %d\n", rstate->type); - return NULL; - } - radeon_state_fini(&rstate->rstate[0]); - FREE(rstate); - return NULL; -} - -static void r600_bind_shader_sampler(struct r600_context *rctx, struct r600_shader_sampler_states *sampler) -{ - int i; - - for (i = 0; i < sampler->nsampler; i++) { - if (sampler->sampler[i]) - radeon_draw_bind(&rctx->draw, sampler->sampler[i]); - } - - for (i = 0; i < sampler->nborder; i++) { - if (sampler->border[i]) - radeon_draw_bind(&rctx->draw, sampler->border[i]); - } - - for (i = 0; i < sampler->nview; i++) { - if (sampler->view[i]) - radeon_draw_bind(&rctx->draw, sampler->view[i]); - } -} - -static void clean_flush(struct r600_context *rctx, struct radeon_state *flush) -{ - struct r600_screen *rscreen = rctx->screen; - int i; - - for (i = 0 ; i < flush->nbo; i++) { - radeon_ws_bo_reference(rscreen->rw, &flush->bo[i], NULL); + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; } - flush->nbo = 0; - radeon_state_fini(flush); -} -static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - struct pipe_surface *surf; - int i; - - radeon_state_init(flush, rscreen->rw, R600_STATE_CB_FLUSH, 0, 0); + rstate->id = R600_PIPE_STATE_CONFIG; - for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) { - surf = rctx->framebuffer->state.framebuffer.cbufs[i]; - - rtex = (struct r600_resource_texture*)surf->texture; - rbuffer = &rtex->resource; - /* just need to the bo to the flush list */ - radeon_ws_bo_reference(rscreen->rw, &flush->bo[i], rbuffer->bo); - flush->placement[i] = RADEON_GEM_DOMAIN_VRAM; + /* SQ_CONFIG */ + tmp = 0; + switch (family) { + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV710: + break; + default: + tmp |= S_008C00_VC_ENABLE(1); + break; } - flush->nbo = rctx->framebuffer->state.framebuffer.nr_cbufs; - return radeon_state_pm4(flush); -} - -static int setup_db_flush(struct r600_context *rctx, struct radeon_state *flush) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - struct pipe_surface *surf; - - surf = rctx->framebuffer->state.framebuffer.zsbuf; - - radeon_state_init(flush, rscreen->rw, R600_STATE_DB_FLUSH, 0, 0); - - if (surf) { - rtex = (struct r600_resource_texture*)surf->texture; - rbuffer = &rtex->resource; - /* just need to the bo to the flush list */ - radeon_ws_bo_reference(rscreen->rw, &flush->bo[0], rbuffer->bo); - flush->placement[0] = RADEON_GEM_DOMAIN_VRAM; - - flush->nbo = 1; + tmp |= S_008C00_DX9_CONSTS(0); + tmp |= S_008C00_ALU_INST_PREFER_VECTOR(1); + tmp |= S_008C00_PS_PRIO(ps_prio); + tmp |= S_008C00_VS_PRIO(vs_prio); + tmp |= S_008C00_GS_PRIO(gs_prio); + tmp |= S_008C00_ES_PRIO(es_prio); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + + /* SQ_GPR_RESOURCE_MGMT_1 */ + tmp = 0; + tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + /* SQ_GPR_RESOURCE_MGMT_2 */ + tmp = 0; + tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + /* SQ_THREAD_RESOURCE_MGMT */ + tmp = 0; + tmp |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + + /* SQ_STACK_RESOURCE_MGMT_1 */ + tmp = 0; + tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + /* SQ_STACK_RESOURCE_MGMT_2 */ + tmp = 0; + tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); + + if (family >= CHIP_RV770) { + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); + } else { + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL); } - return radeon_state_pm4(flush); + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, rstate); } -int r600_context_hw_states(struct pipe_context *ctx) +void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) { - struct r600_context *rctx = r600_context(ctx); - unsigned i; - - /* build new states */ - rctx->vtbl->rasterizer(rctx, &rctx->hw_states.rasterizer); - rctx->vtbl->scissor(rctx, &rctx->hw_states.scissor); - rctx->vtbl->dsa(rctx, &rctx->hw_states.dsa); - rctx->vtbl->cb_cntl(rctx, &rctx->hw_states.cb_cntl); - - /* bind states */ - radeon_draw_bind(&rctx->draw, &rctx->config); - - radeon_draw_bind(&rctx->draw, &rctx->hw_states.rasterizer); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.scissor); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.dsa); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_cntl); - - radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush); - - if (rctx->viewport) { - radeon_draw_bind(&rctx->draw, &rctx->viewport->rstate[0]); - } - if (rctx->blend) { - radeon_draw_bind(&rctx->draw, &rctx->blend->rstate[0]); - } - if (rctx->clip) { - radeon_draw_bind(&rctx->draw, &rctx->clip->rstate[0]); - } - for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) { - radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[i+1]); - } - if (rctx->framebuffer->state.framebuffer.zsbuf) { - radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[0]); + struct pipe_depth_stencil_alpha_state dsa; + struct r600_pipe_state *rstate; + boolean quirk = false; + + if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || + rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) + quirk = true; + + memset(&dsa, 0, sizeof(dsa)); + + if (quirk) { + dsa.depth.enabled = 1; + dsa.depth.func = PIPE_FUNC_LEQUAL; + dsa.stencil[0].enabled = 1; + dsa.stencil[0].func = PIPE_FUNC_ALWAYS; + dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_INCR; + dsa.stencil[0].writemask = 0xff; } - r600_bind_shader_sampler(rctx, &rctx->vs_sampler); - r600_bind_shader_sampler(rctx, &rctx->ps_sampler); - - return 0; + rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + 0x0, + S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + r600_pipe_state_add_reg(rstate, + R_028D0C_DB_RENDER_CONTROL, + S_028D0C_DEPTH_COPY_ENABLE(1) | + S_028D0C_STENCIL_COPY_ENABLE(1) | + S_028D0C_COPY_CENTROID(1), + S_028D0C_DEPTH_COPY_ENABLE(1) | + S_028D0C_STENCIL_COPY_ENABLE(1) | + S_028D0C_COPY_CENTROID(1), NULL); + return rstate; } diff --git a/src/gallium/drivers/r600/r600_state2.c b/src/gallium/drivers/r600/r600_state2.c deleted file mode 100644 index 153780594e..0000000000 --- a/src/gallium/drivers/r600/r600_state2.c +++ /dev/null @@ -1,2490 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* TODO: - * - fix mask for depth control & cull for query - */ -#include <stdio.h> -#include <errno.h> -#include <pipe/p_defines.h> -#include <pipe/p_state.h> -#include <pipe/p_context.h> -#include <tgsi/tgsi_scan.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_util.h> -#include <util/u_blitter.h> -#include <util/u_double_list.h> -#include <util/u_transfer.h> -#include <util/u_surface.h> -#include <util/u_pack_color.h> -#include <util/u_memory.h> -#include <util/u_inlines.h> -#include <util/u_upload_mgr.h> -#include <util/u_index_modify.h> -#include <pipebuffer/pb_buffer.h> -#include "r600.h" -#include "r600d.h" -#include "r700_sq.h" -struct radeon_state { - unsigned dummy; -}; -#include "r600_resource.h" -#include "r600_shader.h" -#include "r600_pipe.h" -#include "r600_state_inlines.h" - -/* r600_shader.c */ -static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned spi_vs_out_id[10]; - unsigned i, tmp; - - /* clear previous register */ - rstate->nregs = 0; - - /* so far never got proper semantic id from tgsi */ - for (i = 0; i < 10; i++) { - spi_vs_out_id[i] = 0; - } - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - spi_vs_out_id[i / 4] |= tmp; - } - for (i = 0; i < 10; i++) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028614_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); - } - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028868_SQ_PGM_RESOURCES_VS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288D0_SQ_PGM_CF_OFFSET_VS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028858_SQ_PGM_START_VS, - 0x00000000, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028894_SQ_PGM_START_FS, - 0x00000000, 0xFFFFFFFF, shader->bo); -} - -int r600_find_vs_semantic_index2(struct r600_shader *vs, - struct r600_shader *ps, int id) -{ - struct r600_shader_io *input = &ps->input[id]; - - for (int i = 0; i < vs->noutput; i++) { - if (input->name == vs->output[i].name && - input->sid == vs->output[i].sid) { - return i - 1; - } - } - return 0; -} - -static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; - boolean have_pos = FALSE, have_face = FALSE; - - /* clear previous register */ - rstate->nregs = 0; - - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index2(&rctx->vs_shader->shader, rshader, i)); - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - have_face = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - for (i = 0; i < rshader->noutput; i++) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02880C_DB_SHADER_CONTROL, - S_02880C_Z_EXPORT_ENABLE(1), - S_02880C_Z_EXPORT_ENABLE(1), NULL); - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; - } - } - exports_ps |= S_028854_EXPORT_COLORS(num_cout); - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - spi_input_z = 0; - if (have_pos) { - spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); - spi_input_z |= 1; - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D0_SPI_PS_IN_CONTROL_1, S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028840_SQ_PGM_START_PS, - 0x00000000, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028850_SQ_PGM_RESOURCES_PS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028854_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288CC_SQ_PGM_CF_OFFSET_PS, - 0x00000000, 0xFFFFFFFF, NULL); - - if (rshader->uses_kill) { - /* only set some bits here, the other bits are set in the dsa state */ - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02880C_DB_SHADER_CONTROL, - S_02880C_KILL_ENABLE(1), - S_02880C_KILL_ENABLE(1), NULL); - } -} - -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_shader *rshader = &shader->shader; - void *ptr; - - /* copy new shader */ - if (shader->bo == NULL) { - shader->bo = radeon_ws_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0); - if (shader->bo == NULL) { - return -ENOMEM; - } - ptr = radeon_ws_bo_map(rctx->radeon, shader->bo, 0, NULL); - memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); - radeon_ws_bo_unmap(rctx->radeon, shader->bo); - } - /* build state */ - rshader->flat_shade = rctx->flatshade; - switch (rshader->processor_type) { - case TGSI_PROCESSOR_VERTEX: - if (rshader->family >= CHIP_CEDAR) { - evergreen_pipe_shader_vs(ctx, shader); - } else { - r600_pipe_shader_vs(ctx, shader); - } - break; - case TGSI_PROCESSOR_FRAGMENT: - if (rshader->family >= CHIP_CEDAR) { - evergreen_pipe_shader_ps(ctx, shader); - } else { - r600_pipe_shader_ps(ctx, shader); - } - break; - default: - return -EINVAL; - } - r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); - return 0; -} - -static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_shader *shader = &rshader->shader; - const struct util_format_description *desc; - enum pipe_format resource_format[160]; - unsigned i, nresources = 0; - struct r600_bc *bc = &shader->bc; - struct r600_bc_cf *cf; - struct r600_bc_vtx *vtx; - - if (shader->processor_type != TGSI_PROCESSOR_VERTEX) - return 0; - for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; - } - radeon_ws_bo_reference(rctx->radeon, &rshader->bo, NULL); - LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { - switch (cf->inst) { - case V_SQ_CF_WORD1_SQ_CF_INST_VTX: - case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - desc = util_format_description(resource_format[vtx->buffer_id]); - if (desc == NULL) { - R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); - return -EINVAL; - } - vtx->dst_sel_x = desc->swizzle[0]; - vtx->dst_sel_y = desc->swizzle[1]; - vtx->dst_sel_z = desc->swizzle[2]; - vtx->dst_sel_w = desc->swizzle[3]; - } - break; - default: - break; - } - } - return r600_bc_build(&shader->bc); -} - -int r600_pipe_shader_update2(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - int r; - - if (shader == NULL) - return -EINVAL; - /* there should be enough input */ - if (rctx->vertex_elements->count < shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, shader->shader.bc.nresource); - return -EINVAL; - } - r = r600_shader_update(ctx, shader); - if (r) - return r; - return r600_pipe_shader(ctx, shader); -} - -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); -int r600_pipe_shader_create2(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - int r; - -//fprintf(stderr, "--------------------------------------------------------------\n"); -//tgsi_dump(tokens, 0); - shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader); - if (r) { - R600_ERR("translation from TGSI failed !\n"); - return r; - } - r = r600_bc_build(&shader->shader.bc); - if (r) { - R600_ERR("building bytecode failed !\n"); - return r; - } -//fprintf(stderr, "______________________________________________________________\n"); - return 0; -} -/* r600_shader.c END */ - -static const char* r600_get_vendor(struct pipe_screen* pscreen) -{ - return "X.Org"; -} - -static const char* r600_get_name(struct pipe_screen* pscreen) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - enum radeon_family family = r600_get_family(rscreen->radeon); - - if (family >= CHIP_R600 && family < CHIP_RV770) - return "R600 (HD2XXX,HD3XXX)"; - else - return "R700 (HD4XXX)"; -} - -static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) -{ - switch (param) { - /* Supported features (boolean caps). */ - case PIPE_CAP_NPOT_TEXTURES: - case PIPE_CAP_TWO_SIDED_STENCIL: - case PIPE_CAP_GLSL: - case PIPE_CAP_DUAL_SOURCE_BLEND: - case PIPE_CAP_ANISOTROPIC_FILTER: - case PIPE_CAP_POINT_SPRITE: - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_TEXTURE_SHADOW_MAP: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_SM3: - case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - case PIPE_CAP_DEPTH_CLAMP: - return 1; - - /* Unsupported features (boolean caps). */ - case PIPE_CAP_TIMER_QUERY: - case PIPE_CAP_STREAM_OUTPUT: - case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ - return 0; - - /* Texturing. */ - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 14; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - /* FIXME allow this once infrastructure is there */ - return 0; - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - - /* Render targets. */ - case PIPE_CAP_MAX_RENDER_TARGETS: - /* FIXME some r6xx are buggy and can only do 4 */ - return 8; - - /* Fragment coordinate conventions. */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - - default: - R600_ERR("r600: unknown param %d\n", param); - return 0; - } -} - -static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - default: - R600_ERR("r600: unsupported paramf %d\n", param); - return 0.0f; - } -} - -static boolean r600_is_format_supported(struct pipe_screen* screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned usage, - unsigned geom_flags) -{ - unsigned retval = 0; - if (target >= PIPE_MAX_TEXTURE_TYPES) { - R600_ERR("r600: unsupported texture type %d\n", target); - return FALSE; - } - - /* Multisample */ - if (sample_count > 1) - return FALSE; - - if ((usage & PIPE_BIND_SAMPLER_VIEW) && - r600_is_sampler_format_supported(format)) { - retval |= PIPE_BIND_SAMPLER_VIEW; - } - - if ((usage & (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) && - r600_is_colorbuffer_format_supported(format)) { - retval |= usage & - (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED); - } - - if ((usage & PIPE_BIND_DEPTH_STENCIL) && - r600_is_zs_format_supported(format)) { - retval |= PIPE_BIND_DEPTH_STENCIL; - } - - if ((usage & PIPE_BIND_VERTEX_BUFFER) && - r600_is_vertex_format_supported(format)) - retval |= PIPE_BIND_VERTEX_BUFFER; - - if (usage & PIPE_BIND_TRANSFER_READ) - retval |= PIPE_BIND_TRANSFER_READ; - if (usage & PIPE_BIND_TRANSFER_WRITE) - retval |= PIPE_BIND_TRANSFER_WRITE; - - return retval == usage; -} - -static void r600_destroy_screen(struct pipe_screen* pscreen) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - - if (rscreen == NULL) - return; - FREE(rscreen); -} - -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -static void r600_draw_common(struct r600_drawl *draw) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - unsigned i, j, offset, format, prim; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct pipe_vertex_buffer *vertex_buffer; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - - switch (draw->index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw->index_size); - return; - } - if (r600_conv_pipe_prim(draw->mode, &prim)) - return; - - - /* rebuild vertex shader if input format changed */ - if (r600_pipe_shader_update2(&rctx->context, rctx->vs_shader)) - return; - if (r600_pipe_shader_update2(&rctx->context, rctx->ps_shader)) - return; - - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - unsigned num_format = 0, format_comp = 0; - - rstate = &rctx->vs_resource[i]; - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - r600_translate_vertex_num_format(rctx->vertex_elements->elements[i].src_format, &num_format, &format_comp); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, - R_038008_RESOURCE0_WORD2, - S_038008_STRIDE(vertex_buffer->stride) | - S_038008_DATA_FORMAT(format) | - S_038008_NUM_FORMAT_ALL(num_format) | - S_038008_FORMAT_COMP_ALL(format_comp), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i); - } - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONFIG, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - /* build late state */ - if (rctx->rasterizer && rctx->framebuffer.zsbuf) { - float offset_units = rctx->rasterizer->offset_units; - unsigned offset_db_fmt_cntl = 0, depth; - - switch (rctx->framebuffer.zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - return; - } - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, - R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, - R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, - R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, - R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, - R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw->count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw->index_buffer) { - rbuffer = (struct r600_resource*)draw->index_buffer; - rdraw.indices = rbuffer->bo; - rdraw.indices_bo_offset = draw->index_buffer_offset; - } - r600_context_draw(&rctx->ctx, &rdraw); -} - -void r600_translate_index_buffer2(struct r600_pipe_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count) -{ - switch (*index_size) { - case 1: - util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); - *index_size = 2; - *start = 0; - break; - - case 2: - if (*start % 2 != 0) { - util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); - *start = 0; - } - break; - - case 4: - break; - } -} - -static void r600_draw_vbo2(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_drawl draw; - - assert(info->index_bias == 0); - - if (rctx->any_user_vbs) { - r600_upload_user_buffers2(rctx); - rctx->any_user_vbs = FALSE; - } - - memset(&draw, 0, sizeof(struct r600_drawl)); - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; - - r600_translate_index_buffer2(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - draw.index_buffer = rctx->index_buffer.buffer; - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; - r600_upload_index_buffer2(rctx, &draw); - } else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->start; - } - r600_draw_common(&draw); -} - -static void r600_flush2(struct pipe_context *ctx, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; -#if 0 - static int dc = 0; - char dname[256]; -#endif - - if (!rctx->ctx.pm4_cdwords) - return; - - u_upload_flush(rctx->upload_vb); - u_upload_flush(rctx->upload_ib); - -#if 0 - sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 20) { - r600_context_dump_bof(&rctx->ctx, dname); - R600_ERR("dumped %s\n", dname); - } - dc++; -#endif - r600_context_flush(&rctx->ctx); -} - -static void r600_destroy_context(struct pipe_context *context) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)context; - - r600_context_fini(&rctx->ctx); - for (int i = 0; i < R600_PIPE_NSTATES; i++) { - free(rctx->states[i]); - } - - u_upload_destroy(rctx->upload_vb); - u_upload_destroy(rctx->upload_ib); - - FREE(rctx); -} - -static void r600_blitter_save_states(struct pipe_context *ctx) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); - util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]); - if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) { - util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); - } - util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]); - util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); - util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); - util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); - if (rctx->states[R600_PIPE_STATE_VIEWPORT]) { - util_blitter_save_viewport(rctx->blitter, &rctx->viewport); - } - if (rctx->states[R600_PIPE_STATE_CLIP]) { - util_blitter_save_clip(rctx->blitter, &rctx->clip); - } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); - - rctx->vertex_elements = NULL; - - /* TODO queries */ -} - -int r600_blit_uncompress_depth2(struct pipe_context *ctx, struct r600_resource_texture *texture) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state fb = *rctx->pframebuffer; - struct pipe_surface *zsurf, *cbsurf; - int level = 0; - float depth = 1.0f; - - for (int i = 0; i < fb.nr_cbufs; i++) { - fb.cbufs[i] = NULL; - pipe_surface_reference(&fb.cbufs[i], rctx->pframebuffer->cbufs[i]); - } - fb.zsbuf = NULL; - pipe_surface_reference(&fb.zsbuf, rctx->pframebuffer->zsbuf); - - zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0, - PIPE_BIND_DEPTH_STENCIL); - - cbsurf = ctx->screen->get_tex_surface(ctx->screen, texture->flushed_depth_texture, 0, level, 0, - PIPE_BIND_RENDER_TARGET); - - r600_blitter_save_states(ctx); - util_blitter_save_framebuffer(rctx->blitter, &fb); - - if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || - rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) - depth = 0.0f; - - util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); - - pipe_surface_reference(&zsurf, NULL); - pipe_surface_reference(&cbsurf, NULL); - for (int i = 0; i < fb.nr_cbufs; i++) { - pipe_surface_reference(&fb.cbufs[i], NULL); - } - pipe_surface_reference(&fb.zsbuf, NULL); - - return 0; -} - -static void r600_clear(struct pipe_context *ctx, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - r600_blitter_save_states(ctx); - util_blitter_clear(rctx->blitter, fb->width, fb->height, - fb->nr_cbufs, buffers, rgba, depth, - stencil); -} - -static void r600_clear_render_target(struct pipe_context *ctx, - struct pipe_surface *dst, - const float *rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - util_blitter_save_framebuffer(rctx->blitter, fb); - util_blitter_clear_render_target(rctx->blitter, dst, rgba, - dstx, dsty, width, height); -} - -static void r600_clear_depth_stencil(struct pipe_context *ctx, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - util_blitter_save_framebuffer(rctx->blitter, fb); - util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, - dstx, dsty, width, height); -} - - -static void r600_resource_copy_region(struct pipe_context *ctx, - struct pipe_resource *dst, - struct pipe_subresource subdst, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) -{ - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); -} - -static void r600_init_blit_functions2(struct r600_pipe_context *rctx) -{ - rctx->context.clear = r600_clear; - rctx->context.clear_render_target = r600_clear_render_target; - rctx->context.clear_depth_stencil = r600_clear_depth_stencil; - rctx->context.resource_copy_region = r600_resource_copy_region; -} - -static void r600_init_context_resource_functions2(struct r600_pipe_context *r600) -{ - r600->context.get_transfer = u_get_transfer_vtbl; - r600->context.transfer_map = u_transfer_map_vtbl; - r600->context.transfer_flush_region = u_transfer_flush_region_vtbl; - r600->context.transfer_unmap = u_transfer_unmap_vtbl; - r600->context.transfer_destroy = u_transfer_destroy_vtbl; - r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; -} - -static void r600_set_blend_color(struct pipe_context *ctx, - const struct pipe_blend_color *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - - if (rstate == NULL) - return; - - rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); - free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); - rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void *r600_create_blend_state(struct pipe_context *ctx, - const struct pipe_blend_state *state) -{ - struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); - struct r600_pipe_state *rstate; - u32 color_control, target_mask; - - if (blend == NULL) { - return NULL; - } - rstate = &blend->rstate; - - rstate->id = R600_PIPE_STATE_BLEND; - - target_mask = 0; - color_control = S_028808_PER_MRT_BLEND(1); - if (state->logicop_enable) { - color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); - } else { - color_control |= (0xcc << 16); - } - /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ - if (state->independent_blend_enable) { - for (int i = 0; i < 8; i++) { - if (state->rt[i].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (state->rt[i].colormask << (4 * i)); - } - } else { - for (int i = 0; i < 8; i++) { - if (state->rt[0].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (state->rt[0].colormask << (4 * i)); - } - } - blend->cb_target_mask = target_mask; - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); - - for (int i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; - uint32_t bc = 0; - - if (!state->rt[i].blend_enable) - continue; - - bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); - bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); - bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= S_028804_SEPARATE_ALPHA_BLEND(1); - bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); - bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); - bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); - } - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); - if (i == 0) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); - } - } - return rstate; -} - -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; - struct r600_pipe_state *rstate; - - if (state == NULL) - return; - rstate = &blend->rstate; - rctx->states[rstate->id] = rstate; - rctx->cb_target_mask = blend->cb_target_mask; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void *r600_create_dsa_state(struct pipe_context *ctx, - const struct pipe_depth_stencil_alpha_state *state) -{ - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; - unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - - if (rstate == NULL) { - return NULL; - } - - rstate->id = R600_PIPE_STATE_DSA; - /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ - /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be - * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will - * be set if shader use texkill instruction - */ - db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); - stencil_ref_mask = 0; - stencil_ref_mask_bf = 0; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); - - /* stencil */ - if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - - - stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - } - } - - /* alpha */ - alpha_test_control = 0; - alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); - } - - /* misc */ - db_render_control = 0; - db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028430_DB_STENCILREFMASK, stencil_ref_mask, - 0xFFFFFFFF & C_028430_STENCILREF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, - 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); - - return rstate; -} - -static void *r600_create_rs_state(struct pipe_context *ctx, - const struct pipe_rasterizer_state *state) -{ - struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); - struct r600_pipe_state *rstate; - unsigned tmp; - unsigned prov_vtx = 1, polygon_dual_mode; - - if (rs == NULL) { - return NULL; - } - - rstate = &rs->rstate; - rs->flatshade = state->flatshade; - rs->sprite_coord_enable = state->sprite_coord_enable; - - /* offset */ - rs->offset_units = state->offset_units; - rs->offset_scale = state->offset_scale * 12.0f; - - rstate->id = R600_PIPE_STATE_RASTERIZER; - if (state->flatshade_first) - prov_vtx = 0; - tmp = 0x00000001; - if (state->sprite_coord_enable) { - tmp |= S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); - - polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || - state->fill_back != PIPE_POLYGON_MODE_FILL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028814_PA_SU_SC_MODE_CNTL, - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | - S_028814_POLY_MODE(polygon_dual_mode) | - S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02881C_PA_CL_VS_OUT_CNTL, - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); - return rstate; -} - -static void r600_bind_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (state == NULL) - return; - - rctx->flatshade = rs->flatshade; - rctx->sprite_coord_enable = rs->sprite_coord_enable; - rctx->rasterizer = rs; - - rctx->states[rs->rstate.id] = &rs->rstate; - r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); -} - -static void r600_delete_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - - if (rctx->rasterizer == rs) { - rctx->rasterizer = NULL; - } - if (rctx->states[rs->rstate.id] == &rs->rstate) { - rctx->states[rs->rstate.id] = NULL; - } - free(rs); -} - -static void *r600_create_sampler_state(struct pipe_context *ctx, - const struct pipe_sampler_state *state) -{ - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - union util_color uc; - - if (rstate == NULL) { - return NULL; - } - - rstate->id = R600_PIPE_STATE_SAMPLER; - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C000_SQ_TEX_SAMPLER_WORD0_0, - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); - /* FIXME LOD it depends on texture base level ... */ - r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C004_SQ_TEX_SAMPLER_WORD1_0, - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); - if (uc.ui) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); - } - return rstate; -} - -static void *r600_create_vertex_elements(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - - assert(count < 32); - v->count = count; - v->refcount = 1; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - return v; -} - -static void r600_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - - pipe_resource_reference(&state->texture, NULL); - FREE(resource); -} - -static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, - struct pipe_resource *texture, - const struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); - struct r600_pipe_state *rstate; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; - struct r600_resource *rbuffer; - unsigned format; - uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4], array_mode = 0, tile_type = 0; - struct radeon_ws_bo *bo[2]; - - if (resource == NULL) - return NULL; - rstate = &resource->state; - - /* initialize base object */ - resource->base = *state; - resource->base.texture = NULL; - pipe_reference(NULL, &texture->reference); - resource->base.texture = texture; - resource->base.reference.count = 1; - resource->base.context = ctx; - - swizzle[0] = state->swizzle_r; - swizzle[1] = state->swizzle_g; - swizzle[2] = state->swizzle_b; - swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(texture->format, - swizzle, - &word4, &yuv_format); - if (format == ~0) { - format = 0; - } - desc = util_format_description(texture->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", texture->format); - } - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ - if (tmp->depth) { - r600_texture_depth_flush(ctx, texture); - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->flushed_depth_texture->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - } - pitch = align(tmp->pitch[0] / tmp->bpt, 8); - - /* FIXME properly handle first level != 0 */ - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0, - S_038000_DIM(r600_tex_dim(texture->target)) | - S_038000_TILE_MODE(array_mode) | - S_038000_TILE_TYPE(tile_type) | - S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038004_RESOURCE0_WORD1, - S_038004_TEX_HEIGHT(texture->height0 - 1) | - S_038004_TEX_DEPTH(texture->depth0 - 1) | - S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038008_RESOURCE0_WORD2, - tmp->offset[0] >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_03800C_RESOURCE0_WORD3, - tmp->offset[1] >> 8, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038010_RESOURCE0_WORD4, - word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | - S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | - S_038010_REQUEST_SIZE(1) | - S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038014_RESOURCE0_WORD5, - S_038014_LAST_LEVEL(state->last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038018_RESOURCE0_WORD6, - S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); - - return &resource->base; -} - -static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, - struct pipe_sampler_view **views) -{ - /* TODO */ - assert(1); -} - -static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, - struct pipe_sampler_view **views) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - - for (int i = 0; i < count; i++) { - if (resource[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); - } - } -} - -static void r600_bind_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (state == NULL) - return; - rctx->states[rstate->id] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - - for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); - } -} - -static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - - /* TODO implement */ - for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); - } -} - -static void r600_delete_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (rctx->states[rstate->id] == rstate) { - rctx->states[rstate->id] = NULL; - } - for (int i = 0; i < rstate->nregs; i++) { - radeon_ws_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); - } - free(rstate); -} - -static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) -{ - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - if (v == NULL) - return; - if (--v->refcount) - return; - free(v); -} - -static void r600_set_clip_state(struct pipe_context *ctx, - const struct pipe_clip_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - - if (rstate == NULL) - return; - - rctx->clip = *state; - rstate->id = R600_PIPE_STATE_CLIP; - for (int i = 0; i < state->nr; i++) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E20_PA_CL_UCP0_X + i * 4, - fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E24_PA_CL_UCP0_Y + i * 4, - fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E28_PA_CL_UCP0_Z + i * 4, - fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E2C_PA_CL_UCP0_W + i * 4, - fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028810_PA_CL_CLIP_CNTL, - S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | - S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_CLIP]); - rctx->states[R600_PIPE_STATE_CLIP] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - r600_delete_vertex_element(ctx, rctx->vertex_elements); - rctx->vertex_elements = v; - if (v) { - v->refcount++; -// rctx->vs_rebuild = TRUE; - } -} - -static void r600_set_polygon_stipple(struct pipe_context *ctx, - const struct pipe_poly_stipple *state) -{ -} - -static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) -{ -} - -static void r600_set_scissor_state(struct pipe_context *ctx, - const struct pipe_scissor_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - u32 tl, br; - - if (rstate == NULL) - return; - - rstate->id = R600_PIPE_STATE_SCISSOR; - tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); - if (rctx->family >= CHIP_RV770) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); - } - - free(rctx->states[R600_PIPE_STATE_SCISSOR]); - rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_set_stencil_ref(struct pipe_context *ctx, - const struct pipe_stencil_ref *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - u32 tmp; - - if (rstate == NULL) - return; - - rctx->stencil_ref = *state; - rstate->id = R600_PIPE_STATE_STENCIL_REF; - tmp = S_028430_STENCILREF(state->ref_value[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028430_DB_STENCILREFMASK, tmp, - ~C_028430_STENCILREF, NULL); - tmp = S_028434_STENCILREF_BF(state->ref_value[1]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028434_DB_STENCILREFMASK_BF, tmp, - ~C_028434_STENCILREF_BF, NULL); - - free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); - rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_set_viewport_state(struct pipe_context *ctx, - const struct pipe_viewport_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - - if (rstate == NULL) - return; - - rctx->viewport = *state; - rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_VIEWPORT]); - rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, - const struct pipe_framebuffer_state *state, int cb) -{ - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - struct radeon_ws_bo *bo[3]; - - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - bo[2] = rbuffer->bo; - - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - color_info = S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_0280A0_SOURCE_FORMAT(1); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028040_CB_COLOR0_BASE + cb * 4, - state->cbufs[cb]->offset >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0280A0_CB_COLOR0_INFO + cb * 4, - color_info, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028060_CB_COLOR0_SIZE + cb * 4, - S_028060_PITCH_TILE_MAX(pitch) | - S_028060_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028080_CB_COLOR0_VIEW + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0280E0_CB_COLOR0_FRAG + cb * 4, - 0x00000000, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0280C0_CB_COLOR0_TILE + cb * 4, - 0x00000000, 0xFFFFFFFF, bo[2]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028100_CB_COLOR0_MASK + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); -} - -static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, - const struct pipe_framebuffer_state *state) -{ - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level; - unsigned pitch, slice, format; - - if (state->zsbuf == NULL) - return; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02800C_DB_DEPTH_BASE, - state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028000_DB_DEPTH_SIZE, - S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028010_DB_DEPTH_INFO, - S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format), - 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D34_DB_PREFETCH_LIMIT, - (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL); -} - -static void r600_set_framebuffer_state(struct pipe_context *ctx, - const struct pipe_framebuffer_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - u32 shader_mask, tl, br, shader_control, target_mask; - - if (rstate == NULL) - return; - - /* unreference old buffer and reference new one */ - rstate->id = R600_PIPE_STATE_FRAMEBUFFER; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL); - } - for (int i = 0; i < state->nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); - } - pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); - rctx->framebuffer = *state; - rctx->pframebuffer = &rctx->framebuffer; - - /* build states */ - for (int i = 0; i < state->nr_cbufs; i++) { - r600_cb(rctx, rstate, state, i); - } - if (state->zsbuf) { - r600_db(rctx, rstate, state); - } - - target_mask = 0x00000000; - target_mask = 0xFFFFFFFF; - shader_mask = 0; - shader_control = 0; - for (int i = 0; i < state->nr_cbufs; i++) { - target_mask ^= 0xf << (i * 4); - shader_mask |= 0xf << (i * 4); - shader_control |= 1 << i; - } - tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028244_PA_SC_GENERIC_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, - 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0287A0_CB_SHADER_CONTROL, - shader_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, - 0x00000000, target_mask, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02823C_CB_SHADER_MASK, - shader_mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C30_CB_CLRCMP_CONTROL, - 0x01000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C34_CB_CLRCMP_SRC, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C38_CB_CLRCMP_DST, - 0x000000FF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C3C_CB_CLRCMP_MSK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C48_PA_SC_AA_MASK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); - rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_set_index_buffer(struct pipe_context *ctx, - const struct pipe_index_buffer *ib) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (ib) { - pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); - memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } else { - pipe_resource_reference(&rctx->index_buffer.buffer, NULL); - memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); - } - - /* TODO make this more like a state */ -} - -static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - for (int i = 0; i < count; i++) { - rctx->vertex_buffer[i].buffer = NULL; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - rctx->any_user_vbs = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); - } - rctx->nvertex_buffer = count; -} - -static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate; - struct pipe_transfer *transfer; - unsigned *nconst = NULL; - u32 *ptr, offset; - - switch (shader) { - case PIPE_SHADER_VERTEX: - rstate = rctx->vs_const; - nconst = &rctx->vs_nconst; - offset = R_030000_SQ_ALU_CONSTANT0_0 + 0x1000; - break; - case PIPE_SHADER_FRAGMENT: - rstate = rctx->ps_const; - nconst = &rctx->ps_nconst; - offset = R_030000_SQ_ALU_CONSTANT0_0; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - if (buffer && buffer->width0 > 0) { - *nconst = buffer->width0 / 16; - ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); - if (ptr == NULL) - return; - for (int i = 0; i < *nconst; i++, offset += 0x10) { - rstate[i].nregs = 0; - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x0, ptr[i * 4 + 0], 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x4, ptr[i * 4 + 1], 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x8, ptr[i * 4 + 2], 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0xC, ptr[i * 4 + 3], 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &rstate[i]); - } - pipe_buffer_unmap(ctx, buffer, transfer); - } -} - -static void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); - int r; - - r = r600_pipe_shader_create2(ctx, shader, state->tokens); - if (r) { - return NULL; - } - return shader; -} - -static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->ps_shader = (struct r600_pipe_shader *)state; -} - -static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->vs_shader = (struct r600_pipe_shader *)state; -} - -static void r600_delete_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->ps_shader == shader) { - rctx->ps_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - -static void r600_delete_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->vs_shader == shader) { - rctx->vs_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - -static void r600_init_state_functions2(struct r600_pipe_context *rctx) -{ - rctx->context.create_blend_state = r600_create_blend_state; - rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; - rctx->context.create_fs_state = r600_create_shader_state; - rctx->context.create_rasterizer_state = r600_create_rs_state; - rctx->context.create_sampler_state = r600_create_sampler_state; - rctx->context.create_sampler_view = r600_create_sampler_view; - rctx->context.create_vertex_elements_state = r600_create_vertex_elements; - rctx->context.create_vs_state = r600_create_shader_state; - rctx->context.bind_blend_state = r600_bind_blend_state; - rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; - rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler; - rctx->context.bind_fs_state = r600_bind_ps_shader; - rctx->context.bind_rasterizer_state = r600_bind_rs_state; - rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; - rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler; - rctx->context.bind_vs_state = r600_bind_vs_shader; - rctx->context.delete_blend_state = r600_delete_state; - rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; - rctx->context.delete_fs_state = r600_delete_ps_shader; - rctx->context.delete_rasterizer_state = r600_delete_rs_state; - rctx->context.delete_sampler_state = r600_delete_state; - rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; - rctx->context.delete_vs_state = r600_delete_vs_shader; - rctx->context.set_blend_color = r600_set_blend_color; - rctx->context.set_clip_state = r600_set_clip_state; - rctx->context.set_constant_buffer = r600_set_constant_buffer; - rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view; - rctx->context.set_framebuffer_state = r600_set_framebuffer_state; - rctx->context.set_polygon_stipple = r600_set_polygon_stipple; - rctx->context.set_sample_mask = r600_set_sample_mask; - rctx->context.set_scissor_state = r600_set_scissor_state; - rctx->context.set_stencil_ref = r600_set_stencil_ref; - rctx->context.set_vertex_buffers = r600_set_vertex_buffers; - rctx->context.set_index_buffer = r600_set_index_buffer; - rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; - rctx->context.set_viewport_state = r600_set_viewport_state; - rctx->context.sampler_view_destroy = r600_sampler_view_destroy; -} - -static void r600_init_config2(struct r600_pipe_context *rctx) -{ - int ps_prio; - int vs_prio; - int gs_prio; - int es_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_temp_gprs; - int num_ps_threads; - int num_vs_threads; - int num_gs_threads; - int num_es_threads; - int num_ps_stack_entries; - int num_vs_stack_entries; - int num_gs_stack_entries; - int num_es_stack_entries; - enum radeon_family family; - struct r600_pipe_state *rstate = &rctx->config; - u32 tmp; - - family = r600_get_family(rctx->radeon); - ps_prio = 0; - vs_prio = 1; - gs_prio = 2; - es_prio = 3; - switch (family) { - case CHIP_R600: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV630: - case CHIP_RV635: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 40; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - default: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV670: - num_ps_gprs = 144; - num_vs_gprs = 40; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV770: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 256; - num_vs_stack_entries = 256; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV730: - case CHIP_RV740: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV710: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 48; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - } - - rstate->id = R600_PIPE_STATE_CONFIG; - - /* SQ_CONFIG */ - tmp = 0; - switch (family) { - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV710: - break; - default: - tmp |= S_008C00_VC_ENABLE(1); - break; - } - tmp |= S_008C00_DX9_CONSTS(1); - tmp |= S_008C00_ALU_INST_PREFER_VECTOR(1); - tmp |= S_008C00_PS_PRIO(ps_prio); - tmp |= S_008C00_VS_PRIO(vs_prio); - tmp |= S_008C00_GS_PRIO(gs_prio); - tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); - - /* SQ_GPR_RESOURCE_MGMT_1 */ - tmp = 0; - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); - - /* SQ_GPR_RESOURCE_MGMT_2 */ - tmp = 0; - tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); - - /* SQ_THREAD_RESOURCE_MGMT */ - tmp = 0; - tmp |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); - - /* SQ_STACK_RESOURCE_MGMT_1 */ - tmp = 0; - tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); - - /* SQ_STACK_RESOURCE_MGMT_2 */ - tmp = 0; - tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); - - if (family >= CHIP_RV770) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL, 0x00514000, 0xFFFFFFFF, NULL); - } else { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL, 0x00004010, 0xFFFFFFFF, NULL); - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - return (struct pipe_query*)r600_context_query_create(&rctx->ctx, query_type); -} - -static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - r600_context_query_destroy(&rctx->ctx, (struct r600_query *)query); -} - -static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - rquery->result = 0; - rquery->num_results = 0; - r600_query_begin(&rctx->ctx, (struct r600_query *)query); -} - -static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - r600_query_end(&rctx->ctx, (struct r600_query *)query); -} - -static boolean r600_get_query_result(struct pipe_context *ctx, - struct pipe_query *query, - boolean wait, void *vresult) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - if (rquery->num_results) { - ctx->flush(ctx, 0, NULL); - } - return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); -} - -static void r600_init_query_functions2(struct r600_pipe_context *rctx) -{ - rctx->context.create_query = r600_create_query; - rctx->context.destroy_query = r600_destroy_query; - rctx->context.begin_query = r600_begin_query; - rctx->context.end_query = r600_end_query; - rctx->context.get_query_result = r600_get_query_result; -} - -static void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) -{ - struct pipe_depth_stencil_alpha_state dsa; - struct r600_pipe_state *rstate; - boolean quirk = false; - - if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || - rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) - quirk = true; - - memset(&dsa, 0, sizeof(dsa)); - - if (quirk) { - dsa.depth.enabled = 1; - dsa.depth.func = PIPE_FUNC_LEQUAL; - dsa.stencil[0].enabled = 1; - dsa.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_INCR; - dsa.stencil[0].writemask = 0xff; - } - - rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02880C_DB_SHADER_CONTROL, - 0x0, - S_02880C_DUAL_EXPORT_ENABLE(1), NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028D0C_DB_RENDER_CONTROL, - S_028D0C_DEPTH_COPY_ENABLE(1) | - S_028D0C_STENCIL_COPY_ENABLE(1) | - S_028D0C_COPY_CENTROID(1), - S_028D0C_DEPTH_COPY_ENABLE(1) | - S_028D0C_STENCIL_COPY_ENABLE(1) | - S_028D0C_COPY_CENTROID(1), NULL); - return rstate; -} - -static struct pipe_context *r600_create_context2(struct pipe_screen *screen, void *priv) -{ - struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); - struct r600_screen* rscreen = (struct r600_screen *)screen; - - if (rctx == NULL) - return NULL; - rctx->context.winsys = rscreen->screen.winsys; - rctx->context.screen = screen; - rctx->context.priv = priv; - rctx->context.destroy = r600_destroy_context; - rctx->context.flush = r600_flush2; - - /* Easy accessing of screen/winsys. */ - rctx->screen = rscreen; - rctx->radeon = rscreen->radeon; - rctx->family = r600_get_family(rctx->radeon); - - r600_init_blit_functions2(rctx); - r600_init_query_functions2(rctx); - r600_init_context_resource_functions2(rctx); - - switch (r600_get_family(rctx->radeon)) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - rctx->context.draw_vbo = r600_draw_vbo2; - r600_init_state_functions2(rctx); - if (r600_context_init(&rctx->ctx, rctx->radeon)) { - r600_destroy_context(&rctx->context); - return NULL; - } - r600_init_config2(rctx); - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - rctx->context.draw_vbo = evergreen_draw; - evergreen_init_state_functions2(rctx); - if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { - r600_destroy_context(&rctx->context); - return NULL; - } - evergreen_init_config2(rctx); - break; - default: - R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon)); - r600_destroy_context(&rctx->context); - return NULL; - } - - rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_ib == NULL) { - r600_destroy_context(&rctx->context); - return NULL; - } - - rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (rctx->upload_vb == NULL) { - r600_destroy_context(&rctx->context); - return NULL; - } - - rctx->blitter = util_blitter_create(&rctx->context); - if (rctx->blitter == NULL) { - FREE(rctx); - return NULL; - } - - LIST_INITHEAD(&rctx->query_list); - rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); - - r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth2; - - return &rctx->context; -} - -static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) -{ - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_VERTEX: - break; - case PIPE_SHADER_GEOMETRY: - /* TODO: support and enable geometry programs */ - return 0; - default: - /* TODO: support tessellation on Evergreen */ - return 0; - } - - /* TODO: all these should be fixed, since r600 surely supports much more! */ - switch (param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: - return 16384; - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return 8; /* FIXME */ - case PIPE_SHADER_CAP_MAX_INPUTS: - if(shader == PIPE_SHADER_FRAGMENT) - return 10; - else - return 16; - case PIPE_SHADER_CAP_MAX_TEMPS: - return 256; //max native temporaries - case PIPE_SHADER_CAP_MAX_ADDRS: - return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */ - case PIPE_SHADER_CAP_MAX_CONSTS: - return 256; //max native parameters - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; - case PIPE_SHADER_CAP_MAX_PREDS: - return 0; /* FIXME */ - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 1; - default: - return 0; - } -} - -struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ); -struct pipe_resource *r600_user_buffer_create2(struct pipe_screen *screen, - void *ptr, unsigned bytes, - unsigned bind) -{ - struct pipe_resource *resource; - struct r600_resource *rresource; - struct pipe_resource desc; - struct radeon *radeon = (struct radeon *)screen->winsys; - void *rptr; - - desc.screen = screen; - desc.target = PIPE_BUFFER; - desc.format = PIPE_FORMAT_R8_UNORM; - desc.usage = PIPE_USAGE_IMMUTABLE; - desc.bind = bind; - desc.width0 = bytes; - desc.height0 = 1; - desc.depth0 = 1; - desc.flags = 0; - resource = r600_buffer_create(screen, &desc); - if (resource == NULL) { - return NULL; - } - - rresource = (struct r600_resource *)resource; - rptr = radeon_ws_bo_map(radeon, rresource->bo, 0, NULL); - memcpy(rptr, ptr, bytes); - radeon_ws_bo_unmap(radeon, rresource->bo); - - return resource; -} - -void r600_init_screen_texture_functions(struct pipe_screen *screen); -struct pipe_screen *r600_screen_create2(struct radeon *radeon) -{ - struct r600_screen *rscreen; - - rscreen = CALLOC_STRUCT(r600_screen); - if (rscreen == NULL) { - return NULL; - } - - rscreen->radeon = radeon; - rscreen->screen.winsys = (struct pipe_winsys*)radeon; - rscreen->screen.destroy = r600_destroy_screen; - rscreen->screen.get_name = r600_get_name; - rscreen->screen.get_vendor = r600_get_vendor; - rscreen->screen.get_param = r600_get_param; - rscreen->screen.get_shader_param = r600_get_shader_param; - rscreen->screen.get_paramf = r600_get_paramf; - rscreen->screen.is_format_supported = r600_is_format_supported; - rscreen->screen.context_create = r600_create_context2; - r600_init_screen_texture_functions(&rscreen->screen); - r600_init_screen_resource_functions(&rscreen->screen); -// rscreen->screen.user_buffer_create = r600_user_buffer_create2; - - return &rscreen->screen; -} - -int r600_upload_index_buffer2(struct r600_pipe_context *rctx, struct r600_drawl *draw) -{ - struct pipe_resource *upload_buffer = NULL; - unsigned index_offset = draw->index_buffer_offset; - int ret = 0; - - if (r600_buffer_is_user_buffer(draw->index_buffer)) { - ret = u_upload_buffer(rctx->upload_ib, - index_offset, - draw->count * draw->index_size, - draw->index_buffer, - &index_offset, - &upload_buffer); - if (ret) { - goto done; - } - draw->index_buffer_offset = index_offset; - draw->index_buffer = upload_buffer; - } - -done: - return ret; -} - -int r600_upload_user_buffers2(struct r600_pipe_context *rctx) -{ - enum pipe_error ret = PIPE_OK; - int i, nr; - - nr = rctx->vertex_elements->count; - - for (i = 0; i < nr; i++) { - struct pipe_vertex_buffer *vb = - &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index]; - - if (r600_buffer_is_user_buffer(vb->buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*vb->buffer_offset * 4;*/ - unsigned size = vb->buffer->width0; - unsigned upload_offset; - ret = u_upload_buffer(rctx->upload_vb, - offset, size, - vb->buffer, - &upload_offset, &upload_buffer); - if (ret) - return ret; - - pipe_resource_reference(&vb->buffer, NULL); - vb->buffer = upload_buffer; - vb->buffer_offset = upload_offset; - } - } - return ret; -} diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 81ce1bb190..1c1978f8ab 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "r600d.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -302,6 +303,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_0280A0_SWAP_STD; + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + return V_0280A0_SWAP_STD; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -339,16 +347,19 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_0280A0_SWAP_STD_REV; + case PIPE_FORMAT_R16G16_UNORM: + return V_0280A0_SWAP_STD; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return V_0280A0_COLOR_16_16_16_16; + // return FMT_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return V_0280A0_COLOR_16_16_16_16_FLOAT; + // return FMT_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_0280A0_COLOR_32_32_32_32_FLOAT; + // return FMT_32_32_32_32_FLOAT; return 0; default: R600_ERR("unsupported colorswap format %d\n", format); @@ -383,6 +394,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_0280A0_COLOR_16; + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_0280A0_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -420,6 +438,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_16_16_FLOAT; case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: return V_0280A0_COLOR_16_16; @@ -458,29 +477,6 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE void r600_translate_vertex_num_format(enum pipe_format format, uint32_t *num_format_p, - uint32_t *format_comp_p) -{ - uint32_t num_format = 0, format_comp = 0; - switch (format) { - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R32G32_SSCALED: - format_comp = 1; - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16_USCALED: - case PIPE_FORMAT_R32G32_USCALED: - num_format = V_038008_SQ_NUM_FORMAT_SCALED; - break; - default: - break; - } - *num_format_p = num_format; - *format_comp_p = format_comp; -} - static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) { return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; @@ -502,4 +498,139 @@ static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) return r600_translate_colorformat(format) != ~0; } +static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) +{ + uint32_t result = 0; + const struct util_format_description *desc; + unsigned i; + + desc = util_format_description(format); + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + goto out_unknown; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { + /* Half-floats, floats, doubles */ + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[i].size) { + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16_FLOAT; + break; + case 2: + result = FMT_16_16_FLOAT; + break; + case 3: + result = FMT_16_16_16_FLOAT; + break; + case 4: + result = FMT_16_16_16_16_FLOAT; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32_FLOAT; + break; + case 2: + result = FMT_32_32_FLOAT; + break; + case 3: + result = FMT_32_32_32_FLOAT; + break; + case 4: + result = FMT_32_32_32_32_FLOAT; + break; + } + break; + default: + goto out_unknown; + } + break; + /* Unsigned ints */ + case UTIL_FORMAT_TYPE_UNSIGNED: + /* Signed ints */ + case UTIL_FORMAT_TYPE_SIGNED: + switch (desc->channel[i].size) { + case 8: + switch (desc->nr_channels) { + case 1: + result = FMT_8; + break; + case 2: + result = FMT_8_8; + break; + case 3: + // result = FMT_8_8_8; /* fails piglit draw-vertices test */ + // break; + case 4: + result = FMT_8_8_8_8; + break; + } + break; + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16; + break; + case 2: + result = FMT_16_16; + break; + case 3: + // result = FMT_16_16_16; /* fails piglit draw-vertices test */ + // break; + case 4: + result = FMT_16_16_16_16; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + break; + case 2: + result = FMT_32_32; + break; + case 3: + result = FMT_32_32_32; + break; + case 4: + result = FMT_32_32_32_32; + break; + } + break; + default: + goto out_unknown; + } + break; + default: + goto out_unknown; + } + + result = S_038008_DATA_FORMAT(result); + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= S_038008_FORMAT_COMP_ALL(1); + } + if (desc->channel[i].normalized) { + result |= S_038008_NUM_FORMAT_ALL(0); + } else { + result |= S_038008_NUM_FORMAT_ALL(2); + } + return result; +out_unknown: + R600_ERR("unsupported vertex format %s\n", util_format_name(format)); + return ~0; +} + #endif diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index c24aaeefa7..95906a74eb 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -31,11 +31,11 @@ #include <util/u_inlines.h> #include <util/u_memory.h> #include "state_tracker/drm_driver.h" -#include "r600_screen.h" -#include "r600_context.h" +#include "r600_pipe.h" #include "r600_resource.h" #include "r600_state_inlines.h" #include "r600d.h" +#include "r600_formats.h" extern struct u_resource_vtbl r600_texture_vtbl; @@ -54,11 +54,30 @@ static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_t transfer->box.width, transfer->box.height); } -static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, + +/* Copy from a detiled texture to a tiled one. */ +static void r600_copy_into_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) +{ + struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; + struct pipe_resource *texture = transfer->resource; + struct pipe_subresource subsrc; + + subsrc.face = 0; + subsrc.level = 0; + ctx->resource_copy_region(ctx, texture, transfer->sr, + transfer->box.x, transfer->box.y, transfer->box.z, + rtransfer->linear_texture, subsrc, + 0, 0, 0, + transfer->box.width, transfer->box.height); + + ctx->flush(ctx, 0, NULL); +} + +static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned zslice, unsigned face) { - unsigned long offset = rtex->offset[level]; + unsigned offset = rtex->offset[level]; switch (rtex->resource.base.b.target) { case PIPE_TEXTURE_3D: @@ -73,22 +92,63 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, } } -static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_class chipc) +static unsigned r600_texture_get_stride(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned width, stride; + + if (rtex->pitch_override) + return rtex->pitch_override; + + width = u_minify(ptex->width0, level); + + stride = util_format_get_stride(ptex->format, align(width, 64)); + if (chipc == EVERGREEN) + stride = align(stride, 512); + else + stride = align(stride, 256); + return stride; +} + +static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + unsigned height; + + height = u_minify(ptex->height0, level); + height = util_next_power_of_two(height); + return util_format_get_nblocksy(ptex->format, height); +} + +/* Get a width in pixels from a stride in bytes. */ +static unsigned pitch_to_width(enum pipe_format format, + unsigned pitch_in_bytes) +{ + return (pitch_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + +static void r600_setup_miptree(struct pipe_screen *screen, + struct r600_resource_texture *rtex) { struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned long w, h, pitch, size, layer_size, i, offset; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned pitch, size, layer_size, i, offset; + unsigned nblocksy; - rtex->bpt = util_format_get_blocksize(ptex->format); for (i = 0, offset = 0; i <= ptex->last_level; i++) { - w = u_minify(ptex->width0, i); - h = u_minify(ptex->height0, i); - h = util_next_power_of_two(h); - pitch = util_format_get_stride(ptex->format, align(w, 64)); - if (chipc == EVERGREEN) - pitch = align(pitch, 512); - else - pitch = align(pitch, 256); - layer_size = pitch * h; + pitch = r600_texture_get_stride(screen, rtex, i); + nblocksy = r600_texture_get_nblocksy(screen, rtex, i); + + layer_size = pitch * nblocksy; + if (ptex->target == PIPE_TEXTURE_CUBE) { if (chipc >= R700) size = layer_size * 8; @@ -99,41 +159,63 @@ static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_cla size = layer_size * u_minify(ptex->depth0, i); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch[i] = pitch; - rtex->width[i] = w; - rtex->height[i] = h; + rtex->pitch_in_bytes[i] = pitch; + rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch); offset += size; } rtex->size = offset; } -struct pipe_resource *r600_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ) +static struct r600_resource_texture * +r600_texture_create_object(struct pipe_screen *screen, + const struct pipe_resource *base, + unsigned array_mode, + unsigned pitch_in_bytes_override, + unsigned max_buffer_size, + struct r600_bo *bo) { struct r600_resource_texture *rtex; struct r600_resource *resource; struct radeon *radeon = (struct radeon *)screen->winsys; rtex = CALLOC_STRUCT(r600_resource_texture); - if (!rtex) { + if (rtex == NULL) return NULL; - } + resource = &rtex->resource; - resource->base.b = *templ; + resource->base.b = *base; resource->base.vtbl = &r600_texture_vtbl; pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; - r600_setup_miptree(rtex, radeon_get_family_class(radeon)); - - /* FIXME alignment 4096 enought ? too much ? */ + resource->bo = bo; resource->domain = r600_domain_from_usage(resource->base.b.bind); + rtex->pitch_override = pitch_in_bytes_override; + rtex->array_mode = array_mode; + + if (array_mode) + rtex->tiled = 1; + r600_setup_miptree(screen, rtex); + resource->size = rtex->size; - resource->bo = radeon_ws_bo(radeon, rtex->size, 4096, 0); - if (resource->bo == NULL) { - FREE(rtex); - return NULL; + + if (!resource->bo) { + resource->bo = r600_bo(radeon, rtex->size, 4096, 0); + if (!resource->bo) { + FREE(rtex); + return NULL; + } } - return &resource->base.b; + return rtex; +} + +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + unsigned array_mode = 0; + + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, + 0, 0, NULL); + } static void r600_texture_destroy(struct pipe_screen *screen, @@ -147,7 +229,7 @@ static void r600_texture_destroy(struct pipe_screen *screen, pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); if (resource->bo) { - radeon_ws_bo_reference(radeon, &resource->bo, NULL); + r600_bo_reference(radeon, &resource->bo, NULL); } FREE(rtex); } @@ -159,7 +241,7 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - unsigned long offset; + unsigned offset; if (surface == NULL) return NULL; @@ -184,46 +266,29 @@ static void r600_tex_surface_destroy(struct pipe_surface *surface) FREE(surface); } + struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle) { struct radeon *rw = (struct radeon*)screen->winsys; - struct r600_resource_texture *rtex; - struct r600_resource *resource; - struct radeon_ws_bo *bo = NULL; + struct r600_bo *bo = NULL; + unsigned array_mode = 0; /* Support only 2D textures without mipmaps */ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || templ->depth0 != 1 || templ->last_level != 0) return NULL; - rtex = CALLOC_STRUCT(r600_resource_texture); - if (rtex == NULL) - return NULL; - - bo = radeon_ws_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle, &array_mode); if (bo == NULL) { - FREE(rtex); return NULL; } - resource = &rtex->resource; - resource->base.b = *templ; - resource->base.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->base.b.reference, 1); - resource->base.b.screen = screen; - resource->bo = bo; - rtex->depth = 0; - rtex->pitch_override = whandle->stride; - rtex->bpt = util_format_get_blocksize(templ->format); - rtex->pitch[0] = whandle->stride; - rtex->width[0] = templ->width0; - rtex->height[0] = templ->height0; - rtex->offset[0] = 0; - rtex->size = align(rtex->pitch[0] * templ->height0, 64); - - return &resource->base.b; + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, + whandle->stride, + 0, + bo); } static unsigned int r600_texture_is_referenced(struct pipe_context *context, @@ -249,14 +314,14 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.format = texture->format; resource.width0 = texture->width0; resource.height0 = texture->height0; - resource.depth0 = 0; + resource.depth0 = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; resource.bind = 0; resource.flags = 0; - resource.bind |= PIPE_BIND_RENDER_TARGET; + resource.bind |= PIPE_BIND_DEPTH_STENCIL; rtex->flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource); if (rtex->flushed_depth_texture == NULL) { @@ -287,8 +352,6 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.sr = sr; trans->transfer.usage = usage; trans->transfer.box = *box; - trans->transfer.stride = rtex->pitch[sr.level]; - trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); if (rtex->depth) { r = r600_texture_depth_flush(ctx, texture); if (r < 0) { @@ -302,7 +365,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, resource.format = texture->format; resource.width0 = box->width; resource.height0 = box->height; - resource.depth0 = 0; + resource.depth0 = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; @@ -326,6 +389,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, FREE(trans); return NULL; } + + trans->transfer.stride = + ((struct r600_resource_texture *)trans->linear_texture)->pitch_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is * in a different order, therefore we do detiling using a blit. */ @@ -333,7 +399,10 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, /* Always referenced in the blit. */ ctx->flush(ctx, 0, NULL); } + return &trans->transfer; } + trans->transfer.stride = rtex->pitch_in_bytes[sr.level]; + trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); return &trans->transfer; } @@ -344,12 +413,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; if (rtransfer->linear_texture) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { + r600_copy_into_tiled_texture(ctx, rtransfer); + } pipe_resource_reference(&rtransfer->linear_texture, NULL); } if (rtex->flushed_depth_texture) { - if (transfer->usage & PIPE_TRANSFER_WRITE) { - // TODO - } pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); } pipe_resource_reference(&transfer->resource, NULL); @@ -360,10 +429,10 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct radeon_ws_bo *bo; + struct r600_bo *bo; enum pipe_format format = transfer->resource->format; struct radeon *radeon = (struct radeon *)ctx->screen->winsys; - unsigned long offset = 0; + unsigned offset = 0; char *map; if (rtransfer->linear_texture) { @@ -380,7 +449,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, transfer->box.y / util_format_get_blockheight(format) * transfer->stride + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } - map = radeon_ws_bo_map(radeon, bo, 0, ctx); + map = r600_bo_map(radeon, bo, 0, ctx); if (!map) { return NULL; } @@ -393,7 +462,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct radeon *radeon = (struct radeon *)ctx->screen->winsys; - struct radeon_ws_bo *bo; + struct r600_bo *bo; if (rtransfer->linear_texture) { bo = ((struct r600_resource *)rtransfer->linear_texture)->bo; @@ -406,7 +475,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, bo = ((struct r600_resource *)transfer->resource)->bo; } } - radeon_ws_bo_unmap(radeon, bo); + r600_bo_unmap(radeon, bo); } struct u_resource_vtbl r600_texture_vtbl = @@ -501,15 +570,23 @@ uint32_t r600_translate_texformat(enum pipe_format format, case UTIL_FORMAT_COLORSPACE_ZS: switch (format) { case PIPE_FORMAT_Z16_UNORM: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; + case PIPE_FORMAT_X24S8_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - result = V_0280A0_COLOR_8_24; + result = FMT_8_24; goto out_word4; + case PIPE_FORMAT_S8X24_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - result = V_0280A0_COLOR_24_8; + result = FMT_24_8; + goto out_word4; + case PIPE_FORMAT_S8_USCALED: + result = V_0280A0_COLOR_8; + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); goto out_word4; default: goto out_unknown; @@ -563,7 +640,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && desc->channel[2].size == 5) { - result = V_0280A0_COLOR_5_6_5; + result = FMT_5_6_5; goto out_word4; } goto out_unknown; @@ -572,14 +649,14 @@ uint32_t r600_translate_texformat(enum pipe_format format, desc->channel[1].size == 5 && desc->channel[2].size == 5 && desc->channel[3].size == 1) { - result = V_0280A0_COLOR_1_5_5_5; + result = FMT_1_5_5_5; goto out_word4; } if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && desc->channel[2].size == 10 && desc->channel[3].size == 2) { - result = V_0280A0_COLOR_10_10_10_2; + result = FMT_10_10_10_2; goto out_word4; } goto out_unknown; @@ -587,79 +664,89 @@ uint32_t r600_translate_texformat(enum pipe_format format, goto out_unknown; } + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + goto out_unknown; + /* uniform formats */ - switch (desc->channel[0].type) { + switch (desc->channel[i].type) { case UTIL_FORMAT_TYPE_UNSIGNED: case UTIL_FORMAT_TYPE_SIGNED: - if (!desc->channel[0].normalized && + if (!desc->channel[i].normalized && desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { goto out_unknown; } - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 4: switch (desc->nr_channels) { case 2: - result = V_0280A0_COLOR_4_4; + result = FMT_4_4; goto out_word4; case 4: - result = V_0280A0_COLOR_4_4_4_4; + result = FMT_4_4_4_4; goto out_word4; } goto out_unknown; case 8: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_8; + result = FMT_8; goto out_word4; case 2: - result = V_0280A0_COLOR_8_8; + result = FMT_8_8; goto out_word4; case 4: - result = V_0280A0_COLOR_8_8_8_8; + result = FMT_8_8_8_8; goto out_word4; } goto out_unknown; case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16; + result = FMT_16_16; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16; + result = FMT_16_16_16_16; goto out_word4; } } goto out_unknown; case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16_FLOAT; + result = FMT_16_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16_FLOAT; + result = FMT_16_16_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16_FLOAT; + result = FMT_16_16_16_16_FLOAT; goto out_word4; } goto out_unknown; case 32: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_32_FLOAT; + result = FMT_32_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_32_32_FLOAT; + result = FMT_32_32_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_32_32_32_32_FLOAT; + result = FMT_32_32_32_32_FLOAT; goto out_word4; } } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 47ab1eb965..a3cb5b8600 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -667,6 +667,9 @@ #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) +#define C_02880C_STENCIL_REF_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF @@ -901,6 +904,10 @@ #define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) #define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) #define C_038000_TILE_MODE 0xFFFFFF87 +#define V_038000_ARRAY_LINEAR_GENERAL 0x00000000 +#define V_038000_ARRAY_LINEAR_ALIGNED 0x00000001 +#define V_038000_ARRAY_1D_TILED_THIN1 0x00000002 +#define V_038000_ARRAY_2D_TILED_THIN1 0x00000004 #define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) #define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) #define C_038000_TILE_TYPE 0xFFFFFF7F @@ -1025,40 +1032,7 @@ #define S_038008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_038008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_038008_DATA_FORMAT 0xFC0FFFFF -#define V_038008_COLOR_INVALID 0x00000000 -#define V_038008_COLOR_8 0x00000001 -#define V_038008_COLOR_4_4 0x00000002 -#define V_038008_COLOR_3_3_2 0x00000003 -#define V_038008_COLOR_16 0x00000005 -#define V_038008_COLOR_16_FLOAT 0x00000006 -#define V_038008_COLOR_8_8 0x00000007 -#define V_038008_COLOR_5_6_5 0x00000008 -#define V_038008_COLOR_6_5_5 0x00000009 -#define V_038008_COLOR_1_5_5_5 0x0000000A -#define V_038008_COLOR_4_4_4_4 0x0000000B -#define V_038008_COLOR_5_5_5_1 0x0000000C -#define V_038008_COLOR_32 0x0000000D -#define V_038008_COLOR_32_FLOAT 0x0000000E -#define V_038008_COLOR_16_16 0x0000000F -#define V_038008_COLOR_16_16_FLOAT 0x00000010 -#define V_038008_COLOR_8_24 0x00000011 -#define V_038008_COLOR_8_24_FLOAT 0x00000012 -#define V_038008_COLOR_24_8 0x00000013 -#define V_038008_COLOR_24_8_FLOAT 0x00000014 -#define V_038008_COLOR_10_11_11 0x00000015 -#define V_038008_COLOR_10_11_11_FLOAT 0x00000016 -#define V_038008_COLOR_11_11_10 0x00000017 -#define V_038008_COLOR_11_11_10_FLOAT 0x00000018 -#define V_038008_COLOR_2_10_10_10 0x00000019 -#define V_038008_COLOR_8_8_8_8 0x0000001A -#define V_038008_COLOR_10_10_10_2 0x0000001B -#define V_038008_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_038008_COLOR_32_32 0x0000001D -#define V_038008_COLOR_32_32_FLOAT 0x0000001E -#define V_038008_COLOR_16_16_16_16 0x0000001F -#define V_038008_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_038008_COLOR_32_32_32_32 0x00000022 -#define V_038008_COLOR_32_32_32_32_FLOAT 0x00000023 + #define S_038008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_038008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_038008_NUM_FORMAT_ALL 0xF3FFFFFF @@ -3484,6 +3458,16 @@ #define R_038014_RESOURCE0_WORD5 0x038014 #define R_038018_RESOURCE0_WORD6 0x038018 +#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140 +#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180 +#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940 +#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980 + +#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 +#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 + +#define R_03E200_SQ_LOOP_CONST_0 0x3E200 + #define SQ_TEX_INST_LD 0x03 #define SQ_TEX_INST_GET_GRADIENTS_H 0x7 #define SQ_TEX_INST_GET_GRADIENTS_V 0x8 diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 9c731f2dbb..892dee86ba 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -20,12 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "radeon.h" -#include "r600_context.h" -#include "r600_asm.h" +#include <stdio.h> #include "util/u_memory.h" +#include "r600_pipe.h" +#include "r600_asm.h" #include "r700_sq.h" -#include <stdio.h> int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h deleted file mode 100644 index a7e7982c19..0000000000 --- a/src/gallium/drivers/r600/radeon.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright © 2009 Jerome Glisse <glisse@freedesktop.org> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#ifndef RADEON_H -#define RADEON_H - -#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) - -#include <stdint.h> - -#include <pipe/p_compiler.h> - -typedef uint64_t u64; -typedef uint32_t u32; -typedef uint16_t u16; -typedef uint8_t u8; - -struct radeon; - -enum radeon_family { - CHIP_UNKNOWN, - CHIP_R100, - CHIP_RV100, - CHIP_RS100, - CHIP_RV200, - CHIP_RS200, - CHIP_R200, - CHIP_RV250, - CHIP_RS300, - CHIP_RV280, - CHIP_R300, - CHIP_R350, - CHIP_RV350, - CHIP_RV380, - CHIP_R420, - CHIP_R423, - CHIP_RV410, - CHIP_RS400, - CHIP_RS480, - CHIP_RS600, - CHIP_RS690, - CHIP_RS740, - CHIP_RV515, - CHIP_R520, - CHIP_RV530, - CHIP_RV560, - CHIP_RV570, - CHIP_R580, - CHIP_R600, - CHIP_RV610, - CHIP_RV630, - CHIP_RV670, - CHIP_RV620, - CHIP_RV635, - CHIP_RS780, - CHIP_RS880, - CHIP_RV770, - CHIP_RV730, - CHIP_RV710, - CHIP_RV740, - CHIP_CEDAR, - CHIP_REDWOOD, - CHIP_JUNIPER, - CHIP_CYPRESS, - CHIP_HEMLOCK, - CHIP_LAST, -}; - -enum chip_class { - R600, - R700, - EVERGREEN, -}; - -enum { - R600_SHADER_PS = 1, - R600_SHADER_VS, - R600_SHADER_GS, - R600_SHADER_FS, - R600_SHADER_MAX = R600_SHADER_FS, -}; - -enum radeon_family radeon_get_family(struct radeon *rw); -enum chip_class radeon_get_family_class(struct radeon *radeon); -void radeon_set_mem_constant(struct radeon *radeon, boolean state); - -/* lowlevel WS bo */ -struct radeon_ws_bo; -struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon, - unsigned size, unsigned alignment, unsigned usage); -struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon, - unsigned handle); -void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx); -void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); -void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, - struct radeon_ws_bo *src); - -struct radeon_stype_info; - -/* currently limited to max buffers in a cb flush */ -#define RADEON_STATE_MAX_BO 8 -/* - * states functions - */ -struct radeon_state { - struct radeon *radeon; - unsigned refcount; - struct radeon_stype_info *stype; - unsigned state_id; - unsigned id; - unsigned shader_index; - unsigned nstates; - u32 states[64]; - unsigned npm4; - unsigned cpm4; - u32 pm4_crc; - u32 pm4[128]; - unsigned nbo; - struct radeon_ws_bo *bo[RADEON_STATE_MAX_BO]; - unsigned nreloc; - unsigned reloc_pm4_id[8]; - unsigned reloc_bo_id[8]; - u32 placement[8]; - unsigned bo_dirty[4]; -}; - -int radeon_state_init(struct radeon_state *rstate, struct radeon *radeon, u32 type, u32 id, u32 shader_class); -void radeon_state_fini(struct radeon_state *state); -int radeon_state_pm4(struct radeon_state *state); -int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type); - -/* - * draw functions - */ -struct radeon_draw { - struct radeon *radeon; - struct radeon_state **state; -}; - -int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon); -void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state); -void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state); - -/* - * radeon context functions - */ -#pragma pack(1) -struct radeon_cs_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -struct radeon_ctx; - -struct radeon_ctx *radeon_ctx_init(struct radeon *radeon); -void radeon_ctx_fini(struct radeon_ctx *ctx); -void radeon_ctx_clear(struct radeon_ctx *ctx); -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); -int radeon_ctx_submit(struct radeon_ctx *ctx); -void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); -int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state); - -/* - * R600/R700 - */ - -enum r600_stype { - R600_STATE_CONFIG, - R600_STATE_CB_CNTL, - R600_STATE_RASTERIZER, - R600_STATE_VIEWPORT, - R600_STATE_SCISSOR, - R600_STATE_BLEND, - R600_STATE_DSA, - R600_STATE_SHADER, /* has PS,VS,GS,FS variants */ - R600_STATE_CONSTANT, /* has PS,VS,GS,FS variants */ - R600_STATE_CBUF, /* has PS,VS,GS,FS variants */ - R600_STATE_RESOURCE, /* has PS,VS,GS,FS variants */ - R600_STATE_SAMPLER, /* has PS,VS,GS,FS variants */ - R600_STATE_SAMPLER_BORDER, /* has PS,VS,GS,FS variants */ - R600_STATE_CB0, - R600_STATE_CB1, - R600_STATE_CB2, - R600_STATE_CB3, - R600_STATE_CB4, - R600_STATE_CB5, - R600_STATE_CB6, - R600_STATE_CB7, - R600_STATE_DB, - R600_STATE_QUERY_BEGIN, - R600_STATE_QUERY_END, - R600_STATE_UCP, - R600_STATE_VGT, - R600_STATE_DRAW, - R600_STATE_CB_FLUSH, - R600_STATE_DB_FLUSH, - R600_STATE_MAX, -}; - -#include "r600_states_inc.h" -#include "eg_states_inc.h" - -/* R600 QUERY BEGIN/END */ -#define R600_QUERY__OFFSET 0 -#define R600_QUERY_SIZE 1 -#define R600_QUERY_PM4 128 - -#endif diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 67e2c8f8bc..346e1b402b 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -158,9 +158,17 @@ exec_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { + + for (j = 0; j < 4; j++) quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; - } + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + + for (j = 0; j < 4; j++) + quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index daa158df7c..5b18cd035e 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -169,9 +169,15 @@ fs_sse_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { - quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; - } + for (j = 0; j < 4; j++) + quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + for (j = 0; j < 4; j++) + quad->output.stencil[j] = machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h index a3236bd116..e745aa8061 100644 --- a/src/gallium/drivers/softpipe/sp_quad.h +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -85,6 +85,7 @@ struct quad_header_output /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; float depth[QUAD_SIZE]; + uint8_t stencil[QUAD_SIZE]; }; diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index e9b9262617..c8f5f89568 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -47,6 +47,8 @@ struct depth_data { unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ ubyte stencilVals[QUAD_SIZE]; + boolean use_shader_stencil_refs; + ubyte shader_stencil_refs[QUAD_SIZE]; struct softpipe_cached_tile *tile; }; @@ -186,6 +188,33 @@ convert_quad_depth( struct depth_data *data, } +/** + * Compute the depth_data::shader_stencil_refs[] values from the float fragment stencil values. + */ +static void +convert_quad_stencil( struct depth_data *data, + const struct quad_header *quad ) +{ + unsigned j; + + data->use_shader_stencil_refs = TRUE; + /* Copy quads stencil values + */ + switch (data->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + { + for (j = 0; j < QUAD_SIZE; j++) { + data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); + } + } + break; + default: + assert(0); + } +} /** * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. @@ -272,8 +301,14 @@ do_stencil_test(struct depth_data *data, { unsigned passMask = 0x0; unsigned j; + ubyte refs[QUAD_SIZE]; - ref &= valMask; + for (j = 0; j < QUAD_SIZE; j++) { + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j] & valMask; + else + refs[j] = ref & valMask; + } switch (func) { case PIPE_FUNC_NEVER: @@ -281,42 +316,42 @@ do_stencil_test(struct depth_data *data, break; case PIPE_FUNC_LESS: for (j = 0; j < QUAD_SIZE; j++) { - if (ref < (data->stencilVals[j] & valMask)) { + if (refs[j] < (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_EQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref == (data->stencilVals[j] & valMask)) { + if (refs[j] == (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_LEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref <= (data->stencilVals[j] & valMask)) { + if (refs[j] <= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GREATER: for (j = 0; j < QUAD_SIZE; j++) { - if (ref > (data->stencilVals[j] & valMask)) { + if (refs[j] > (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_NOTEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref != (data->stencilVals[j] & valMask)) { + if (refs[j] != (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref >= (data->stencilVals[j] & valMask)) { + if (refs[j] >= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } @@ -348,9 +383,14 @@ apply_stencil_op(struct depth_data *data, { unsigned j; ubyte newstencil[QUAD_SIZE]; + ubyte refs[QUAD_SIZE]; for (j = 0; j < QUAD_SIZE; j++) { newstencil[j] = data->stencilVals[j]; + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j]; + else + refs[j] = ref; } switch (op) { @@ -367,7 +407,7 @@ apply_stencil_op(struct depth_data *data, case PIPE_STENCIL_OP_REPLACE: for (j = 0; j < QUAD_SIZE; j++) { if (mask & (1 << j)) { - newstencil[j] = ref; + newstencil[j] = refs[j]; } } break; @@ -688,8 +728,10 @@ depth_test_quads_fallback(struct quad_stage *qs, unsigned i, pass = 0; const struct sp_fragment_shader *fs = qs->softpipe->fs; boolean interp_depth = !fs->info.writes_z; + boolean shader_stencil_ref = fs->info.writes_stencil; struct depth_data data; + data.use_shader_stencil_refs = FALSE; if (qs->softpipe->depth_stencil->alpha.enabled) { nr = alpha_test_quads(qs, quads, nr); @@ -716,6 +758,9 @@ depth_test_quads_fallback(struct quad_stage *qs, } if (qs->softpipe->depth_stencil->stencil[0].enabled) { + if (shader_stencil_ref) + convert_quad_stencil(&data, quads[i]); + depth_stencil_test_quad(qs, &data, quads[i]); write_depth_stencil_values(&data, quads[i]); } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 43b8e88e33..2cfd02a22c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -47,7 +47,8 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && !sp->fs->info.uses_kill && - !sp->fs->info.writes_z; + !sp->fs->info.writes_z && + !sp->fs->info.writes_stencil; sp->quad.first = sp->quad.blend; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 2053d02f62..37557d1194 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -114,6 +114,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 1; default: return 0; } diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index aedb5bb19b..b59fbc33ed 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -33,7 +33,6 @@ #include "util/u_inlines.h" #include "draw/draw_context.h" -#include "draw/draw_context.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 271cd4aff5..04f30f82c3 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -92,15 +92,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "draw_vbo"); trace_dump_arg(ptr, pipe); - trace_dump_arg(bool, info->indexed); - trace_dump_arg(uint, info->mode); - trace_dump_arg(uint, info->start); - trace_dump_arg(uint, info->count); - trace_dump_arg(uint, info->start_instance); - trace_dump_arg(uint, info->instance_count); - trace_dump_arg(int, info->index_bias); - trace_dump_arg(uint, info->min_index); - trace_dump_arg(uint, info->max_index); + trace_dump_arg(draw_info, info); pipe->draw_vbo(pipe, info); @@ -987,24 +979,24 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, static INLINE void trace_context_set_index_buffer(struct pipe_context *_pipe, - const struct pipe_index_buffer *_ib) + const struct pipe_index_buffer *ib) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_index_buffer unwrapped_ib, *ib = NULL; - - if (_ib) { - unwrapped_ib = *_ib; - unwrapped_ib.buffer = trace_resource_unwrap(tr_ctx, _ib->buffer); - ib = &unwrapped_ib; - } trace_dump_call_begin("pipe_context", "set_index_buffer"); trace_dump_arg(ptr, pipe); - trace_dump_arg(index_buffer, _ib); + trace_dump_arg(index_buffer, ib); - pipe->set_index_buffer(pipe, ib); + if (ib) { + struct pipe_index_buffer _ib; + _ib = *ib; + _ib.buffer = trace_resource_unwrap(tr_ctx, ib->buffer); + pipe->set_index_buffer(pipe, &_ib); + } else { + pipe->set_index_buffer(pipe, NULL); + } trace_dump_call_end(); } diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index bd9a9bfaf1..8f81606032 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -573,3 +573,32 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state) trace_dump_struct_end(); } + + +void trace_dump_draw_info(const struct pipe_draw_info *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_draw_info"); + + trace_dump_member(bool, state, indexed); + + trace_dump_member(uint, state, mode); + trace_dump_member(uint, state, start); + trace_dump_member(uint, state, count); + + trace_dump_member(uint, state, start_instance); + trace_dump_member(uint, state, instance_count); + + trace_dump_member(int, state, index_bias); + trace_dump_member(uint, state, min_index); + trace_dump_member(uint, state, max_index); + + trace_dump_struct_end(); +} diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 2e70f4e1c7..078d208610 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -79,5 +79,7 @@ void trace_dump_index_buffer(const struct pipe_index_buffer *state); void trace_dump_vertex_element(const struct pipe_vertex_element *state); +void trace_dump_draw_info(const struct pipe_draw_info *state); + #endif /* TR_STATE_H */ |