diff options
| author | Michal Krol <michal@vmware.com> | 2009-09-17 12:44:24 +0200 | 
|---|---|---|
| committer | Michal Krol <michal@vmware.com> | 2009-09-17 12:44:24 +0200 | 
| commit | 2a661c383fee65bc4413541e706925fa3e9b9cf5 (patch) | |
| tree | dbea0c9d65d17b81720fe2f161604dfc91eb0546 /src/gallium/drivers | |
| parent | 90daefd1c474a6e0502df5053b581987c12b8673 (diff) | |
| parent | 21caa29fbd332a2ee05a58df91e1664fbbc4e61f (diff) | |
Merge commit 'origin/master' into glsl-pp-rework-2
Conflicts:
	src/gallium/winsys/gdi/SConscript
Diffstat (limited to 'src/gallium/drivers')
58 files changed, 1254 insertions, 836 deletions
| diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index bd48ce7005..9161747fdb 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -64,8 +64,6 @@ cell_get_param(struct pipe_screen *screen, int param)        return 1;     case PIPE_CAP_GLSL:        return 1; -   case PIPE_CAP_S3TC: -      return 0;     case PIPE_CAP_ANISOTROPIC_FILTER:        return 0;     case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 9f017a14cc..a1dd43c1bc 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -101,8 +101,6 @@ i915_get_param(struct pipe_screen *screen, int param)        return 1;     case PIPE_CAP_GLSL:        return 0; -   case PIPE_CAP_S3TC: -      return 0;     case PIPE_CAP_ANISOTROPIC_FILTER:        return 0;     case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index b22e105f10..fb68fd624b 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -85,8 +85,6 @@ brw_get_param(struct pipe_screen *screen, int param)        return 1;     case PIPE_CAP_GLSL:        return 0; -   case PIPE_CAP_S3TC: -      return 0;     case PIPE_CAP_ANISOTROPIC_FILTER:        return 0;     case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 06c586e6bb..cd7b6356d2 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -3,6 +3,8 @@ include $(TOP)/configs/current  LIBNAME = llvmpipe +CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +  C_SOURCES = \  	lp_bld_alpha.c \  	lp_bld_arit.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 
dea4b703c4..f4a9a3b22e 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -3,7 +3,7 @@ Import('*')  env = env.Clone()  env.Tool('llvm') -if env.has_key('LLVM_VERSION') is False: +if not env.has_key('LLVM_VERSION'):      print 'warning: LLVM not found: not building llvmpipe'      Return() diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index 49c2f911af..2b4bc5c819 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -45,7 +45,7 @@  void  lp_build_alpha_test(LLVMBuilderRef builder,                      const struct pipe_alpha_state *state, -                    union lp_type type, +                    struct lp_type type,                      struct lp_build_mask_context *mask,                      LLVMValueRef alpha,                      LLVMValueRef ref) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 9dbcdb4daa..634575670d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -38,14 +38,14 @@  #include <llvm-c/Core.h>    struct pipe_alpha_state; -union lp_type; +struct lp_type;  struct lp_build_mask_context;  void  lp_build_alpha_test(LLVMBuilderRef builder,                      const struct pipe_alpha_state *state, -                    union lp_type type, +                    struct lp_type type,                      struct lp_build_mask_context *mask,                      LLVMValueRef alpha,                      LLVMValueRef ref); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index ce3e5f91c0..0b115fc9b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -65,7 +65,7 @@ lp_build_min_simple(struct lp_build_context *bld,                      LLVMValueRef a,                      
LLVMValueRef b)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     const char *intrinsic = NULL;     LLVMValueRef cond; @@ -113,7 +113,7 @@ lp_build_max_simple(struct lp_build_context *bld,                      LLVMValueRef a,                      LLVMValueRef b)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     const char *intrinsic = NULL;     LLVMValueRef cond; @@ -159,7 +159,7 @@ LLVMValueRef  lp_build_comp(struct lp_build_context *bld,                LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     if(a == bld->one)        return bld->zero; @@ -188,7 +188,7 @@ lp_build_add(struct lp_build_context *bld,               LLVMValueRef a,               LLVMValueRef b)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMValueRef res;     if(a == bld->zero) @@ -241,7 +241,7 @@ lp_build_sub(struct lp_build_context *bld,               LLVMValueRef a,               LLVMValueRef b)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMValueRef res;     if(b == bld->zero) @@ -405,7 +405,7 @@ lp_build_mul(struct lp_build_context *bld,               LLVMValueRef a,               LLVMValueRef b)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     if(a == bld->zero)        return bld->zero; @@ -477,7 +477,7 @@ lp_build_div(struct lp_build_context *bld,               LLVMValueRef a,               LLVMValueRef b)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     if(a == bld->zero)        return bld->zero; @@ -590,7 +590,7 @@ LLVMValueRef  lp_build_abs(struct lp_build_context *bld,               LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMTypeRef vec_type = lp_build_vec_type(type);     if(!type.sign) @@ -627,7 
+627,7 @@ LLVMValueRef  lp_build_sgn(struct lp_build_context *bld,               LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMTypeRef vec_type = lp_build_vec_type(type);     LLVMValueRef cond;     LLVMValueRef res; @@ -678,7 +678,7 @@ lp_build_round_sse41(struct lp_build_context *bld,                       LLVMValueRef a,                       enum lp_build_round_sse41_mode mode)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMTypeRef vec_type = lp_build_vec_type(type);     const char *intrinsic; @@ -706,7 +706,7 @@ LLVMValueRef  lp_build_round(struct lp_build_context *bld,                 LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     assert(type.floating); @@ -724,7 +724,7 @@ LLVMValueRef  lp_build_floor(struct lp_build_context *bld,                 LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     assert(type.floating); @@ -742,7 +742,7 @@ LLVMValueRef  lp_build_ceil(struct lp_build_context *bld,                LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     assert(type.floating); @@ -760,7 +760,7 @@ LLVMValueRef  lp_build_trunc(struct lp_build_context *bld,                 LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     assert(type.floating); @@ -782,7 +782,7 @@ LLVMValueRef  lp_build_int(struct lp_build_context *bld,               LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);     assert(type.floating); @@ -805,7 +805,7 @@ LLVMValueRef  lp_build_sqrt(struct lp_build_context *bld,                LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;   
  LLVMTypeRef vec_type = lp_build_vec_type(type);     char intrinsic[32]; @@ -823,7 +823,7 @@ LLVMValueRef  lp_build_rcp(struct lp_build_context *bld,               LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     if(a == bld->zero)        return bld->undef; @@ -854,7 +854,7 @@ LLVMValueRef  lp_build_rsqrt(struct lp_build_context *bld,                 LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     assert(type.floating); @@ -875,7 +875,7 @@ LLVMValueRef  lp_build_cos(struct lp_build_context *bld,                LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMTypeRef vec_type = lp_build_vec_type(type);     char intrinsic[32]; @@ -895,7 +895,7 @@ LLVMValueRef  lp_build_sin(struct lp_build_context *bld,                LLVMValueRef a)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMTypeRef vec_type = lp_build_vec_type(type);     char intrinsic[32]; @@ -966,7 +966,7 @@ lp_build_polynomial(struct lp_build_context *bld,                      const double *coeffs,                      unsigned num_coeffs)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMValueRef res = NULL;     unsigned i; @@ -1014,7 +1014,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,                       LLVMValueRef *p_frac_part,                       LLVMValueRef *p_exp2)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMTypeRef vec_type = lp_build_vec_type(type);     LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);     LLVMValueRef ipart = NULL; @@ -1107,7 +1107,7 @@ lp_build_log2_approx(struct lp_build_context *bld,                       LLVMValueRef *p_floor_log2,                       LLVMValueRef *p_log2)  { -   const union lp_type type = bld->type; +   const 
struct lp_type type = bld->type;     LLVMTypeRef vec_type = lp_build_vec_type(type);     LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 5e083b847f..d68a97c4b8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -40,7 +40,7 @@  #include <llvm-c/Core.h>   -union lp_type type; +struct lp_type type;  struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index d19e18846c..da272e549f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -46,7 +46,7 @@  struct pipe_blend_state; -union lp_type; +struct lp_type;  struct lp_build_context; @@ -74,7 +74,7 @@ lp_build_blend_func(struct lp_build_context *bld,  LLVMValueRef  lp_build_blend_aos(LLVMBuilderRef builder,                     const struct pipe_blend_state *blend, -                   union lp_type type, +                   struct lp_type type,                     LLVMValueRef src,                     LLVMValueRef dst,                     LLVMValueRef const_, @@ -84,7 +84,7 @@ lp_build_blend_aos(LLVMBuilderRef builder,  void  lp_build_blend_soa(LLVMBuilderRef builder,                     const struct pipe_blend_state *blend, -                   union lp_type type, +                   struct lp_type type,                     LLVMValueRef src[4],                     LLVMValueRef dst[4],                     LLVMValueRef const_[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index c11a9398f8..d14f468ba9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -303,7 +303,7 @@ lp_build_blend_func(struct lp_build_context *bld,  LLVMValueRef  lp_build_blend_aos(LLVMBuilderRef builder,                     
const struct pipe_blend_state *blend, -                   union lp_type type, +                   struct lp_type type,                     LLVMValueRef src,                     LLVMValueRef dst,                     LLVMValueRef const_, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index b92254a7d6..9511299d55 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -199,7 +199,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,  void  lp_build_blend_soa(LLVMBuilderRef builder,                     const struct pipe_blend_state *blend, -                   union lp_type type, +                   struct lp_type type,                     LLVMValueRef src[4],                     LLVMValueRef dst[4],                     LLVMValueRef con[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c index 21487365ea..c8eaa8c394 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c @@ -42,7 +42,7 @@  unsigned -lp_mantissa(union lp_type type) +lp_mantissa(struct lp_type type)  {     assert(type.floating); @@ -72,7 +72,7 @@ lp_mantissa(union lp_type type)   * Same as lp_const_scale(), but in terms of shifts.   */  unsigned -lp_const_shift(union lp_type type) +lp_const_shift(struct lp_type type)  {     if(type.floating)        return 0; @@ -86,7 +86,7 @@ lp_const_shift(union lp_type type)  unsigned -lp_const_offset(union lp_type type) +lp_const_offset(struct lp_type type)  {     if(type.floating || type.fixed)        return 0; @@ -104,7 +104,7 @@ lp_const_offset(union lp_type type)   * else for the fixed points types and normalized integers.   
*/  double -lp_const_scale(union lp_type type) +lp_const_scale(struct lp_type type)  {     unsigned long long llscale;     double dscale; @@ -122,7 +122,7 @@ lp_const_scale(union lp_type type)   * Minimum value representable by the type.   */  double -lp_const_min(union lp_type type) +lp_const_min(struct lp_type type)  {     unsigned bits; @@ -158,7 +158,7 @@ lp_const_min(union lp_type type)   * Maximum value representable by the type.   */  double -lp_const_max(union lp_type type) +lp_const_max(struct lp_type type)  {     unsigned bits; @@ -190,7 +190,7 @@ lp_const_max(union lp_type type)  double -lp_const_eps(union lp_type type) +lp_const_eps(struct lp_type type)  {     if (type.floating) {        switch(type.width) { @@ -211,7 +211,7 @@ lp_const_eps(union lp_type type)  LLVMValueRef -lp_build_undef(union lp_type type) +lp_build_undef(struct lp_type type)  {     LLVMTypeRef vec_type = lp_build_vec_type(type);     return LLVMGetUndef(vec_type); @@ -219,7 +219,7 @@ lp_build_undef(union lp_type type)  LLVMValueRef -lp_build_zero(union lp_type type) +lp_build_zero(struct lp_type type)  {     LLVMTypeRef vec_type = lp_build_vec_type(type);     return LLVMConstNull(vec_type); @@ -227,7 +227,7 @@ lp_build_zero(union lp_type type)  LLVMValueRef -lp_build_one(union lp_type type) +lp_build_one(struct lp_type type)  {     LLVMTypeRef elem_type;     LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; @@ -269,7 +269,7 @@ lp_build_one(union lp_type type)  LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type,                        double val)  {     LLVMTypeRef elem_type = lp_build_elem_type(type); @@ -295,7 +295,7 @@ lp_build_const_scalar(union lp_type type,  LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type,                            long long val)  {     LLVMTypeRef elem_type = lp_build_int_elem_type(type); @@ -312,7 +312,7 @@ lp_build_int_const_scalar(union lp_type type,  
LLVMValueRef -lp_build_const_aos(union lp_type type,  +lp_build_const_aos(struct lp_type type,                      double r, double g, double b, double a,                      const unsigned char *swizzle)  { @@ -352,8 +352,8 @@ lp_build_const_aos(union lp_type type,  LLVMValueRef -lp_build_const_mask_aos(union lp_type type, -                        boolean cond[4]) +lp_build_const_mask_aos(struct lp_type type, +                        const boolean cond[4])  {     LLVMTypeRef elem_type = LLVMIntType(type.width);     LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h index 1934530ea3..ffb302f736 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h @@ -42,67 +42,67 @@  #include <pipe/p_compiler.h> -union lp_type type; +struct lp_type type;  unsigned -lp_mantissa(union lp_type type); +lp_mantissa(struct lp_type type);  unsigned -lp_const_shift(union lp_type type); +lp_const_shift(struct lp_type type);  unsigned -lp_const_offset(union lp_type type); +lp_const_offset(struct lp_type type);  double -lp_const_scale(union lp_type type); +lp_const_scale(struct lp_type type);  double -lp_const_min(union lp_type type); +lp_const_min(struct lp_type type);  double -lp_const_max(union lp_type type); +lp_const_max(struct lp_type type);  double -lp_const_eps(union lp_type type); +lp_const_eps(struct lp_type type);  LLVMValueRef -lp_build_undef(union lp_type type); +lp_build_undef(struct lp_type type);  LLVMValueRef -lp_build_zero(union lp_type type); +lp_build_zero(struct lp_type type);  LLVMValueRef -lp_build_one(union lp_type type); +lp_build_one(struct lp_type type);  LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type,                        double val);  LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type,                         
   long long val);  LLVMValueRef -lp_build_const_aos(union lp_type type,  +lp_build_const_aos(struct lp_type type,                      double r, double g, double b, double a,                      const unsigned char *swizzle);  LLVMValueRef -lp_build_const_mask_aos(union lp_type type, -                        boolean cond[4]); +lp_build_const_mask_aos(struct lp_type type, +                        const boolean cond[4]);  #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index c5a71d2c72..186cac70f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -86,7 +86,7 @@   */  LLVMValueRef  lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, -                                        union lp_type src_type, +                                        struct lp_type src_type,                                          unsigned dst_width,                                          LLVMValueRef src)  { @@ -152,7 +152,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,  LLVMValueRef  lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,                                  unsigned src_width, -                                union lp_type dst_type, +                                struct lp_type dst_type,                                  LLVMValueRef src)  {     LLVMTypeRef vec_type = lp_build_vec_type(dst_type); @@ -248,8 +248,8 @@ lp_build_const_pack_shuffle(unsigned n)   */  static void  lp_build_expand(LLVMBuilderRef builder, -               union lp_type src_type, -               union lp_type dst_type, +               struct lp_type src_type, +               struct lp_type dst_type,                 LLVMValueRef src,                 LLVMValueRef *dst, unsigned num_dsts)  { @@ -266,7 +266,7 @@ lp_build_expand(LLVMBuilderRef builder,     dst[0] = src;     while(src_type.width < dst_type.width) { -      union lp_type 
new_type = src_type; +      struct lp_type new_type = src_type;        LLVMTypeRef new_vec_type;        new_type.width *= 2; @@ -314,8 +314,8 @@ lp_build_expand(LLVMBuilderRef builder,   */  static LLVMValueRef  lp_build_pack2(LLVMBuilderRef builder, -               union lp_type src_type, -               union lp_type dst_type, +               struct lp_type src_type, +               struct lp_type dst_type,                 boolean clamped,                 LLVMValueRef lo,                 LLVMValueRef hi) @@ -392,8 +392,8 @@ lp_build_pack2(LLVMBuilderRef builder,   */  static LLVMValueRef  lp_build_pack(LLVMBuilderRef builder, -              union lp_type src_type, -              union lp_type dst_type, +              struct lp_type src_type, +              struct lp_type dst_type,                boolean clamped,                const LLVMValueRef *src, unsigned num_srcs)  { @@ -410,7 +410,7 @@ lp_build_pack(LLVMBuilderRef builder,        tmp[i] = src[i];     while(src_type.width > dst_type.width) { -      union lp_type new_type = src_type; +      struct lp_type new_type = src_type;        new_type.width /= 2;        new_type.length *= 2; @@ -442,12 +442,12 @@ lp_build_pack(LLVMBuilderRef builder,   */  void  lp_build_conv(LLVMBuilderRef builder, -              union lp_type src_type, -              union lp_type dst_type, +              struct lp_type src_type, +              struct lp_type dst_type,                const LLVMValueRef *src, unsigned num_srcs,                LLVMValueRef *dst, unsigned num_dsts)  { -   union lp_type tmp_type; +   struct lp_type tmp_type;     LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];     unsigned num_tmps;     unsigned i; @@ -470,7 +470,7 @@ lp_build_conv(LLVMBuilderRef builder,      * Clamp if necessary      */ -   if(src_type.value != dst_type.value) { +   if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {        struct lp_build_context bld;        double src_min = lp_const_min(src_type);        double dst_min = 
lp_const_min(dst_type); @@ -656,8 +656,8 @@ lp_build_conv(LLVMBuilderRef builder,   */  void  lp_build_conv_mask(LLVMBuilderRef builder, -                   union lp_type src_type, -                   union lp_type dst_type, +                   struct lp_type src_type, +                   struct lp_type dst_type,                     const LLVMValueRef *src, unsigned num_srcs,                     LLVMValueRef *dst, unsigned num_dsts)  { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h index 05c1ef2a10..ca378804d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h @@ -40,33 +40,33 @@  #include <llvm-c/Core.h>   -union lp_type type; +struct lp_type type;  LLVMValueRef  lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, -                                        union lp_type src_type, +                                        struct lp_type src_type,                                          unsigned dst_width,                                          LLVMValueRef src);  LLVMValueRef  lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,                                  unsigned src_width, -                                union lp_type dst_type, +                                struct lp_type dst_type,                                  LLVMValueRef src);  void  lp_build_conv(LLVMBuilderRef builder, -              union lp_type src_type, -              union lp_type dst_type, +              struct lp_type src_type, +              struct lp_type dst_type,                const LLVMValueRef *srcs, unsigned num_srcs,                LLVMValueRef *dsts, unsigned num_dsts);  void  lp_build_conv_mask(LLVMBuilderRef builder, -                   union lp_type src_type, -                   union lp_type dst_type, +                   struct lp_type src_type, +                   struct lp_type dst_type,                     const LLVMValueRef *src, unsigned num_srcs,   
                  LLVMValueRef *dst, unsigned num_dsts); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index e5fe81193f..21c665c4d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -71,11 +71,11 @@  /**   * Return a type appropriate for depth/stencil testing.   */ -union lp_type +struct lp_type  lp_depth_type(const struct util_format_description *format_desc,                unsigned length)  { -   union lp_type type; +   struct lp_type type;     unsigned swizzle;     assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); @@ -85,7 +85,7 @@ lp_depth_type(const struct util_format_description *format_desc,     swizzle = format_desc->swizzle[0];     assert(swizzle < 4); -   type.value = 0; +   memset(&type, 0, sizeof type);     type.width = format_desc->block.bits;     if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { @@ -114,7 +114,7 @@ lp_depth_type(const struct util_format_description *format_desc,  void  lp_build_depth_test(LLVMBuilderRef builder,                      const struct pipe_depth_state *state, -                    union lp_type type, +                    struct lp_type type,                      const struct util_format_description *format_desc,                      struct lp_build_mask_context *mask,                      LLVMValueRef src, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 5d2e042fcc..79d6981bb5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -41,11 +41,11 @@  struct pipe_depth_state;  struct util_format_description; -union lp_type; +struct lp_type;  struct lp_build_mask_context; -union lp_type +struct lp_type  lp_depth_type(const struct util_format_description *format_desc,                unsigned length); @@ -53,7 +53,7 @@ lp_depth_type(const struct util_format_description 
*format_desc,  void  lp_build_depth_test(LLVMBuilderRef builder,                      const struct pipe_depth_state *state, -                    union lp_type type, +                    struct lp_type type,                      const struct util_format_description *format_desc,                      struct lp_build_mask_context *mask,                      LLVMValueRef src, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 69ed014ff3..dcc25fbff8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -405,7 +405,7 @@ lp_build_mask_check(struct lp_build_mask_context *mask)  void  lp_build_mask_begin(struct lp_build_mask_context *mask,                      struct lp_build_flow_context *flow, -                    union lp_type type, +                    struct lp_type type,                      LLVMValueRef value)  {     memset(mask, 0, sizeof *mask); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 9d76e3064d..e61999ff06 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -38,7 +38,7 @@  #include <llvm-c/Core.h>   -union lp_type; +struct lp_type;  struct lp_build_flow_context; @@ -84,7 +84,7 @@ struct lp_build_mask_context  void  lp_build_mask_begin(struct lp_build_mask_context *mask,                      struct lp_build_flow_context *flow, -                    union lp_type type, +                    struct lp_type type,                      LLVMValueRef value);  /** diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 5ee0656093..6d3f692619 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -39,7 +39,7 @@  #include "pipe/p_format.h"  struct util_format_description; -union lp_type; +struct lp_type;  /** @@ -103,7 +103,7 @@ 
lp_build_gather(LLVMBuilderRef builder,  void  lp_build_unpack_rgba_soa(LLVMBuilderRef builder,                           const struct util_format_description *format_desc, -                         union lp_type type, +                         struct lp_type type,                           LLVMValueRef packed,                           LLVMValueRef *rgba); @@ -111,7 +111,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,  void  lp_build_load_rgba_soa(LLVMBuilderRef builder,                         const struct util_format_description *format_desc, -                       union lp_type type, +                       struct lp_type type,                         LLVMValueRef base_ptr,                         LLVMValueRef offsets,                         LLVMValueRef *rgba); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index 569e8d10a3..b5ff434e1a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -84,7 +84,7 @@ lp_build_gather(LLVMBuilderRef builder,  static LLVMValueRef -lp_build_format_swizzle(union lp_type type, +lp_build_format_swizzle(struct lp_type type,                          const LLVMValueRef *inputs,                          enum util_format_swizzle swizzle)  { @@ -110,7 +110,7 @@ lp_build_format_swizzle(union lp_type type,  void  lp_build_unpack_rgba_soa(LLVMBuilderRef builder,                           const struct util_format_description *format_desc, -                         union lp_type type, +                         struct lp_type type,                           LLVMValueRef packed,                           LLVMValueRef *rgba)  { @@ -188,7 +188,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,  void  lp_build_load_rgba_soa(LLVMBuilderRef builder,                         const struct util_format_description *format_desc, -                       union lp_type type, +                       struct lp_type 
type,                         LLVMValueRef base_ptr,                         LLVMValueRef offsets,                         LLVMValueRef *rgba) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index cfe20a0d75..338dbca6d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -292,7 +292,7 @@ void  lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,                           const struct tgsi_token *tokens,                           LLVMBuilderRef builder, -                         union lp_type type, +                         struct lp_type type,                           LLVMValueRef a0_ptr,                           LLVMValueRef dadx_ptr,                           LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 9194f6233a..9c57a10879 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -83,7 +83,7 @@ void  lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,                           const struct tgsi_token *tokens,                           LLVMBuilderRef builder, -                         union lp_type type, +                         struct lp_type type,                           LLVMValueRef a0_ptr,                           LLVMValueRef dadx_ptr,                           LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 995a69c0f4..6b6f820769 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -45,7 +45,7 @@ lp_build_cmp(struct lp_build_context *bld,               LLVMValueRef a,               LLVMValueRef b)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     LLVMTypeRef vec_type = 
lp_build_vec_type(type);     LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);     LLVMValueRef zeros = LLVMConstNull(int_vec_type); @@ -301,7 +301,7 @@ lp_build_select(struct lp_build_context *bld,                  LLVMValueRef a,                  LLVMValueRef b)  { -   union lp_type type = bld->type; +   struct lp_type type = bld->type;     LLVMValueRef res;     if(a == b) @@ -339,7 +339,7 @@ lp_build_select_aos(struct lp_build_context *bld,                      LLVMValueRef b,                      const boolean cond[4])  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     const unsigned n = type.length;     unsigned i, j; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index 9099e0fb5b..a4ee7723b5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -42,7 +42,7 @@  #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ -union lp_type type; +struct lp_type type;  struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 6f565af76d..403d0e4836 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -40,7 +40,7 @@  struct pipe_texture;  struct pipe_sampler_state; -union lp_type; +struct lp_type;  /** @@ -123,7 +123,7 @@ void  lp_build_sample_soa(LLVMBuilderRef builder,                      const struct lp_sampler_static_state *static_state,                      struct lp_sampler_dynamic_state *dynamic_state, -                    union lp_type fp_type, +                    struct lp_type fp_type,                      unsigned unit,                      unsigned num_coords,                      const LLVMValueRef *coords, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 3ca25b0e76..8ca1be6f1b 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -97,15 +97,15 @@ struct lp_build_sample_context     const struct util_format_description *format_desc;     /** Incoming coordinates type and build context */ -   union lp_type coord_type; +   struct lp_type coord_type;     struct lp_build_context coord_bld;     /** Integer coordinates */ -   union lp_type int_coord_type; +   struct lp_type int_coord_type;     struct lp_build_context int_coord_bld;     /** Output texels type and build context */ -   union lp_type texel_type; +   struct lp_type texel_type;     struct lp_build_context texel_bld;  }; @@ -208,6 +208,11 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,     case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:     case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:        /* FIXME */ +      _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode); +      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); +      coord = lp_build_min(int_coord_bld, coord, length_minus_one); +      break; +     default:        assert(0);     } @@ -337,7 +342,7 @@ void  lp_build_sample_soa(LLVMBuilderRef builder,                      const struct lp_sampler_static_state *static_state,                      struct lp_sampler_dynamic_state *dynamic_state, -                    union lp_type type, +                    struct lp_type type,                      unsigned unit,                      unsigned num_coords,                      const LLVMValueRef *coords, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c index f35638be44..64e81f7b1f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c @@ -64,7 +64,7 @@ LLVMValueRef  lp_build_broadcast_scalar(struct lp_build_context *bld,                            LLVMValueRef scalar)  { -   const union lp_type type = bld->type; +   const 
struct lp_type type = bld->type;     LLVMValueRef res;     unsigned i; @@ -83,7 +83,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,                         LLVMValueRef a,                         unsigned channel)  { -   const union lp_type type = bld->type; +   const struct lp_type type = bld->type;     const unsigned n = type.length;     unsigned i, j; @@ -115,7 +115,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,         *   YY00 YY00 .... YY00         *   YYYY YYYY .... YYYY  <= output         */ -      union lp_type type4 = type; +      struct lp_type type4 = type;        const char shifts[4][2] = {           { 1,  2},           {-1,  2}, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index cb0b6707ec..1f6da80448 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -40,7 +40,7 @@  #include <llvm-c/Core.h>   -union lp_type type; +struct lp_type type;  struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 10c251c416..eddb7a83fa 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -39,7 +39,7 @@  struct tgsi_token; -union lp_type; +struct lp_type;  struct lp_build_context;  struct lp_build_mask_context; @@ -60,7 +60,7 @@ struct lp_build_sampler_soa     void     (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler,                          LLVMBuilderRef builder, -                        union lp_type type, +                        struct lp_type type,                          unsigned unit,                          unsigned num_coords,                          const LLVMValueRef *coords, @@ -72,7 +72,7 @@ struct lp_build_sampler_soa  void  lp_build_tgsi_soa(LLVMBuilderRef builder,                    const struct tgsi_token *tokens, -                  union lp_type type, +                  
struct lp_type type,                    struct lp_build_mask_context *mask,                    LLVMValueRef consts_ptr,                    const LLVMValueRef *pos, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index b106ce2317..adc81569ed 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -1415,7 +1415,7 @@ emit_instruction(  void  lp_build_tgsi_soa(LLVMBuilderRef builder,                    const struct tgsi_token *tokens, -                  union lp_type type, +                  struct lp_type type,                    struct lp_build_mask_context *mask,                    LLVMValueRef consts_ptr,                    const LLVMValueRef *pos, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 577644b7ab..606243d6c5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -33,7 +33,7 @@  LLVMTypeRef -lp_build_elem_type(union lp_type type) +lp_build_elem_type(struct lp_type type)  {     if (type.floating) {        switch(type.width) { @@ -55,7 +55,7 @@ lp_build_elem_type(union lp_type type)  LLVMTypeRef -lp_build_vec_type(union lp_type type) +lp_build_vec_type(struct lp_type type)  {     LLVMTypeRef elem_type = lp_build_elem_type(type);     return LLVMVectorType(elem_type, type.length); @@ -69,7 +69,7 @@ lp_build_vec_type(union lp_type type)   * type and check for identity.   
*/  boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)  +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type)   {     LLVMTypeKind elem_kind; @@ -107,7 +107,7 @@ lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)  boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)  +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type)   {     LLVMTypeRef elem_type; @@ -128,7 +128,7 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)  boolean -lp_check_value(union lp_type type, LLVMValueRef val)  +lp_check_value(struct lp_type type, LLVMValueRef val)   {     LLVMTypeRef vec_type; @@ -143,25 +143,26 @@ lp_check_value(union lp_type type, LLVMValueRef val)  LLVMTypeRef -lp_build_int_elem_type(union lp_type type) +lp_build_int_elem_type(struct lp_type type)  {     return LLVMIntType(type.width);  }  LLVMTypeRef -lp_build_int_vec_type(union lp_type type) +lp_build_int_vec_type(struct lp_type type)  {     LLVMTypeRef elem_type = lp_build_int_elem_type(type);     return LLVMVectorType(elem_type, type.length);  } -union lp_type -lp_int_type(union lp_type type) +struct lp_type +lp_int_type(struct lp_type type)  { -   union lp_type int_type; -   int_type.value = 0; +   struct lp_type int_type; + +   memset(&int_type, 0, sizeof int_type);     int_type.width = type.width;     int_type.length = type.length;     return int_type; @@ -171,7 +172,7 @@ lp_int_type(union lp_type type)  void  lp_build_context_init(struct lp_build_context *bld,                        LLVMBuilderRef builder, -                      union lp_type type) +                      struct lp_type type)  {     bld->builder = builder;     bld->type = type; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 9933e0b45c..ee5ca3483c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -56,58 +56,55 @@   * on the types used for intermediate 
computations, such as signed vs unsigned,   * normalized values, or fixed point.   */ -union lp_type { -   struct { -      /**  -       * Floating-point. Cannot be used with fixed. Integer numbers are -       * represented by this zero. -       */ -      unsigned floating:1; +struct lp_type { +   /** +    * Floating-point. Cannot be used with fixed. Integer numbers are +    * represented by this zero. +    */ +   unsigned floating:1; -      /**  -       * Fixed-point. Cannot be used with floating. Integer numbers are -       * represented by this zero. -       */ -      unsigned fixed:1; -       -      /**  -       * Whether it can represent negative values or not. -       * -       * If this is not set for floating point, it means that all values are -       * assumed to be positive. -       */ -      unsigned sign:1; +   /** +    * Fixed-point. Cannot be used with floating. Integer numbers are +    * represented by this zero. +    */ +   unsigned fixed:1; -      /** -       * Whether values are normalized to fit [0, 1] interval, or [-1, 1] -       * interval for signed types. -       * -       * For integer types it means the representable integer range should be -       * interpreted as the interval above. -       * -       * For floating and fixed point formats it means the values should be -       * clamped to the interval above. -       */ -      unsigned norm:1; +   /** +    * Whether it can represent negative values or not. +    * +    * If this is not set for floating point, it means that all values are +    * assumed to be positive. +    */ +   unsigned sign:1; -      /** -       * Element width. -       * -       * For fixed point values, the fixed point is assumed to be at half the -       * width. -       */ -      unsigned width:14; +   /** +    * Whether values are normalized to fit [0, 1] interval, or [-1, 1] +    * interval for signed types. 
+    * +    * For integer types it means the representable integer range should be +    * interpreted as the interval above. +    * +    * For floating and fixed point formats it means the values should be +    * clamped to the interval above. +    */ +   unsigned norm:1; + +   /** +    * Element width. +    * +    * For fixed point values, the fixed point is assumed to be at half the +    * width. +    */ +   unsigned width:14; -      /**  -       * Vector length. -       * -       * width*length should be a power of two greater or equal to eight. -       * -       * @sa LP_MAX_VECTOR_LENGTH -       */ -      unsigned length:14; -   }; -   uint32_t value; +   /** +    * Vector length. +    * +    * width*length should be a power of two greater or equal to eight. +    * +    * @sa LP_MAX_VECTOR_LENGTH +    */ +   unsigned length:14;  }; @@ -124,7 +121,7 @@ struct lp_build_context      * This not only describes the input/output LLVM types, but also whether      * to normalize/clamp the results.      
*/ -   union lp_type type; +   struct lp_type type;     /** Same as lp_build_undef(type) */     LLVMValueRef undef; @@ -138,41 +135,41 @@ struct lp_build_context  LLVMTypeRef -lp_build_elem_type(union lp_type type); +lp_build_elem_type(struct lp_type type);  LLVMTypeRef -lp_build_vec_type(union lp_type type); +lp_build_vec_type(struct lp_type type);  boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type); +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type);  boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type); +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type);  boolean -lp_check_value(union lp_type type, LLVMValueRef val); +lp_check_value(struct lp_type type, LLVMValueRef val);  LLVMTypeRef -lp_build_int_elem_type(union lp_type type); +lp_build_int_elem_type(struct lp_type type);  LLVMTypeRef -lp_build_int_vec_type(union lp_type type); +lp_build_int_vec_type(struct lp_type type); -union lp_type -lp_int_type(union lp_type type); +struct lp_type +lp_int_type(struct lp_type type);  void  lp_build_context_init(struct lp_build_context *bld,                        LLVMBuilderRef builder, -                      union lp_type type); +                      struct lp_type type);  #endif /* !LP_BLD_TYPE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 9465f763d5..b4a22ff4a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -152,7 +152,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)     screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module);     if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { -      fprintf(stderr, "%s\n", error); +      _debug_printf("%s\n", error);        LLVMDisposeMessage(error);        abort();     } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 0ce1a37bd4..ff7ef8658a 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -65,8 +65,6 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)        return 1;     case PIPE_CAP_GLSL:        return 1; -   case PIPE_CAP_S3TC: -      return 0;     case PIPE_CAP_ANISOTROPIC_FILTER:        return 0;     case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 618cf1ffb8..9faed5a0b1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -133,13 +133,13 @@ generate_pos0(LLVMBuilderRef builder,  static void  generate_depth(LLVMBuilderRef builder,                 const struct lp_fragment_shader_variant_key *key, -               union lp_type src_type, +               struct lp_type src_type,                 struct lp_build_mask_context *mask,                 LLVMValueRef src,                 LLVMValueRef dst_ptr)  {     const struct util_format_description *format_desc; -   union lp_type dst_type; +   struct lp_type dst_type;     if(!key->depth.enabled)        return; @@ -181,7 +181,7 @@ generate_fs(struct llvmpipe_context *lp,              struct lp_fragment_shader *shader,              const struct lp_fragment_shader_variant_key *key,              LLVMBuilderRef builder, -            union lp_type type, +            struct lp_type type,              LLVMValueRef context_ptr,              unsigned i,              const struct lp_build_interp_soa_context *interp, @@ -299,7 +299,7 @@ generate_fs(struct llvmpipe_context *lp,  static void  generate_blend(const struct pipe_blend_state *blend,                 LLVMBuilderRef builder, -               union lp_type type, +               struct lp_type type,                 LLVMValueRef context_ptr,                 LLVMValueRef mask,                 LLVMValueRef *src, @@ -364,8 +364,8 @@ generate_fragment(struct llvmpipe_context *lp,  {     struct llvmpipe_screen *screen = 
llvmpipe_screen(lp->pipe.screen);     struct lp_fragment_shader_variant *variant; -   union lp_type fs_type; -   union lp_type blend_type; +   struct lp_type fs_type; +   struct lp_type blend_type;     LLVMTypeRef fs_elem_type;     LLVMTypeRef fs_vec_type;     LLVMTypeRef fs_int_vec_type; @@ -431,7 +431,7 @@ generate_fragment(struct llvmpipe_context *lp,     /* TODO: actually pick these based on the fs and color buffer      * characteristics. */ -   fs_type.value = 0; +   memset(&fs_type, 0, sizeof fs_type);     fs_type.floating = TRUE; /* floating point values */     fs_type.sign = TRUE;     /* values are signed */     fs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */ @@ -439,7 +439,7 @@ generate_fragment(struct llvmpipe_context *lp,     fs_type.length = 4;      /* 4 element per vector */     num_fs = 4; -   blend_type.value = 0; +   memset(&blend_type, 0, sizeof blend_type);     blend_type.floating = FALSE; /* values are integers */     blend_type.sign = FALSE;     /* values are unsigned */     blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */ diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 69aaae26e0..a88e110c66 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -86,43 +86,43 @@ random_float(void);  void -dump_type(FILE *fp, union lp_type type); +dump_type(FILE *fp, struct lp_type type);  double -read_elem(union lp_type type, const void *src, unsigned index); +read_elem(struct lp_type type, const void *src, unsigned index);  void -write_elem(union lp_type type, void *dst, unsigned index, double src); +write_elem(struct lp_type type, void *dst, unsigned index, double src);  void -random_elem(union lp_type type, void *dst, unsigned index); +random_elem(struct lp_type type, void *dst, unsigned index);  void -read_vec(union lp_type type, const void *src, double *dst); +read_vec(struct lp_type type, const void *src, double *dst);  
void -write_vec(union lp_type type, void *dst, const double *src); +write_vec(struct lp_type type, void *dst, const double *src);  void -random_vec(union lp_type type, void *dst); +random_vec(struct lp_type type, void *dst);  boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps); +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps);  boolean -compare_vec(union lp_type type, const void *res, const void *ref); +compare_vec(struct lp_type type, const void *res, const void *ref);  void -dump_vec(FILE *fp, union lp_type type, const void *src); +dump_vec(FILE *fp, struct lp_type type, const void *src);  #endif /* !LP_TEST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 8dfad468e3..94b661dcba 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -80,7 +80,7 @@ static void  write_tsv_row(FILE *fp,                const struct pipe_blend_state *blend,                enum vector_mode mode, -              union lp_type type, +              struct lp_type type,                double cycles,                boolean success)  { @@ -125,7 +125,7 @@ static void  dump_blend_type(FILE *fp,                  const struct pipe_blend_state *blend,                  enum vector_mode mode, -                union lp_type type) +                struct lp_type type)  {     fprintf(fp, "%s", mode ? 
"soa" : "aos"); @@ -153,7 +153,7 @@ static LLVMValueRef  add_blend_test(LLVMModuleRef module,                 const struct pipe_blend_state *blend,                 enum vector_mode mode, -               union lp_type type) +               struct lp_type type)  {     LLVMTypeRef ret_type;     LLVMTypeRef vec_type; @@ -467,7 +467,7 @@ test_one(unsigned verbose,           FILE *fp,           const struct pipe_blend_state *blend,           enum vector_mode mode, -         union lp_type type) +         struct lp_type type)  {     LLVMModuleRef module = NULL;     LLVMValueRef func = NULL; @@ -765,10 +765,10 @@ blend_funcs[] = {  }; -const union lp_type blend_types[] = { +const struct lp_type blend_types[] = {     /* float, fixed,  sign,  norm, width, len */ -   {{  TRUE, FALSE, FALSE,  TRUE,    32,   4 }}, /* f32 x 4 */ -   {{ FALSE, FALSE, FALSE,  TRUE,     8,  16 }}, /* u8n x 16 */ +   {   TRUE, FALSE, FALSE,  TRUE,    32,   4 }, /* f32 x 4 */ +   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */  }; @@ -788,7 +788,7 @@ test_all(unsigned verbose, FILE *fp)     const unsigned *alpha_dst_factor;     struct pipe_blend_state blend;     enum vector_mode mode; -   const union lp_type *type; +   const struct lp_type *type;     bool success = TRUE;     for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { @@ -841,27 +841,27 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)     const unsigned *alpha_dst_factor;     struct pipe_blend_state blend;     enum vector_mode mode; -   const union lp_type *type; +   const struct lp_type *type;     unsigned long i;     bool success = TRUE;     for(i = 0; i < n; ++i) { -      rgb_func = &blend_funcs[random() % num_funcs]; -      alpha_func = &blend_funcs[random() % num_funcs]; -      rgb_src_factor = &blend_factors[random() % num_factors]; -      alpha_src_factor = &blend_factors[random() % num_factors]; +      rgb_func = &blend_funcs[rand() % num_funcs]; +      alpha_func = &blend_funcs[rand() % 
num_funcs]; +      rgb_src_factor = &blend_factors[rand() % num_factors]; +      alpha_src_factor = &blend_factors[rand() % num_factors];        do { -         rgb_dst_factor = &blend_factors[random() % num_factors]; +         rgb_dst_factor = &blend_factors[rand() % num_factors];        } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);        do { -         alpha_dst_factor = &blend_factors[random() % num_factors]; +         alpha_dst_factor = &blend_factors[rand() % num_factors];        } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); -      mode = random() & 1; +      mode = rand() & 1; -      type = &blend_types[random() % num_types]; +      type = &blend_types[rand() % num_types];        memset(&blend, 0, sizeof blend);        blend.blend_enable      = 1; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index e6489834af..9dcf58e5dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -59,8 +59,8 @@ write_tsv_header(FILE *fp)  static void  write_tsv_row(FILE *fp, -              union lp_type src_type, -              union lp_type dst_type, +              struct lp_type src_type, +              struct lp_type dst_type,                double cycles,                boolean success)  { @@ -80,8 +80,8 @@ write_tsv_row(FILE *fp,  static void  dump_conv_types(FILE *fp, -               union lp_type src_type, -               union lp_type dst_type) +               struct lp_type src_type, +               struct lp_type dst_type)  {     fprintf(fp, "src_type=");     dump_type(fp, src_type); @@ -96,8 +96,8 @@ dump_conv_types(FILE *fp,  static LLVMValueRef  add_conv_test(LLVMModuleRef module, -              union lp_type src_type, unsigned num_srcs, -              union lp_type dst_type, unsigned num_dsts) +              struct lp_type src_type, unsigned num_srcs, +              struct lp_type dst_type, unsigned num_dsts)  {     
LLVMTypeRef args[2];     LLVMValueRef func; @@ -145,8 +145,8 @@ add_conv_test(LLVMModuleRef module,  static boolean  test_one(unsigned verbose,           FILE *fp, -         union lp_type src_type, -         union lp_type dst_type) +         struct lp_type src_type, +         struct lp_type dst_type)  {     LLVMModuleRef module = NULL;     LLVMValueRef func = NULL; @@ -343,35 +343,35 @@ test_one(unsigned verbose,  } -const union lp_type conv_types[] = { +const struct lp_type conv_types[] = {     /* float, fixed,  sign,  norm, width, len */ -   {{  TRUE, FALSE,  TRUE,  TRUE,    32,   4 }}, -   {{  TRUE, FALSE,  TRUE, FALSE,    32,   4 }}, -   {{  TRUE, FALSE, FALSE,  TRUE,    32,   4 }}, -   {{  TRUE, FALSE, FALSE, FALSE,    32,   4 }}, +   {   TRUE, FALSE,  TRUE,  TRUE,    32,   4 }, +   {   TRUE, FALSE,  TRUE, FALSE,    32,   4 }, +   {   TRUE, FALSE, FALSE,  TRUE,    32,   4 }, +   {   TRUE, FALSE, FALSE, FALSE,    32,   4 },     /* TODO: test fixed formats too */ -   {{ FALSE, FALSE,  TRUE,  TRUE,    16,   8 }}, -   {{ FALSE, FALSE,  TRUE, FALSE,    16,   8 }}, -   {{ FALSE, FALSE, FALSE,  TRUE,    16,   8 }}, -   {{ FALSE, FALSE, FALSE, FALSE,    16,   8 }}, +   {  FALSE, FALSE,  TRUE,  TRUE,    16,   8 }, +   {  FALSE, FALSE,  TRUE, FALSE,    16,   8 }, +   {  FALSE, FALSE, FALSE,  TRUE,    16,   8 }, +   {  FALSE, FALSE, FALSE, FALSE,    16,   8 }, -   {{ FALSE, FALSE,  TRUE,  TRUE,    32,   4 }}, -   {{ FALSE, FALSE,  TRUE, FALSE,    32,   4 }}, -   {{ FALSE, FALSE, FALSE,  TRUE,    32,   4 }}, -   {{ FALSE, FALSE, FALSE, FALSE,    32,   4 }}, +   {  FALSE, FALSE,  TRUE,  TRUE,    32,   4 }, +   {  FALSE, FALSE,  TRUE, FALSE,    32,   4 }, +   {  FALSE, FALSE, FALSE,  TRUE,    32,   4 }, +   {  FALSE, FALSE, FALSE, FALSE,    32,   4 }, -   {{ FALSE, FALSE,  TRUE,  TRUE,    16,   8 }}, -   {{ FALSE, FALSE,  TRUE, FALSE,    16,   8 }}, -   {{ FALSE, FALSE, FALSE,  TRUE,    16,   8 }}, -   {{ FALSE, FALSE, FALSE, FALSE,    16,   8 }}, +   {  FALSE, FALSE,  
TRUE,  TRUE,    16,   8 }, +   {  FALSE, FALSE,  TRUE, FALSE,    16,   8 }, +   {  FALSE, FALSE, FALSE,  TRUE,    16,   8 }, +   {  FALSE, FALSE, FALSE, FALSE,    16,   8 }, -   {{ FALSE, FALSE,  TRUE,  TRUE,     8,  16 }}, -   {{ FALSE, FALSE,  TRUE, FALSE,     8,  16 }}, -   {{ FALSE, FALSE, FALSE,  TRUE,     8,  16 }}, -   {{ FALSE, FALSE, FALSE, FALSE,     8,  16 }}, +   {  FALSE, FALSE,  TRUE,  TRUE,     8,  16 }, +   {  FALSE, FALSE,  TRUE, FALSE,     8,  16 }, +   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, +   {  FALSE, FALSE, FALSE, FALSE,     8,  16 },  }; @@ -381,8 +381,8 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);  boolean  test_all(unsigned verbose, FILE *fp)  { -   const union lp_type *src_type; -   const union lp_type *dst_type; +   const struct lp_type *src_type; +   const struct lp_type *dst_type;     bool success = TRUE;     for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) { @@ -407,16 +407,16 @@ test_all(unsigned verbose, FILE *fp)  boolean  test_some(unsigned verbose, FILE *fp, unsigned long n)  { -   const union lp_type *src_type; -   const union lp_type *dst_type; +   const struct lp_type *src_type; +   const struct lp_type *dst_type;     unsigned long i;     bool success = TRUE;     for(i = 0; i < n; ++i) { -      src_type = &conv_types[random() % num_types]; +      src_type = &conv_types[rand() % num_types];        do { -         dst_type = &conv_types[random() % num_types]; +         dst_type = &conv_types[rand() % num_types];        } while (src_type == dst_type || src_type->norm != dst_type->norm);        if(!test_one(verbose, fp, *src_type, *dst_type)) diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 49213fb4f0..4592dc0b2d 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -40,7 +40,7 @@  void  dump_type(FILE *fp, -          union lp_type type) +          struct 
lp_type type)  {     fprintf(fp, "%s%s%u%sx%u",             type.sign ? (type.floating || type.fixed ? "" : "s") : "u", @@ -52,7 +52,7 @@ dump_type(FILE *fp,  double -read_elem(union lp_type type, const void *src, unsigned index) +read_elem(struct lp_type type, const void *src, unsigned index)  {     double scale = lp_const_scale(type);     double value; @@ -115,7 +115,7 @@ read_elem(union lp_type type, const void *src, unsigned index)  void -write_elem(union lp_type type, void *dst, unsigned index, double value) +write_elem(struct lp_type type, void *dst, unsigned index, double value)  {     assert(index < type.length);     if(!type.sign && value < 0.0) @@ -184,11 +184,11 @@ write_elem(union lp_type type, void *dst, unsigned index, double value)  void -random_elem(union lp_type type, void *dst, unsigned index) +random_elem(struct lp_type type, void *dst, unsigned index)  {     double value;     assert(index < type.length); -   value = (double)random()/(double)RAND_MAX; +   value = (double)rand()/(double)RAND_MAX;     if(!type.norm) {        unsigned long long mask;        if (type.floating) @@ -199,17 +199,17 @@ random_elem(union lp_type type, void *dst, unsigned index)           mask = ((unsigned long long)1 << (type.width - 1)) - 1;        else           mask = ((unsigned long long)1 << type.width) - 1; -      value += (double)(mask & random()); +      value += (double)(mask & rand());     }     if(!type.sign) -      if(random() & 1) +      if(rand() & 1)           value = -value;     write_elem(type, dst, index, value);  }  void -read_vec(union lp_type type, const void *src, double *dst) +read_vec(struct lp_type type, const void *src, double *dst)  {     unsigned i;     for (i = 0; i < type.length; ++i) @@ -218,7 +218,7 @@ read_vec(union lp_type type, const void *src, double *dst)  void -write_vec(union lp_type type, void *dst, const double *src) +write_vec(struct lp_type type, void *dst, const double *src)  {     unsigned i;     for (i = 0; i < type.length; 
++i) @@ -229,12 +229,12 @@ write_vec(union lp_type type, void *dst, const double *src)  float  random_float(void)  { -    return (float)((double)random()/(double)RAND_MAX); +    return (float)((double)rand()/(double)RAND_MAX);  }  void -random_vec(union lp_type type, void *dst) +random_vec(struct lp_type type, void *dst)  {     unsigned i;     for (i = 0; i < type.length; ++i) @@ -243,7 +243,7 @@ random_vec(union lp_type type, void *dst)  boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps) +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps)  {     unsigned i;     for (i = 0; i < type.length; ++i) { @@ -259,7 +259,7 @@ compare_vec_with_eps(union lp_type type, const void *res, const void *ref, doubl  boolean -compare_vec(union lp_type type, const void *res, const void *ref) +compare_vec(struct lp_type type, const void *res, const void *ref)  {     double eps = lp_const_eps(type);     return compare_vec_with_eps(type, res, ref, eps); @@ -267,7 +267,7 @@ compare_vec(union lp_type type, const void *res, const void *ref)  void -dump_vec(FILE *fp, union lp_type type, const void *src) +dump_vec(FILE *fp, struct lp_type type, const void *src)  {     unsigned i;     for (i = 0; i < type.length; ++i) { diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 9a876f404d..a1365a045f 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1654,7 +1654,7 @@ lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)  static void  lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,                                    LLVMBuilderRef builder, -                                  union lp_type type, +                                  struct lp_type type,                                    unsigned unit,                                    unsigned num_coords,          
                          const LLVMValueRef *coords, diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index 7d31705d01..d2a6ae21f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -149,7 +149,7 @@ lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)  static void  lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base,                                       LLVMBuilderRef builder, -                                     union lp_type type, +                                     struct lp_type type,                                       unsigned unit,                                       unsigned num_coords,                                       const LLVMValueRef *coords, diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index ff2febb668..170ce3eb7e 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -16,8 +16,6 @@ nv04_screen_get_param(struct pipe_screen *screen, int param)  		return 0;  	case PIPE_CAP_GLSL:  		return 0; -	case PIPE_CAP_S3TC: -		return 0;  	case PIPE_CAP_ANISOTROPIC_FILTER:  		return 0;  	case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 4469b22d91..ee5901e743 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -15,8 +15,6 @@ nv10_screen_get_param(struct pipe_screen *screen, int param)  		return 0;  	case PIPE_CAP_GLSL:  		return 0; -	case PIPE_CAP_S3TC: -		return 0;  	case PIPE_CAP_ANISOTROPIC_FILTER:  		return 1;  	case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index e6924ad71e..4eeacd1afd 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -15,8 +15,6 
@@ nv20_screen_get_param(struct pipe_screen *screen, int param)  		return 0;  	case PIPE_CAP_GLSL:  		return 0; -	case PIPE_CAP_S3TC: -		return 0;  	case PIPE_CAP_ANISOTROPIC_FILTER:  		return 1;  	case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index f8285e4455..41af38450b 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -22,8 +22,6 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param)  		return 1;  	case PIPE_CAP_GLSL:  		return 0; -	case PIPE_CAP_S3TC: -		return 0;  	case PIPE_CAP_ANISOTROPIC_FILTER:  		return 1;  	case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 5d2a4216c5..bd13dfddd1 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -21,8 +21,6 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param)  		return 1;  	case PIPE_CAP_GLSL:  		return 0; -	case PIPE_CAP_S3TC: -		return 1;  	case PIPE_CAP_ANISOTROPIC_FILTER:  		return 1;  	case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 6e8f4f9750..fca078b174 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -37,11 +37,12 @@ nv50_flush(struct pipe_context *pipe, unsigned flags,  	/* We need this in the ddx for reliable composite, not sure what we're  	 * actually flushing. We generate all our own flushes with flags = 0. 
*/ -	WAIT_RING(chan, 3); +	WAIT_RING(chan, 2);  	BEGIN_RING(chan, eng2d, 0x0110, 1);  	OUT_RING  (chan, 0); -	FIRE_RING(chan); +	if (flags & PIPE_FLUSH_FRAME) +		FIRE_RING(chan);  }  static void @@ -110,6 +111,9 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)  	nv50->pipe.is_texture_referenced = nv50_is_texture_referenced;  	nv50->pipe.is_buffer_referenced = nv50_is_buffer_referenced; +	screen->base.channel->user_private = nv50; +	screen->base.channel->flush_notify = nv50_state_flush_notify; +  	nv50_init_surface_functions(nv50);  	nv50_init_state_functions(nv50);  	nv50_init_query_functions(nv50); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 1e9e8e49bf..4608854d71 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -116,6 +116,7 @@ struct nv50_state {  	unsigned miptree_nr;  	struct nouveau_stateobj *vertprog;  	struct nouveau_stateobj *fragprog; +	struct nouveau_stateobj *programs;  	struct nouveau_stateobj *vtxfmt;  	struct nouveau_stateobj *vtxbuf;  	struct nouveau_stateobj *vtxattr; @@ -190,10 +191,12 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,  /* nv50_program.c */  extern void nv50_vertprog_validate(struct nv50_context *nv50);  extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_linkage_validate(struct nv50_context *nv50);  extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);  /* nv50_state_validate.c */  extern boolean nv50_state_validate(struct nv50_context *nv50); +extern void nv50_state_flush_notify(struct nouveau_channel *chan);  /* nv50_tex.c */  extern void nv50_tex_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 4a838529de..eb90d5e66f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -112,6 +112,10 @@ struct 
nv50_pc {  	struct nv50_reg *temp_temp[16];  	unsigned temp_temp_nr; +	/* broadcast and destination replacement regs */ +	struct nv50_reg *r_brdc; +	struct nv50_reg *r_dst[4]; +  	unsigned interp_mode[32];  	/* perspective interpolation registers */  	struct nv50_reg *iv_p; @@ -124,6 +128,25 @@ struct nv50_pc {  	boolean allow32;  }; +static INLINE void +ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) +{ +	reg->type = type; +	reg->index = index; +	reg->hw = hw; +	reg->neg = 0; +	reg->rhw = -1; +	reg->acc = 0; +} + +static INLINE unsigned +popcnt4(uint32_t val) +{ +	static const unsigned cnt[16] +	= { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; +	return cnt[val & 0xf]; +} +  static void  alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)  { @@ -184,11 +207,8 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)  	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {  		if (!pc->r_temp[i]) { -			r = CALLOC_STRUCT(nv50_reg); -			r->type = P_TEMP; -			r->index = -1; -			r->hw = i; -			r->rhw = -1; +			r = MALLOC_STRUCT(nv50_reg); +			ctor_reg(r, P_TEMP, -1, i);  			pc->r_temp[i] = r;  			return r;  		} @@ -254,10 +274,8 @@ alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)  		return alloc_temp4(pc, dst, idx + 4);  	for (i = 0; i < 4; i++) { -		dst[i] = CALLOC_STRUCT(nv50_reg); -		dst[i]->type = P_TEMP; -		dst[i]->index = -1; -		dst[i]->hw = idx + i; +		dst[i] = MALLOC_STRUCT(nv50_reg); +		ctor_reg(dst[i], P_TEMP, -1, idx + i);  		pc->r_temp[idx + i] = dst[i];  	} @@ -309,7 +327,7 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)  static struct nv50_reg *  alloc_immd(struct nv50_pc *pc, float f)  { -	struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); +	struct nv50_reg *r = MALLOC_STRUCT(nv50_reg);  	unsigned hw;  	for (hw = 0; hw < pc->immd_nr * 4; hw++) @@ -319,9 +337,7 @@ alloc_immd(struct nv50_pc *pc, float f)  	if (hw == pc->immd_nr * 4)  		hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4; -	r->type = P_IMMD; -	r->hw = hw; -	
r->index = -1; +	ctor_reg(r, P_IMMD, -1, hw);  	return r;  } @@ -786,6 +802,9 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)  #define CVTOP_SAT	0x08  #define CVTOP_ABS	0x10 +/* 0x04 == 32 bit */ +/* 0x40 == dst is float */ +/* 0x80 == src is float */  #define CVT_F32_F32 0xc4  #define CVT_F32_S32 0x44  #define CVT_F32_U32 0x64 @@ -795,7 +814,7 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)  static void  emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, -	 int wp, unsigned cop, unsigned fmt) +	 int wp, unsigned cvn, unsigned fmt)  {  	struct nv50_program_exec *e; @@ -804,7 +823,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,  	e->inst[0] |= 0xa0000000;  	e->inst[1] |= 0x00004000; -	e->inst[1] |= (cop << 16); +	e->inst[1] |= (cvn << 16);  	e->inst[1] |= (fmt << 24);  	set_src_0(pc, src, e); @@ -821,49 +840,80 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,  	emit(pc, e);  } +/* nv50 Condition codes: + *  0x1 = LT + *  0x2 = EQ + *  0x3 = LE + *  0x4 = GT + *  0x5 = NE + *  0x6 = GE + *  0x7 = set condition code ? 
(used before bra.lt/le/gt/ge) + *  0x8 = unordered bit (allows NaN) + */  static void -emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, +emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,  	 struct nv50_reg *src0, struct nv50_reg *src1)  { +	static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; +  	struct nv50_program_exec *e = exec(pc); -	unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };  	struct nv50_reg *rdst; -	assert(c_op <= 7); +	assert(ccode < 16);  	if (check_swap_src_0_1(pc, &src0, &src1)) -		c_op = inv_cop[c_op]; +		ccode = cc_swapped[ccode & 7] | (ccode & 8);  	rdst = dst; -	if (dst->type != P_TEMP) +	if (dst && dst->type != P_TEMP)  		dst = alloc_temp(pc, NULL);  	/* set.u32 */  	set_long(pc, e);  	e->inst[0] |= 0xb0000000; -	e->inst[1] |= (3 << 29); -	e->inst[1] |= (c_op << 14); -	/*XXX: breaks things, .u32 by default? -	 *     decuda will disasm as .u16 and use .lo/.hi regs, but this -	 *     doesn't seem to match what the hw actually does. -	inst[1] |= 0x04000000; << breaks things.. .u32 by default? +	e->inst[1] |= 0x60000000 | (ccode << 14); + +	/* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but +	 * that doesn't seem to match what the hw actually does +	e->inst[1] |= 0x04000000; << breaks things, u32 by default ?  	 */ -	set_dst(pc, dst, e); + +	if (wp >= 0) +		set_pred_wr(pc, 1, wp, e); +	if (dst) +		set_dst(pc, dst, e); +	else { +		e->inst[0] |= 0x000001fc; +		e->inst[1] |= 0x00000008; +	} +  	set_src_0(pc, src0, e);  	set_src_1(pc, src1, e); -	emit(pc, e); -	/* cvt.f32.u32 */ -	e = exec(pc); -	e->inst[0] = 0xa0000001; -	e->inst[1] = 0x64014780; -	set_dst(pc, rdst, e); -	set_src_0(pc, dst, e);  	emit(pc, e); -	if (dst != rdst) +	/* cvt.f32.u32/s32 (?) 
if we didn't only write the predicate */ +	if (rdst) +		emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32); +	if (rdst && rdst != dst)  		free_temp(pc, dst);  } +static INLINE unsigned +map_tgsi_setop_cc(unsigned op) +{ +	switch (op) { +	case TGSI_OPCODE_SLT: return 0x1; +	case TGSI_OPCODE_SGE: return 0x6; +	case TGSI_OPCODE_SEQ: return 0x2; +	case TGSI_OPCODE_SGT: return 0x4; +	case TGSI_OPCODE_SLE: return 0x3; +	case TGSI_OPCODE_SNE: return 0xd; +	default: +		assert(0); +		return 0; +	} +} +  static INLINE void  emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)  { @@ -890,6 +940,12 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)  	emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);  } +static INLINE void +emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ +	emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); +} +  static void  emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,  	 struct nv50_reg **src) @@ -1159,6 +1215,70 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)  	}  } +/* Return a read mask for source registers deduced from opcode & write mask. */ +static unsigned +nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) +{ +	unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask; + +	switch (insn->Instruction.Opcode) { +	case TGSI_OPCODE_COS: +	case TGSI_OPCODE_SIN: +		return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); +	case TGSI_OPCODE_DP3: +		return 0x7; +	case TGSI_OPCODE_DP4: +	case TGSI_OPCODE_DPH: +	case TGSI_OPCODE_KIL: /* WriteMask ignored */ +		return 0xf; +	case TGSI_OPCODE_DST: +		return mask & (c ? 
0xa : 0x6); +	case TGSI_OPCODE_EX2: +	case TGSI_OPCODE_LG2: +	case TGSI_OPCODE_POW: +	case TGSI_OPCODE_RCP: +	case TGSI_OPCODE_RSQ: +	case TGSI_OPCODE_SCS: +		return 0x1; +	case TGSI_OPCODE_LIT: +		return 0xb; +	case TGSI_OPCODE_TEX: +	case TGSI_OPCODE_TXP: +	{ +		const struct tgsi_instruction_ext_texture *tex; + +		assert(insn->Instruction.Extended); +		tex = &insn->InstructionExtTexture; + +		mask = 0x7; +		if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) +			mask |= 0x8; + +		switch (tex->Texture) { +		case TGSI_TEXTURE_1D: +			mask &= 0x9; +			break; +		case TGSI_TEXTURE_2D: +			mask &= 0xb; +			break; +		default: +			break; +		} +	} +		return mask; +	case TGSI_OPCODE_XPD: +		x = 0; +		if (mask & 1) x |= 0x6; +		if (mask & 2) x |= 0x5; +		if (mask & 4) x |= 0x3; +		return x; +	default: +		break; +	} + +	return mask; +} +  static struct nv50_reg *  tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)  { @@ -1258,93 +1378,126 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,  	return r;  } -/* returns TRUE if instruction can overwrite sources before they're read */ +/* return TRUE for ops that produce only a single result */  static boolean -direct2dest_op(const struct tgsi_full_instruction *insn) +is_scalar_op(unsigned op)  { -	if (insn->Instruction.Saturate) -		return FALSE; - -	switch (insn->Instruction.Opcode) { +	switch (op) {  	case TGSI_OPCODE_COS: +	case TGSI_OPCODE_DP2:  	case TGSI_OPCODE_DP3:  	case TGSI_OPCODE_DP4:  	case TGSI_OPCODE_DPH: -	case TGSI_OPCODE_KIL: -	case TGSI_OPCODE_LIT: +	case TGSI_OPCODE_EX2: +	case TGSI_OPCODE_LG2:  	case TGSI_OPCODE_POW:  	case TGSI_OPCODE_RCP:  	case TGSI_OPCODE_RSQ: -	case TGSI_OPCODE_SCS:  	case TGSI_OPCODE_SIN: +		/* +	case TGSI_OPCODE_KIL: +	case TGSI_OPCODE_LIT: +	case TGSI_OPCODE_SCS: +		*/ +		return TRUE; +	default: +		return FALSE; +	} +} + +/* Returns a bitmask indicating which dst components depend + * on source s, component c (reverse of 
nv50_tgsi_src_mask). + */ +static unsigned +nv50_tgsi_dst_revdep(unsigned op, int s, int c) +{ +	if (is_scalar_op(op)) +		return 0x1; + +	switch (op) { +	case TGSI_OPCODE_DST: +		return (1 << c) & (s ? 0xa : 0x6); +	case TGSI_OPCODE_XPD: +		switch (c) { +		case 0: return 0x6; +		case 1: return 0x5; +		case 2: return 0x3; +		case 3: return 0x0; +		default: +			assert(0); +			return 0x0; +		} +	case TGSI_OPCODE_LIT: +	case TGSI_OPCODE_SCS:  	case TGSI_OPCODE_TEX:  	case TGSI_OPCODE_TXP: -		return FALSE; +		/* these take care of dangerous swizzles themselves */ +		return 0x0; +	case TGSI_OPCODE_IF: +	case TGSI_OPCODE_KIL: +		/* don't call this function for these ops */ +		assert(0); +		return 0;  	default: -		return TRUE; +		/* linear vector instruction */ +		return (1 << c);  	}  }  static boolean -nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +nv50_program_tx_insn(struct nv50_pc *pc, +		     const struct tgsi_full_instruction *inst)  { -	const struct tgsi_full_instruction *inst = &tok->FullInstruction; -	struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; +	struct nv50_reg *rdst[4], *dst[4], *brdc, *src[3][4], *temp;  	unsigned mask, sat, unit; -	boolean assimilate = FALSE;  	int i, c;  	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;  	sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; +	memset(src, 0, sizeof(src)); +  	for (c = 0; c < 4; c++) { -		if (mask & (1 << c)) +		if ((mask & (1 << c)) && !pc->r_dst[c])  			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);  		else -			dst[c] = NULL; -		rdst[c] = NULL; -		src[0][c] = NULL; -		src[1][c] = NULL; -		src[2][c] = NULL; +			dst[c] = pc->r_dst[c]; +		rdst[c] = dst[c];  	}  	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {  		const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; +		unsigned src_mask; +		boolean neg_supp; + +		src_mask = nv50_tgsi_src_mask(inst, i); +		neg_supp = negate_supported(inst, i);  		if (fs->SrcRegister.File == 
TGSI_FILE_SAMPLER)  			unit = fs->SrcRegister.Index;  		for (c = 0; c < 4; c++) -			src[i][c] = tgsi_src(pc, c, fs, -					     negate_supported(inst, i)); +			if (src_mask & (1 << c)) +				src[i][c] = tgsi_src(pc, c, fs, neg_supp);  	} -	if (sat) { -		for (c = 0; c < 4; c++) { -			rdst[c] = dst[c]; -			dst[c] = temp_temp(pc); -		} +	brdc = temp = pc->r_brdc; +	if (brdc && brdc->type != P_TEMP) { +		temp = temp_temp(pc); +		if (sat) +			brdc = temp;  	} else -	if (direct2dest_op(inst)) { +	if (sat) {  		for (c = 0; c < 4; c++) { -			if (!dst[c] || dst[c]->type != P_TEMP) +			if (!(mask & (1 << c)) || dst[c]->type == P_TEMP)  				continue; - -			for (i = c + 1; i < 4; i++) { -				if (dst[c] == src[0][i] || -				    dst[c] == src[1][i] || -				    dst[c] == src[2][i]) -					break; -			} -			if (i == 4) -				continue; - -			assimilate = TRUE;  			rdst[c] = dst[c]; -			dst[c] = alloc_temp(pc, NULL); +			dst[c] = temp_temp(pc);  		}  	} +	assert(brdc || !is_scalar_op(inst->Instruction.Opcode)); +  	switch (inst->Instruction.Opcode) {  	case TGSI_OPCODE_ABS:  		for (c = 0; c < 4; c++) { @@ -1360,74 +1513,56 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  			emit_add(pc, dst[c], src[0][c], src[1][c]);  		}  		break; -	case TGSI_OPCODE_COS: -		temp = temp_temp(pc); -		emit_precossin(pc, temp, src[0][0]); -		emit_flop(pc, 5, temp, temp); +	case TGSI_OPCODE_CEIL:  		for (c = 0; c < 4; c++) {  			if (!(mask & (1 << c)))  				continue; -			emit_mov(pc, dst[c], temp); +			emit_cvt(pc, dst[c], src[0][c], -1, +				 CVTOP_CEIL, CVT_F32_F32); +		} +		break; +	case TGSI_OPCODE_COS: +		if (mask & 8) { +			emit_precossin(pc, temp, src[0][3]); +			emit_flop(pc, 5, dst[3], temp); +			if (!(mask &= 7)) +				break; +			if (temp == dst[3]) +				temp = brdc = temp_temp(pc);  		} +		emit_precossin(pc, temp, src[0][0]); +		emit_flop(pc, 5, brdc, temp);  		break;  	case TGSI_OPCODE_DP3: -		temp = temp_temp(pc);  		emit_mul(pc, temp, src[0][0], src[1][0]);  		
emit_mad(pc, temp, src[0][1], src[1][1], temp); -		emit_mad(pc, temp, src[0][2], src[1][2], temp); -		for (c = 0; c < 4; c++) { -			if (!(mask & (1 << c))) -				continue; -			emit_mov(pc, dst[c], temp); -		} +		emit_mad(pc, brdc, src[0][2], src[1][2], temp);  		break;  	case TGSI_OPCODE_DP4: -		temp = temp_temp(pc);  		emit_mul(pc, temp, src[0][0], src[1][0]);  		emit_mad(pc, temp, src[0][1], src[1][1], temp);  		emit_mad(pc, temp, src[0][2], src[1][2], temp); -		emit_mad(pc, temp, src[0][3], src[1][3], temp); -		for (c = 0; c < 4; c++) { -			if (!(mask & (1 << c))) -				continue; -			emit_mov(pc, dst[c], temp); -		} +		emit_mad(pc, brdc, src[0][3], src[1][3], temp);  		break;  	case TGSI_OPCODE_DPH: -		temp = temp_temp(pc);  		emit_mul(pc, temp, src[0][0], src[1][0]);  		emit_mad(pc, temp, src[0][1], src[1][1], temp);  		emit_mad(pc, temp, src[0][2], src[1][2], temp); -		emit_add(pc, temp, src[1][3], temp); -		for (c = 0; c < 4; c++) { -			if (!(mask & (1 << c))) -				continue; -			emit_mov(pc, dst[c], temp); -		} +		emit_add(pc, brdc, src[1][3], temp);  		break;  	case TGSI_OPCODE_DST: -	{ -		struct nv50_reg *one = alloc_immd(pc, 1.0); -		if (mask & (1 << 0)) -			emit_mov(pc, dst[0], one);  		if (mask & (1 << 1))  			emit_mul(pc, dst[1], src[0][1], src[1][1]);  		if (mask & (1 << 2))  			emit_mov(pc, dst[2], src[0][2]);  		if (mask & (1 << 3))  			emit_mov(pc, dst[3], src[1][3]); -		FREE(one); -	} +		if (mask & (1 << 0)) +			emit_mov_immdval(pc, dst[0], 1.0f);  		break;  	case TGSI_OPCODE_EX2: -		temp = temp_temp(pc);  		emit_preex2(pc, temp, src[0][0]); -		emit_flop(pc, 6, temp, temp); -		for (c = 0; c < 4; c++) { -			if (!(mask & (1 << c))) -				continue; -			emit_mov(pc, dst[c], temp); -		} +		emit_flop(pc, 6, brdc, temp);  		break;  	case TGSI_OPCODE_FLR:  		for (c = 0; c < 4; c++) { @@ -1450,19 +1585,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  		emit_kil(pc, src[0][1]);  		emit_kil(pc, src[0][2]);  		emit_kil(pc, 
src[0][3]); -		pc->p->cfg.fp.regs[2] |= 0x00100000;  		break;  	case TGSI_OPCODE_LIT:  		emit_lit(pc, &dst[0], mask, &src[0][0]);  		break;  	case TGSI_OPCODE_LG2: -		temp = temp_temp(pc); -		emit_flop(pc, 3, temp, src[0][0]); -		for (c = 0; c < 4; c++) { -			if (!(mask & (1 << c))) -				continue; -			emit_mov(pc, dst[c], temp); -		} +		emit_flop(pc, 3, brdc, src[0][0]);  		break;  	case TGSI_OPCODE_LRP:  		temp = temp_temp(pc); @@ -1510,31 +1638,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  		}  		break;  	case TGSI_OPCODE_POW: -		temp = temp_temp(pc); -		emit_pow(pc, temp, src[0][0], src[1][0]); -		for (c = 0; c < 4; c++) { -			if (!(mask & (1 << c))) -				continue; -			emit_mov(pc, dst[c], temp); -		} +		emit_pow(pc, brdc, src[0][0], src[1][0]);  		break;  	case TGSI_OPCODE_RCP: -		for (c = 3; c >= 0; c--) { -			if (!(mask & (1 << c))) -				continue; -			emit_flop(pc, 0, dst[c], src[0][0]); -		} +		emit_flop(pc, 0, brdc, src[0][0]);  		break;  	case TGSI_OPCODE_RSQ: -		for (c = 3; c >= 0; c--) { -			if (!(mask & (1 << c))) -				continue; -			emit_flop(pc, 2, dst[c], src[0][0]); -		} +		emit_flop(pc, 2, brdc, src[0][0]);  		break;  	case TGSI_OPCODE_SCS:  		temp = temp_temp(pc); -		emit_precossin(pc, temp, src[0][0]); +		if (mask & 3) +			emit_precossin(pc, temp, src[0][0]);  		if (mask & (1 << 0))  			emit_flop(pc, 5, dst[0], temp);  		if (mask & (1 << 1)) @@ -1544,28 +1659,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  		if (mask & (1 << 3))  			emit_mov_immdval(pc, dst[3], 1.0);  		break; -	case TGSI_OPCODE_SGE: -		for (c = 0; c < 4; c++) { -			if (!(mask & (1 << c))) -				continue; -			emit_set(pc, 6, dst[c], src[0][c], src[1][c]); -		} -		break;  	case TGSI_OPCODE_SIN: -		temp = temp_temp(pc); -		emit_precossin(pc, temp, src[0][0]); -		emit_flop(pc, 4, temp, temp); -		for (c = 0; c < 4; c++) { -			if (!(mask & (1 << c))) -				continue; -			emit_mov(pc, dst[c], temp); +		if (mask & 8) { +		
	emit_precossin(pc, temp, src[0][3]); +			emit_flop(pc, 4, dst[3], temp); +			if (!(mask &= 7)) +				break; +			if (temp == dst[3]) +				temp = brdc = temp_temp(pc);  		} +		emit_precossin(pc, temp, src[0][0]); +		emit_flop(pc, 4, brdc, temp);  		break;  	case TGSI_OPCODE_SLT: +	case TGSI_OPCODE_SGE: +	case TGSI_OPCODE_SEQ: +	case TGSI_OPCODE_SGT: +	case TGSI_OPCODE_SLE: +	case TGSI_OPCODE_SNE: +		i = map_tgsi_setop_cc(inst->Instruction.Opcode);  		for (c = 0; c < 4; c++) {  			if (!(mask & (1 << c)))  				continue; -			emit_set(pc, 1, dst[c], src[0][c], src[1][c]); +			emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]);  		}  		break;  	case TGSI_OPCODE_SUB: @@ -1583,6 +1699,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  		emit_tex(pc, dst, mask, src[0], unit,  			 inst->InstructionExtTexture.Texture, TRUE);  		break; +	case TGSI_OPCODE_TRUNC: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_cvt(pc, dst[c], src[0][c], -1, +				 CVTOP_TRUNC, CVT_F32_F32); +		} +		break;  	case TGSI_OPCODE_XPD:  		temp = temp_temp(pc);  		if (mask & (1 << 0)) { @@ -1607,17 +1731,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  		return FALSE;  	} +	if (brdc) { +		if (sat) +			emit_sat(pc, brdc, brdc); +		for (c = 0; c < 4; c++) +			if ((mask & (1 << c)) && dst[c] != brdc) +				emit_mov(pc, dst[c], brdc); +	} else  	if (sat) {  		for (c = 0; c < 4; c++) {  			if (!(mask & (1 << c)))  				continue; -			emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, -				 CVT_F32_F32); +			/* in this case we saturate later */ +			if (dst[c]->type == P_TEMP && dst[c]->index < 0) +				continue; +			emit_sat(pc, rdst[c], dst[c]);  		} -	} else if (assimilate) { -		for (c = 0; c < 4; c++) -			if (rdst[c]) -				assimilate_temp(pc, rdst[c], dst[c]);  	}  	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -1626,9 +1755,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  				
continue;  			if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)  				FREE(src[i][c]); -			else -			if (src[i][c]->acc == pc->insn_cur) -				release_hw(pc, src[i][c]);  		}  	} @@ -1636,180 +1762,271 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  	return TRUE;  } -/* Adjust a bitmask that indicates what components of a source are used, - * we use this in tx_prep so we only load interpolants that are needed. - */  static void -insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask) +prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)  { -	const struct tgsi_instruction_ext_texture *tex; - -	switch (insn->Instruction.Opcode) { -	case TGSI_OPCODE_DP3: -		*mask = 0x7; -		break; -	case TGSI_OPCODE_DP4: -	case TGSI_OPCODE_DPH: -		*mask = 0xF; -		break; -	case TGSI_OPCODE_LIT: -		*mask = 0xB; -		break; -	case TGSI_OPCODE_RCP: -	case TGSI_OPCODE_RSQ: -		*mask = 0x1; -		break; -	case TGSI_OPCODE_TEX: -	case TGSI_OPCODE_TXP: -		assert(insn->Instruction.Extended); -		tex = &insn->InstructionExtTexture; - -		*mask = 0x7; -		if (tex->Texture == TGSI_TEXTURE_1D) -			*mask = 0x1; -		else -		if (tex->Texture == TGSI_TEXTURE_2D) -			*mask = 0x3; - -		if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) -			*mask |= 0x8; -		break; -	default: -		break; -	} -} - -static void -prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok, -		  unsigned *r_usage[2]) -{ -	const struct tgsi_full_instruction *insn; +	struct nv50_reg *reg = NULL;  	const struct tgsi_full_src_register *src;  	const struct tgsi_dst_register *dst; +	unsigned i, c, k, mask; -	unsigned i, c, k, n, mask, *acc_p; - -	insn = &tok->FullInstruction;  	dst = &insn->FullDstRegisters[0].DstRegister;  	mask = dst->WriteMask; -	if (!r_usage[0]) -		r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned)); -	if (!r_usage[1]) -		r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned)); +        if (dst->File == TGSI_FILE_TEMPORARY) +         
       reg = pc->temp; +        else +        if (dst->File == TGSI_FILE_OUTPUT) +                reg = pc->result; -	if (dst->File == TGSI_FILE_TEMPORARY) { +	if (reg) {  		for (c = 0; c < 4; c++) {  			if (!(mask & (1 << c)))  				continue; -			r_usage[0][dst->Index * 4 + c] = pc->insn_nr; +			reg[dst->Index * 4 + c].acc = pc->insn_nr;  		}  	}  	for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {  		src = &insn->FullSrcRegisters[i]; -		switch (src->SrcRegister.File) { -		case TGSI_FILE_TEMPORARY: -			acc_p = r_usage[0]; -			break; -		case TGSI_FILE_INPUT: -			acc_p = r_usage[1]; -			break; -		default: +		if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) +			reg = pc->temp; +		else +		if (src->SrcRegister.File == TGSI_FILE_INPUT) +			reg = pc->attr; +		else  			continue; -		} -		insn_adjust_mask(insn, &mask); +		mask = nv50_tgsi_src_mask(insn, i);  		for (c = 0; c < 4; c++) {  			if (!(mask & (1 << c)))  				continue; -  			k = tgsi_util_get_full_src_register_extswizzle(src, c); -			switch (k) { -			case TGSI_EXTSWIZZLE_X: -			case TGSI_EXTSWIZZLE_Y: -			case TGSI_EXTSWIZZLE_Z: -			case TGSI_EXTSWIZZLE_W: -				n = src->SrcRegister.Index * 4 + k; -				acc_p[n] = pc->insn_nr; -				break; -			default: -				break; -			} + +			if (k > TGSI_EXTSWIZZLE_W) +				continue; + +			reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr;  		}  	}  } +/* Returns a bitmask indicating which dst components need to be + * written to temporaries first to avoid 'corrupting' sources. + * + * m[i]   (out) indicate component to write in the i-th position + * rdep[c] (in) bitmasks of dst[i] that require dst[c] as source + */  static unsigned -load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid, -	       int *aid, int *p_oid) +nv50_revdep_reorder(unsigned m[4], unsigned rdep[4])  { -	struct nv50_reg *iv; -	int oid, c, n; -	unsigned mask = 0; +	unsigned i, c, x, unsafe = 0; /* only OR-accumulated below, so it must start at zero */ -	iv = (pc->interp_mode[i] & INTERP_CENTROID) ? 
pc->iv_c : pc->iv_p; +	for (c = 0; c < 4; c++) +		m[c] = c; -	for (c = 0, n = i * 4; c < 4; c++, n++) { -		oid = (*p_oid)++; -		pc->attr[n].type = P_TEMP; -		pc->attr[n].index = i; +	/* Swap as long as a dst component written earlier is depended on +	 * by one written later, but the next one isn't depended on by it. +	 */ +	for (c = 0; c < 3; c++) { +		if (rdep[m[c + 1]] & (1 << m[c])) +			continue; /* if next one is depended on by us */ +		for (i = c + 1; i < 4; i++) +			/* if we are depended on by a later one */ +			if (rdep[m[c]] & (1 << m[i])) +				break; +		if (i == 4) +			continue; +		/* now, swap */ +		x = m[c]; +		m[c] = m[c + 1]; +		m[c + 1] = x; + +		/* restart */ +		c = 0; +	} + +	/* mark dependencies that could not be resolved by reordering */ +	for (i = 0; i < 3; ++i) +		for (c = i + 1; c < 4; ++c) +			if (rdep[m[i]] & (1 << m[c])) +				unsafe |= (1 << i); + +	/* NOTE: $unsafe is with respect to order, not component */ +	return unsafe; +} -		if (pc->attr[n].acc == acc[n]) +/* Select a suitable dst register for broadcasting scalar results, + * or return NULL if we have to allocate an extra TEMP. + * + * If e.g. only 1 component is written, we may also emit the final + * result to a write-only register. + */ +static struct nv50_reg * +tgsi_broadcast_dst(struct nv50_pc *pc, +		   const struct tgsi_full_dst_register *fd, unsigned mask) +{ +	if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) { +		int c = ffs(~mask & fd->DstRegister.WriteMask); +		if (c) +			return tgsi_dst(pc, c - 1, fd); +	} else { +		int c = ffs(fd->DstRegister.WriteMask) - 1; +		if ((1 << c) == fd->DstRegister.WriteMask) +			return tgsi_dst(pc, c, fd); +	} + +	return NULL; +} + +/* Scan source swizzles and return a bitmask indicating dst regs that + * also occur among the src regs, and fill rdep for nv50_revdep_reoder. 
+ */ +static unsigned +nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, +		       unsigned rdep[4]) +{ +	const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0]; +	const struct tgsi_full_src_register *fs; +	unsigned i, deqs = 0; + +	for (i = 0; i < 4; ++i) +		rdep[i] = 0; + +	for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { +		unsigned chn, mask = nv50_tgsi_src_mask(insn, i); +		boolean neg_supp = negate_supported(insn, i); + +		fs = &insn->FullSrcRegisters[i]; +		if (fs->SrcRegister.File != fd->DstRegister.File || +		    fs->SrcRegister.Index != fd->DstRegister.Index)  			continue; -		mask |= (1 << c); -		pc->attr[n].acc = acc[n]; -		pc->attr[n].rhw = pc->attr[n].hw = -1; -		alloc_reg(pc, &pc->attr[n]); +		for (chn = 0; chn < 4; ++chn) { +			unsigned s, c; + +			if (!(mask & (1 << chn))) /* src is not read */ +				continue; +			c = tgsi_util_get_full_src_register_extswizzle(fs, chn); +			s = tgsi_util_get_full_src_register_sign_mode(fs, chn); -		pc->attr[n].rhw = (*aid)++; -		emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]); +			if (c > TGSI_EXTSWIZZLE_W || +			    !(fd->DstRegister.WriteMask & (1 << c))) +				continue; -		pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4)); -		(*mid)++; -		pc->p->cfg.fp.regs[1] += 0x00010001; +			/* no danger if src is copied to TEMP first */ +			if ((s != TGSI_UTIL_SIGN_KEEP) && +			    (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) +				continue; + +			rdep[c] |= nv50_tgsi_dst_revdep( +				insn->Instruction.Opcode, i, chn); +			deqs |= (1 << c); +		}  	} -	return mask; +	return deqs;  }  static boolean -nv50_program_tx_prep(struct nv50_pc *pc) +nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  { -	struct tgsi_parse_context p; -	boolean ret = FALSE; -	unsigned i, c; -	unsigned fcol, bcol, fcrd, depr; +	struct tgsi_full_instruction insn = tok->FullInstruction; +	const struct tgsi_full_dst_register *fd; +	unsigned i, deqs, rdep[4], m[4]; + +	fd = 
&tok->FullInstruction.FullDstRegisters[0]; +	deqs = nv50_tgsi_scan_swizzle(&insn, rdep); + +	if (is_scalar_op(insn.Instruction.Opcode)) { +		pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); +		if (!pc->r_brdc) +			pc->r_brdc = temp_temp(pc); +		return nv50_program_tx_insn(pc, &insn); +	} +	pc->r_brdc = NULL; + +	if (!deqs) +		return nv50_program_tx_insn(pc, &insn); + +	deqs = nv50_revdep_reorder(m, rdep); -	/* count (centroid) perspective interpolations */ -	unsigned centroid_loads = 0; -	unsigned perspect_loads = 0; +	for (i = 0; i < 4; ++i) { +		assert(pc->r_dst[m[i]] == NULL); -	/* track register access for temps and attrs */ -	unsigned *r_usage[2]; -	r_usage[0] = NULL; -	r_usage[1] = NULL; +		insn.FullDstRegisters[0].DstRegister.WriteMask = +			fd->DstRegister.WriteMask & (1 << m[i]); -	depr = fcol = bcol = fcrd = 0xffff; +		if (!insn.FullDstRegisters[0].DstRegister.WriteMask) +			continue; + +		if (deqs & (1 << i)) +			pc->r_dst[m[i]] = alloc_temp(pc, NULL); -	if (pc->p->type == PIPE_SHADER_FRAGMENT) { -		pc->p->cfg.fp.regs[0] = 0x01000404; -		pc->p->cfg.fp.regs[1] = 0x00000400; +		if (!nv50_program_tx_insn(pc, &insn)) +			return FALSE;  	} -	tgsi_parse_init(&p, pc->p->pipe.tokens); -	while (!tgsi_parse_end_of_tokens(&p)) { -		const union tgsi_full_token *tok = &p.FullToken; +	for (i = 0; i < 4; i++) { +		struct nv50_reg *reg = pc->r_dst[i]; +		if (!reg) +			continue; +		pc->r_dst[i] = NULL; + +		if (insn.Instruction.Saturate == TGSI_SAT_ZERO_ONE) +			emit_sat(pc, tgsi_dst(pc, i, fd), reg); +		else +			emit_mov(pc, tgsi_dst(pc, i, fd), reg); +		free_temp(pc, reg); +	} -		tgsi_parse_token(&p); +	return TRUE; +} + +static void +load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) +{ +	struct nv50_reg *iv, **ppiv; +	unsigned mode = pc->interp_mode[reg->index]; + +	ppiv = (mode & INTERP_CENTROID) ? 
&pc->iv_c : &pc->iv_p; +	iv = *ppiv; + +	if ((mode & INTERP_PERSPECTIVE) && !iv) { +		iv = *ppiv = alloc_temp(pc, NULL); +		iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; + +		emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); +		emit_flop(pc, 0, iv, iv); + +		/* XXX: when loading interpolants dynamically, move these +		 * to the program head, or make sure it can't be skipped. +		 */ +	} + +	emit_interp(pc, reg, iv, mode); +} + +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ +	struct tgsi_parse_context tp; +	struct nv50_program *p = pc->p; +	boolean ret = FALSE; +	unsigned i, c, flat_nr = 0; + +	tgsi_parse_init(&tp, pc->p->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&tp)) { +		const union tgsi_full_token *tok = &tp.FullToken; + +		tgsi_parse_token(&tp);  		switch (tok->Token.Type) {  		case TGSI_TOKEN_TYPE_IMMEDIATE:  		{  			const struct tgsi_full_immediate *imm = -				&p.FullToken.FullImmediate; +				&tp.FullToken.FullImmediate;  			ctor_immd(pc, imm->u[0].Float,  				      imm->u[1].Float, @@ -1820,78 +2037,61 @@ nv50_program_tx_prep(struct nv50_pc *pc)  		case TGSI_TOKEN_TYPE_DECLARATION:  		{  			const struct tgsi_full_declaration *d; -			unsigned last, first, mode; +			unsigned si, last, first, mode; -			d = &p.FullToken.FullDeclaration; +			d = &tp.FullToken.FullDeclaration;  			first = d->DeclarationRange.First;  			last = d->DeclarationRange.Last;  			switch (d->Declaration.File) {  			case TGSI_FILE_TEMPORARY: -				if (pc->temp_nr < (last + 1)) -					pc->temp_nr = last + 1;  				break;  			case TGSI_FILE_OUTPUT: -				if (pc->result_nr < (last + 1)) -					pc->result_nr = last + 1; - -				if (!d->Declaration.Semantic) +				if (!d->Declaration.Semantic || +				    p->type == PIPE_SHADER_FRAGMENT)  					break; +				si = d->Semantic.SemanticIndex;  				switch (d->Semantic.SemanticName) { -				case TGSI_SEMANTIC_POSITION: -					depr = first; -					pc->p->cfg.fp.regs[2] |= 0x00000100; -					pc->p->cfg.fp.regs[3] |= 0x00000011; +				case 
TGSI_SEMANTIC_BCOLOR: +					p->cfg.two_side[si].hw = first; +					if (p->cfg.io_nr > first) +						p->cfg.io_nr = first; +					break; +				case TGSI_SEMANTIC_PSIZE: +					p->cfg.psiz = first; +					if (p->cfg.io_nr > first) +						p->cfg.io_nr = first; +					break; +					/* +				case TGSI_SEMANTIC_CLIP_DISTANCE: +					p->cfg.clpd = MIN2(p->cfg.clpd, first);  					break; +					*/  				default:  					break;  				} -  				break;  			case TGSI_FILE_INPUT:  			{ -				if (pc->attr_nr < (last + 1)) -					pc->attr_nr = last + 1; - -				if (pc->p->type != PIPE_SHADER_FRAGMENT) +				if (p->type != PIPE_SHADER_FRAGMENT)  					break;  				switch (d->Declaration.Interpolate) {  				case TGSI_INTERPOLATE_CONSTANT:  					mode = INTERP_FLAT; +					flat_nr++;  					break;  				case TGSI_INTERPOLATE_PERSPECTIVE:  					mode = INTERP_PERSPECTIVE; +					p->cfg.regs[1] |= 0x08 << 24;  					break;  				default:  					mode = INTERP_LINEAR;  					break;  				} - -				if (d->Declaration.Semantic) { -					switch (d->Semantic.SemanticName) { -					case TGSI_SEMANTIC_POSITION: -						fcrd = first; -						break; -					case TGSI_SEMANTIC_COLOR: -						fcol = first; -						mode = INTERP_PERSPECTIVE; -						break; -					case TGSI_SEMANTIC_BCOLOR: -						bcol = first; -						mode = INTERP_PERSPECTIVE; -						break; -					} -				} - -				if (d->Declaration.Centroid) { +				if (d->Declaration.Centroid)  					mode |= INTERP_CENTROID; -					if (mode & INTERP_PERSPECTIVE) -						centroid_loads++; -				} else -				if (mode & INTERP_PERSPECTIVE) -					perspect_loads++;  				assert(last < 32);  				for (i = first; i <= last; i++) @@ -1899,8 +2099,6 @@ nv50_program_tx_prep(struct nv50_pc *pc)  			}  				break;  			case TGSI_FILE_CONSTANT: -				if (pc->param_nr < (last + 1)) -					pc->param_nr = last + 1;  				break;  			case TGSI_FILE_SAMPLER:  				break; @@ -1913,182 +2111,155 @@ nv50_program_tx_prep(struct nv50_pc *pc)  			break;  		case TGSI_TOKEN_TYPE_INSTRUCTION:  			pc->insn_nr++; -			
prep_inspect_insn(pc, tok, r_usage); +			prep_inspect_insn(pc, &tok->FullInstruction);  			break;  		default:  			break;  		}  	} -	if (pc->temp_nr) { -		pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); -		if (!pc->temp) -			goto out_err; +	if (p->type == PIPE_SHADER_VERTEX) { +		int rid = 0; -		for (i = 0; i < pc->temp_nr; i++) { -			for (c = 0; c < 4; c++) { -				pc->temp[i*4+c].type = P_TEMP; -				pc->temp[i*4+c].hw = -1; -				pc->temp[i*4+c].rhw = -1; -				pc->temp[i*4+c].index = i; -				pc->temp[i*4+c].acc = r_usage[0][i*4+c]; +		for (i = 0; i < pc->attr_nr * 4; ++i) { +			if (pc->attr[i].acc) { +				pc->attr[i].hw = rid++; +				p->cfg.attr[i / 32] |= 1 << (i % 32);  			}  		} -	} - -	if (pc->attr_nr) { -		int oid = 4, mid = 4, aid = 0; -		/* oid = VP output id -		 * aid = FP attribute/interpolant id -		 * mid = VP output mapping field ID -		 */ -		pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); -		if (!pc->attr) -			goto out_err; +		for (i = 0, rid = 0; i < pc->result_nr; ++i) { +			p->cfg.io[i].hw = rid; +			p->cfg.io[i].id_vp = i; -		if (pc->p->type == PIPE_SHADER_FRAGMENT) { -			/* position should be loaded first */ -			if (fcrd != 0xffff) { -				unsigned mask; -				mid = 0; -				mask = load_fp_attrib(pc, fcrd, r_usage[1], -						      &mid, &aid, &oid); -				oid = 0; -				pc->p->cfg.fp.regs[1] |= (mask << 24); -				pc->p->cfg.fp.map[0] = 0x04040404 * fcrd; +			for (c = 0; c < 4; ++c) { +				int n = i * 4 + c; +				if (!pc->result[n].acc) +					continue; +				pc->result[n].hw = rid++; +				p->cfg.io[i].mask |= 1 << c;  			} -			pc->p->cfg.fp.map[0] += 0x03020100; +		} -			/* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */ +		for (c = 0; c < 2; ++c) +			if (p->cfg.two_side[c].hw < 0x40) +				p->cfg.two_side[c] = p->cfg.io[ +					p->cfg.two_side[c].hw]; -			if (perspect_loads) { -				pc->iv_p = alloc_temp(pc, NULL); +		if (p->cfg.psiz < 0x40) +			p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; +	} else +	if (p->type == 
PIPE_SHADER_FRAGMENT) { +		int rid, aid; +		unsigned n = 0, m = pc->attr_nr - flat_nr; -				if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) { -					pc->p->cfg.fp.regs[1] |= 0x08000000; -					pc->iv_p->rhw = aid++; -					emit_interp(pc, pc->iv_p, NULL, -						    INTERP_LINEAR); -					emit_flop(pc, 0, pc->iv_p, pc->iv_p); -				} else { -					pc->iv_p->rhw = aid - 1; -					emit_flop(pc, 0, pc->iv_p, -						  &pc->attr[fcrd * 4 + 3]); -				} -			} +		int base = (TGSI_SEMANTIC_POSITION == +			    p->info.input_semantic_name[0]) ? 0 : 1; -			if (centroid_loads) { -				pc->iv_c = alloc_temp(pc, NULL); -				pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++; -				emit_interp(pc, pc->iv_c, NULL, -					    INTERP_CENTROID); -				emit_flop(pc, 0, pc->iv_c, pc->iv_c); -				pc->p->cfg.fp.regs[1] |= 0x08000000; +		/* non-flat interpolants have to be mapped to +		 * the lower hardware IDs, so sort them: +		 */ +		for (i = 0; i < pc->attr_nr; i++) { +			if (pc->interp_mode[i] == INTERP_FLAT) { +				p->cfg.io[m].id_vp = i + base; +				p->cfg.io[m++].id_fp = i; +			} else { +				if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) +					p->cfg.io[n].linear = TRUE; +				p->cfg.io[n].id_vp = i + base; +				p->cfg.io[n++].id_fp = i;  			} +		} -			for (c = 0; c < 4; c++) { -				/* I don't know what these values do, but -				 * let's set them like the blob does: -				 */ -				if (fcol != 0xffff && r_usage[1][fcol * 4 + c]) -					pc->p->cfg.fp.regs[0] += 0x00010000; -				if (bcol != 0xffff && r_usage[1][bcol * 4 + c]) -					pc->p->cfg.fp.regs[0] += 0x00010000; -			} +		if (!base) /* set w-coordinate mask from perspective interp */ +			p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; -			for (i = 0; i < pc->attr_nr; i++) -				load_fp_attrib(pc, i, r_usage[1], -					       &mid, &aid, &oid); +		aid = popcnt4( /* if fcrd isn't contained in cfg.io */ +			base ? 
(p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); -			if (pc->iv_p) -				free_temp(pc, pc->iv_p); -			if (pc->iv_c) -				free_temp(pc, pc->iv_c); +		for (n = 0; n < pc->attr_nr; ++n) { +			p->cfg.io[n].hw = rid = aid; +			i = p->cfg.io[n].id_fp; -			pc->p->cfg.fp.high_map = (mid / 4); -			pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0); -		} else { -			/* vertex program */ -			for (i = 0; i < pc->attr_nr * 4; i++) { -				pc->p->cfg.vp.attr[aid / 32] |= -					(1 << (aid % 32)); -				pc->attr[i].type = P_ATTR; -				pc->attr[i].hw = aid++; -				pc->attr[i].index = i / 4; +			for (c = 0; c < 4; ++c) { +				if (!pc->attr[i * 4 + c].acc) +					continue; +				pc->attr[i * 4 + c].rhw = rid++; +				p->cfg.io[n].mask |= 1 << c; + +				load_interpolant(pc, &pc->attr[i * 4 + c]);  			} +			aid += popcnt4(p->cfg.io[n].mask);  		} -	} -	if (pc->result_nr) { -		int rid = 0; +		if (!base) +			p->cfg.regs[1] |= p->cfg.io[0].mask << 24; -		pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); -		if (!pc->result) -			goto out_err; +		m = popcnt4(p->cfg.regs[1] >> 24); -		for (i = 0; i < pc->result_nr; i++) { -			for (c = 0; c < 4; c++) { -				if (pc->p->type == PIPE_SHADER_FRAGMENT) { -					pc->result[i*4+c].type = P_TEMP; -					pc->result[i*4+c].hw = -1; -					pc->result[i*4+c].rhw = (i == depr) ? 
-						-1 : rid++; -				} else { -					pc->result[i*4+c].type = P_RESULT; -					pc->result[i*4+c].hw = rid++; -				} -				pc->result[i*4+c].index = i; -			} +		/* set count of non-position inputs and of non-flat +		 * non-position inputs for FP_INTERPOLANT_CTRL +		 */ +		p->cfg.regs[1] |= aid - m; -			if (pc->p->type == PIPE_SHADER_FRAGMENT && -			    depr != 0xffff) { -				pc->result[depr * 4 + 2].rhw = -					(pc->result_nr - 1) * 4; -			} -		} -	} +		if (flat_nr) { +			i = p->cfg.io[pc->attr_nr - flat_nr].hw; +			p->cfg.regs[1] |= (i - m) << 16; +		} else +			p->cfg.regs[1] |= p->cfg.regs[1] << 16; -	if (pc->param_nr) { -		int rid = 0; +		/* mark color semantic for light-twoside */ +		n = 0x40; +		for (i = 0; i < pc->attr_nr; i++) { +			ubyte si, sn; -		pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); -		if (!pc->param) -			goto out_err; +			sn = p->info.input_semantic_name[p->cfg.io[i].id_fp]; +			si = p->info.input_semantic_index[p->cfg.io[i].id_fp]; + +			if (sn == TGSI_SEMANTIC_COLOR) { +				p->cfg.two_side[si] = p->cfg.io[i]; + +				/* increase colour count */ +				p->cfg.regs[0] += popcnt4( +					p->cfg.two_side[si].mask) << 16; -		for (i = 0; i < pc->param_nr; i++) { -			for (c = 0; c < 4; c++) { -				pc->param[i*4+c].type = P_CONST; -				pc->param[i*4+c].hw = rid++; -				pc->param[i*4+c].index = i; +				n = MIN2(n, p->cfg.io[i].hw - m);  			}  		} +		if (n < 0x40) +			p->cfg.regs[0] += n; + +		/* Initialize FP results: +		 * FragDepth is always first TGSI and last hw output +		 */ +		i = p->info.writes_z ? 
4 : 0; +		for (rid = 0; i < pc->result_nr * 4; i++) +			pc->result[i].rhw = rid++; +		if (p->info.writes_z) +			pc->result[2].rhw = rid;  	}  	if (pc->immd_nr) {  		int rid = 0; -		pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); +		pc->immd = MALLOC(pc->immd_nr * 4 * sizeof(struct nv50_reg));  		if (!pc->immd)  			goto out_err;  		for (i = 0; i < pc->immd_nr; i++) { -			for (c = 0; c < 4; c++) { -				pc->immd[i*4+c].type = P_IMMD; -				pc->immd[i*4+c].hw = rid++; -				pc->immd[i*4+c].index = i; -			} +			for (c = 0; c < 4; c++, rid++) +				ctor_reg(&pc->immd[rid], P_IMMD, i, rid);  		}  	}  	ret = TRUE;  out_err: -	if (r_usage[0]) -		FREE(r_usage[0]); -	if (r_usage[1]) -		FREE(r_usage[1]); +	if (pc->iv_p) +		free_temp(pc, pc->iv_p); +	if (pc->iv_c) +		free_temp(pc, pc->iv_c); -	tgsi_parse_free(&p); +	tgsi_parse_free(&tp);  	return ret;  } @@ -2110,6 +2281,88 @@ free_nv50_pc(struct nv50_pc *pc)  }  static boolean +ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) +{ +	int i, c; +	unsigned rtype[2] = { P_ATTR, P_RESULT }; + +	pc->p = p; +	pc->temp_nr = p->info.file_max[TGSI_FILE_TEMPORARY] + 1; +	pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1; +	pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1; +	pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; + +	p->cfg.high_temp = 4; + +	p->cfg.two_side[0].hw = 0x40; +	p->cfg.two_side[1].hw = 0x40; + +	switch (p->type) { +	case PIPE_SHADER_VERTEX: +		p->cfg.psiz = 0x40; +		p->cfg.clpd = 0x40; +		p->cfg.io_nr = pc->result_nr; +		break; +	case PIPE_SHADER_FRAGMENT: +		rtype[0] = rtype[1] = P_TEMP; + +		p->cfg.regs[0] = 0x01000004; +		p->cfg.io_nr = pc->attr_nr; + +		if (p->info.writes_z) { +			p->cfg.regs[2] |= 0x00000100; +			p->cfg.regs[3] |= 0x00000011; +		} +		if (p->info.uses_kill) +			p->cfg.regs[2] |= 0x00100000; +		break; +	} + +	if (pc->temp_nr) { +		pc->temp = MALLOC(pc->temp_nr * 4 * sizeof(struct nv50_reg)); +		if (!pc->temp) +			return FALSE; + +		for (i = 0; i < pc->temp_nr * 4; 
++i) +			ctor_reg(&pc->temp[i], P_TEMP, i / 4, -1); +	} + +	if (pc->attr_nr) { +		pc->attr = MALLOC(pc->attr_nr * 4 * sizeof(struct nv50_reg)); +		if (!pc->attr) +			return FALSE; + +		for (i = 0; i < pc->attr_nr * 4; ++i) +			ctor_reg(&pc->attr[i], rtype[0], i / 4, -1); +	} + +	if (pc->result_nr) { +		unsigned nr = pc->result_nr * 4; + +		pc->result = MALLOC(nr * sizeof(struct nv50_reg)); +		if (!pc->result) +			return FALSE; + +		for (i = 0; i < nr; ++i) +			ctor_reg(&pc->result[i], rtype[1], i / 4, -1); +	} + +	if (pc->param_nr) { +		int rid = 0; + +		pc->param = MALLOC(pc->param_nr * 4 * sizeof(struct nv50_reg)); +		if (!pc->param) +			return FALSE; + +		for (i = 0; i < pc->param_nr; ++i) +			for (c = 0; c < 4; ++c, ++rid) +				ctor_reg(&pc->param[rid], P_CONST, i, rid); +	} + +	return TRUE; +} + +static boolean  nv50_program_tx(struct nv50_program *p)  {  	struct tgsi_parse_context parse; @@ -2120,8 +2373,10 @@ nv50_program_tx(struct nv50_program *p)  	pc = CALLOC_STRUCT(nv50_pc);  	if (!pc)  		return FALSE; -	pc->p = p; -	pc->p->cfg.high_temp = 4; + +	ret = ctor_nv50_pc(pc, p); +	if (ret == FALSE) +		goto out_cleanup;  	ret = nv50_program_tx_prep(pc);  	if (ret == FALSE) @@ -2141,7 +2396,7 @@ nv50_program_tx(struct nv50_program *p)  		switch (tok->Token.Type) {  		case TGSI_TOKEN_TYPE_INSTRUCTION:  			++pc->insn_cur; -			ret = nv50_program_tx_insn(pc, tok); +			ret = nv50_tgsi_insn(pc, tok);  			if (ret == FALSE)  				goto out_err;  			break; @@ -2152,8 +2407,8 @@ nv50_program_tx(struct nv50_program *p)  	if (p->type == PIPE_SHADER_FRAGMENT) {  		struct nv50_reg out; +		ctor_reg(&out, P_TEMP, -1, -1); -		out.type = P_TEMP;  		for (k = 0; k < pc->result_nr * 4; k++) {  			if (pc->result[k].rhw == -1)  				continue; @@ -2258,30 +2513,19 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)  					 p->immd_nr, NV50_CB_PMISC);  	} -	if (!p->data[1] && p->param_nr) { -		struct nouveau_resource *heap = -			nv50->screen->parm_heap[p->type]; 
- -		if (nouveau_resource_alloc(heap, p->param_nr, p, &p->data[1])) { -			while (heap->next && heap->size < p->param_nr) { -				struct nv50_program *evict = heap->next->priv; -				nouveau_resource_free(&evict->data[1]); -			} - -			if (nouveau_resource_alloc(heap, p->param_nr, p, -						   &p->data[1])) -				assert(0); -		} -	} +	assert(p->param_nr <= 128);  	if (p->param_nr) { -		unsigned cbuf = NV50_CB_PVP; +		unsigned cb;  		float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],  					     PIPE_BUFFER_USAGE_CPU_READ); -		if (p->type == PIPE_SHADER_FRAGMENT) -			cbuf = NV50_CB_PFP; -		nv50_program_upload_data(nv50, map, p->data[1]->start, -					 p->param_nr, cbuf); + +		if (p->type == PIPE_SHADER_VERTEX) +			cb = NV50_CB_PVP; +		else +			cb = NV50_CB_PFP; + +		nv50_program_upload_data(nv50, map, 0, p->param_nr, cb);  		pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]);  	}  } @@ -2303,32 +2547,30 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)  		upload = TRUE;  	} -	if ((p->data[0] && p->data[0]->start != p->data_start[0]) || -		(p->data[1] && p->data[1]->start != p->data_start[1])) { -		for (e = p->exec_head; e; e = e->next) { -			unsigned ei, ci, bs; +	if (p->data[0] && p->data[0]->start != p->data_start[0]) +		upload = TRUE; -			if (e->param.index < 0) -				continue; -			bs = (e->inst[1] >> 22) & 0x07; -			assert(bs < 2); -			ei = e->param.shift >> 5; -			ci = e->param.index + p->data[bs]->start; +	if (!upload) +		return; -			e->inst[ei] &= ~e->param.mask; -			e->inst[ei] |= (ci << e->param.shift); -		} +	for (e = p->exec_head; e; e = e->next) { +		unsigned ei, ci, bs; -		if (p->data[0]) -			p->data_start[0] = p->data[0]->start; -		if (p->data[1]) -			p->data_start[1] = p->data[1]->start; +		if (e->param.index < 0) +			continue; +		bs = (e->inst[1] >> 22) & 0x07; +		assert(bs < 2); +		ei = e->param.shift >> 5; +		ci = e->param.index; +		if (bs == 0) +			ci += p->data[bs]->start; -		upload = TRUE; +		e->inst[ei] &= 
~e->param.mask; +		e->inst[ei] |= (ci << e->param.shift);  	} -	if (!upload) -		return; +	if (p->data[0]) +		p->data_start[0] = p->data[0]->start;  #ifdef NV50_PROGRAM_DUMP  	NOUVEAU_ERR("-------\n"); @@ -2402,8 +2644,8 @@ nv50_vertprog_validate(struct nv50_context *nv50)  	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |  		      NOUVEAU_BO_LOW, 0, 0);  	so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); -	so_data  (so, p->cfg.vp.attr[0]); -	so_data  (so, p->cfg.vp.attr[1]); +	so_data  (so, p->cfg.attr[0]); +	so_data  (so, p->cfg.attr[1]);  	so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);  	so_data  (so, p->cfg.high_result);  	so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); @@ -2421,7 +2663,6 @@ nv50_fragprog_validate(struct nv50_context *nv50)  	struct nouveau_grobj *tesla = nv50->screen->tesla;  	struct nv50_program *p = nv50->fragprog;  	struct nouveau_stateobj *so; -	unsigned i;  	if (!p->translated) {  		nv50_program_validate(nv50, p); @@ -2438,29 +2679,186 @@ nv50_fragprog_validate(struct nv50_context *nv50)  		      NOUVEAU_BO_HIGH, 0, 0);  	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |  		      NOUVEAU_BO_LOW, 0, 0); -	so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); -	so_data  (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */ -	so_data  (so, 0x00000004); -	so_data  (so, 0x00000000); -	so_data  (so, 0x00000000); -	so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), p->cfg.fp.high_map); -	for (i = 0; i < p->cfg.fp.high_map; i++) -		so_data(so, p->cfg.fp.map[i]); -	so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 2); -	so_data  (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */ +	so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);  	so_data  (so, p->cfg.high_temp);  	so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);  	so_data  (so, p->cfg.high_result);  	so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); -	so_data  (so, p->cfg.fp.regs[2]); +	so_data  (so, p->cfg.regs[2]);  	so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); -	
so_data  (so, p->cfg.fp.regs[3]); +	so_data  (so, p->cfg.regs[3]);  	so_method(so, tesla, NV50TCL_FP_START_ID, 1);  	so_data  (so, 0); /* program start offset */  	so_ref(so, &nv50->state.fragprog);  	so_ref(NULL, &so);  } +static void +nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) +{ +	struct nv50_program *fp = nv50->fragprog; +	struct nv50_program *vp = nv50->vertprog; +	unsigned i, c, m = base; + +	/* XXX: This can't work correctly in all cases yet, we either +	 * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has +	 * to be per FP input instead of per VP output +	 */ +	memset(pntc, 0, 8 * sizeof(uint32_t)); + +	for (i = 0; i < fp->cfg.io_nr; i++) { +		uint8_t sn, si; +		uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp; +		unsigned n = popcnt4(fp->cfg.io[i].mask); + +		if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { +			m += n; +			continue; +		} + +		sn = vp->info.input_semantic_name[j]; +		si = vp->info.input_semantic_index[j]; + +		if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) { +			ubyte mode = +				nv50->rasterizer->pipe.sprite_coord_mode[si]; + +			if (mode == PIPE_SPRITE_COORD_NONE) { +				m += n; +				continue; +			} +		} + +		/* this is either PointCoord or replaced by sprite coords */ +		for (c = 0; c < 4; c++) { +			if (!(fp->cfg.io[i].mask & (1 << c))) +				continue; +			pntc[m / 8] |= (c + 1) << ((m % 8) * 4); +			++m; +		} +	} +} + +static int +nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], +	       struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) +{ +	int c; +	uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; +	uint8_t *map = (uint8_t *)p_map; + +	for (c = 0; c < 4; ++c) { +		if (mf & 1) { +			if (fpi->linear == TRUE) +				lin[mid / 32] |= 1 << (mid % 32); +			map[mid++] = (mv & 1) ? oid : ((c == 3) ? 
0x41 : 0x40); +		} + +		oid += mv & 1; +		mf >>= 1; +		mv >>= 1; +	} + +	return mid; +} + +void +nv50_linkage_validate(struct nv50_context *nv50) +{ +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct nv50_program *vp = nv50->vertprog; +	struct nv50_program *fp = nv50->fragprog; +	struct nouveau_stateobj *so; +	struct nv50_sreg4 dummy, *vpo; +	int i, n, c, m = 0; +	uint32_t map[16], lin[4], reg[5], pcrd[8]; + +	memset(map, 0, sizeof(map)); +	memset(lin, 0, sizeof(lin)); + +	reg[1] = 0x00000004; /* low and high clip distance map ids */ +	reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ +	reg[3] = 0x00000000; /* point size map id & enable */ +	reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ +	reg[4] = fp->cfg.regs[1]; /* interpolant info */ + +	dummy.linear = FALSE; +	dummy.mask = 0xf; /* map all components of HPOS */ +	m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); + +	dummy.mask = 0x0; + +	if (vp->cfg.clpd < 0x40) { +		for (c = 0; c < vp->cfg.clpd_nr; ++c) +			map[m++] = vp->cfg.clpd + c; +		reg[1] = (m << 8); +	} + +	reg[0] |= m << 8; /* adjust BFC0 id */ + +	/* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ +	if (nv50->rasterizer->pipe.light_twoside) { +		vpo = &vp->cfg.two_side[0]; + +		m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); +		m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); +	} + +	reg[0] += m - 4; /* adjust FFC0 id */ +	reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ + +	i = 0; +	if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) +		i = 1; +	for (; i < fp->cfg.io_nr; i++) { +		ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp]; +		ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp]; + +		n = fp->cfg.io[i].id_vp; +		if (n >= vp->cfg.io_nr || +		    vp->info.output_semantic_name[n] != sn || +		    vp->info.output_semantic_index[n] != si) +			vpo = &dummy; +		else +			vpo = &vp->cfg.io[n]; + +		m = nv50_sreg4_map(map, 
m, lin, &fp->cfg.io[i], vpo); +	} + +	if (nv50->rasterizer->pipe.point_size_per_vertex) { +		map[m / 4] |= vp->cfg.psiz << ((m % 4) * 8); +		reg[3] = (m++ << 4) | 1; +	} + +	/* now fill the stateobj */ +	so = so_new(64, 0); + +	n = (m + 3) / 4; +	so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); +	so_data  (so, m); +	so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); +	so_datap (so, map, n); + +	so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); +	so_datap (so, reg, 4); + +	so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); +	so_data  (so, reg[4]); + +	so_method(so, tesla, 0x1540, 4); +	so_datap (so, lin, 4); + +	if (nv50->rasterizer->pipe.point_sprite) { +		nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); + +		so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); +		so_datap (so, pcrd, 8); +	} + +        so_ref(so, &nv50->state.programs); +        so_ref(NULL, &so); +} +  void  nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)  { @@ -2476,7 +2874,6 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)  	nouveau_bo_ref(NULL, &p->bo);  	nouveau_resource_free(&p->data[0]); -	nouveau_resource_free(&p->data[1]);  	p->translated = 0;  } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 096e0476aa..d78dee083f 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -15,6 +15,15 @@ struct nv50_program_exec {  	} param;  }; +struct nv50_sreg4 { +	uint8_t hw; +	uint8_t id_vp; +	uint8_t id_fp; + +	uint8_t mask; +	boolean linear; +}; +  struct nv50_program {  	struct pipe_shader_state pipe;  	struct tgsi_shader_info info; @@ -24,8 +33,8 @@ struct nv50_program {  	struct nv50_program_exec *exec_head;  	struct nv50_program_exec *exec_tail;  	unsigned exec_size; -	struct nouveau_resource *data[2]; -	unsigned data_start[2]; +	struct nouveau_resource *data[1]; +	unsigned data_start[1];  	struct nouveau_bo *bo; @@ -36,14 +45,20 @@ 
struct nv50_program {  	struct {  		unsigned high_temp;  		unsigned high_result; -		struct { -			unsigned attr[2]; -		} vp; -		struct { -			unsigned regs[4]; -			unsigned map[5]; -			unsigned high_map; -		} fp; + +		uint32_t attr[2]; +		uint32_t regs[4]; + +		/* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */ +		unsigned io_nr; +		struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS]; + +		/* FP colour inputs, VP/GP back colour outputs */ +		struct nv50_sreg4 two_side[2]; + +		/* VP only */ +		uint8_t clpd, clpd_nr; +		uint8_t psiz;  	} cfg;  }; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index c7f80a2203..3b08e1b89f 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -87,12 +87,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)  		return 1;  	case PIPE_CAP_GLSL:  		return 0; -	case PIPE_CAP_S3TC: -		return 1;  	case PIPE_CAP_ANISOTROPIC_FILTER:  		return 1;  	case PIPE_CAP_POINT_SPRITE: -		return 0; +		return 1;  	case PIPE_CAP_MAX_RENDER_TARGETS:  		return 8;  	case PIPE_CAP_OCCLUSION_QUERY: diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 4283808ed9..81fa3e34c5 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -276,6 +276,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,  	so_method(so, tesla, 0x1684, 1);  	so_data  (so, cso->flatshade_first ? 
0 : 1); +	so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); +	so_data  (so, cso->light_twoside); +  	so_method(so, tesla, NV50TCL_LINE_WIDTH, 1);  	so_data  (so, fui(cso->line_width));  	so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); @@ -294,6 +297,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,  	so_method(so, tesla, NV50TCL_POINT_SIZE, 1);  	so_data  (so, fui(cso->point_size)); +	so_method(so, tesla, NV50TCL_POINT_SPRITE_ENABLE, 1); +	so_data  (so, cso->point_sprite); +  	so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3);  	if (cso->front_winding == PIPE_WINDING_CCW) {  		so_data(so, nvgl_polygon_mode(cso->fill_ccw)); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index d294356f75..5a3559ed18 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -189,6 +189,8 @@ nv50_state_emit(struct nv50_context *nv50)  		so_emit(chan, nv50->state.vertprog);  	if (nv50->state.dirty & NV50_NEW_FRAGPROG)  		so_emit(chan, nv50->state.fragprog); +	if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) +		so_emit(chan, nv50->state.programs);  	if (nv50->state.dirty & NV50_NEW_RASTERIZER)  		so_emit(chan, nv50->state.rast);  	if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -210,6 +212,12 @@ nv50_state_emit(struct nv50_context *nv50)  			so_emit(chan, nv50->state.vtxattr);  	}  	nv50->state.dirty = 0; +} + +void +nv50_state_flush_notify(struct nouveau_channel *chan) +{ +	struct nv50_context *nv50 = chan->user_private;  	so_emit_reloc_markers(chan, nv50->state.fb);  	so_emit_reloc_markers(chan, nv50->state.vertprog); @@ -240,6 +248,9 @@ nv50_state_validate(struct nv50_context *nv50)  	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))  		nv50_fragprog_validate(nv50); +	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) +		nv50_linkage_validate(nv50); +  	if (nv50->dirty & NV50_NEW_RASTERIZER)  		
so_ref(nv50->rasterizer->so, &nv50->state.rast); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index e9c3562194..bb7731855c 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -89,14 +89,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,  		if (src_bo->tile_flags) {  			BEGIN_RING(chan, m2mf,  				NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); -			OUT_RING  (chan, (sy << 16) | sx); +			OUT_RING  (chan, (sy << 16) | (sx * cpp));  		} else {  			src_offset += (line_count * src_pitch);  		}  		if (dst_bo->tile_flags) {  			BEGIN_RING(chan, m2mf,  				NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); -			OUT_RING  (chan, (dy << 16) | dx); +			OUT_RING  (chan, (dy << 16) | (dx * cpp));  		} else {  			dst_offset += (line_count * dst_pitch);  		} diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 593178c50b..3b5b1bbd37 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -93,8 +93,6 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)              } else {                  return 0;              } -        case PIPE_CAP_S3TC: -            return 1;          case PIPE_CAP_ANISOTROPIC_FILTER:              return 1;          case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 6178c4ac7e..ce77018415 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -65,8 +65,6 @@ softpipe_get_param(struct pipe_screen *screen, int param)        return 1;     case PIPE_CAP_GLSL:        return 1; -   case PIPE_CAP_S3TC: -      return 0;     case PIPE_CAP_ANISOTROPIC_FILTER:        return 0;     case PIPE_CAP_POINT_SPRITE: @@ -141,6 +139,7 @@ softpipe_is_format_supported( struct pipe_screen *screen,     case PIPE_FORMAT_DXT1_RGBA:     
case PIPE_FORMAT_DXT3_RGBA:     case PIPE_FORMAT_DXT5_RGBA: +   case PIPE_FORMAT_Z32_FLOAT:        return FALSE;     default:        return TRUE; | 
