diff options
| -rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.c | 314 | ||||
| -rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.h | 6 | 
2 files changed, 293 insertions, 27 deletions
| diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 5154c1f3c5..36254d359c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -31,6 +31,9 @@  #include "draw_vs.h"  #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_swizzle.h"  #include "gallivm/lp_bld_struct.h"  #include "gallivm/lp_bld_type.h"  #include "gallivm/lp_bld_flow.h" @@ -549,19 +552,28 @@ static void  store_aos(LLVMBuilderRef builder,            LLVMValueRef io_ptr,            LLVMValueRef index, -          LLVMValueRef value) +          LLVMValueRef value, +          LLVMValueRef clipmask)  {     LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr);     LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr);     LLVMValueRef indices[3]; +   LLVMValueRef val, shift;     indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);     indices[1] = index;     indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); -   /* undefined vertex */ -   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), -                                        0xffff, 0), id_ptr); +   /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */ +   val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0);  +   shift  = LLVMConstInt(LLVMInt32Type(), 12, 0);           +   val = LLVMBuildShl(builder, val, shift, ""); +   /* add clipmask:12 */    +   val = LLVMBuildOr(builder, val, clipmask, "");                + +   /* store vertex header */ +   LLVMBuildStore(builder, val, id_ptr); +  #if DEBUG_STORE     lp_build_printf(builder, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); @@ -616,7 +628,8 @@ store_aos_array(LLVMBuilderRef builder,                  LLVMValueRef io_ptr,                  LLVMValueRef aos[NUM_CHANNELS],                  int attrib, -                int num_outputs) +                int num_outputs, +                LLVMValueRef clipmask)  {     LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0);     LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); @@ -624,7 +637,8 @@ store_aos_array(LLVMBuilderRef builder,     LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);     LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);     LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; - +   LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; +        debug_assert(NUM_CHANNELS == 4);     io0_ptr = LLVMBuildGEP(builder, io_ptr, @@ -636,21 +650,31 @@ store_aos_array(LLVMBuilderRef builder,     io3_ptr = LLVMBuildGEP(builder, io_ptr,                            &ind3, 1, ""); +   clipmask0 = LLVMBuildExtractElement(builder, clipmask, +                                       ind0, ""); +   clipmask1 = LLVMBuildExtractElement(builder, clipmask, +                                       ind1, ""); +   clipmask2 = LLVMBuildExtractElement(builder, clipmask, +                                       ind2, ""); +   clipmask3 = LLVMBuildExtractElement(builder, clipmask, +                                       ind3, ""); +  #if DEBUG_STORE -   lp_build_printf(builder, "   io = %p, indexes[%d, %d, %d, %d]\n", -                   io_ptr, ind0, ind1, ind2, ind3); +   lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", +                   io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);  #endif - -   store_aos(builder, io0_ptr, attr_index, aos[0]); -   store_aos(builder, io1_ptr, attr_index, aos[1]); -   store_aos(builder, io2_ptr, attr_index, aos[2]); -   store_aos(builder, io3_ptr, attr_index, aos[3]); +   /* store for each of the 4 vertices */ +   store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0); +   store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1); +   store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2); +   store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3);  }  static void  convert_to_aos(LLVMBuilderRef builder,                 LLVMValueRef io,                 LLVMValueRef (*outputs)[NUM_CHANNELS], +               LLVMValueRef clipmask,                 int num_outputs,                 int max_vertices)  { @@ -679,13 +703,214 @@ convert_to_aos(LLVMBuilderRef builder,                        io,                        aos,                        attrib, -                      num_outputs); +                      num_outputs, +                      clipmask);     }  #if DEBUG_STORE     lp_build_printf(builder, "   # storing end\n");  #endif  } +/* + * Stores original vertex positions in clip coordinates + * There is probably a more efficient way to do this, 4 floats at once + * rather than extracting each element one by one. + */ +static void +store_clip(LLVMBuilderRef builder, +           LLVMValueRef io_ptr,            +           LLVMValueRef (*outputs)[NUM_CHANNELS]) +{ +   LLVMValueRef out[4]; +   LLVMValueRef indices[2];  +   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; +   LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; +   LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr;     +   LLVMValueRef out0elem, out1elem, out2elem, out3elem; + +   LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); +   LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); +   LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); +   LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); +    +   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); +   indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0); +    +   out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 y0 z0 w0*/ +   out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*x1 y1 z1 w1*/ +   out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*x2 y2 z2 w2*/ +   out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*x3 y3 z3 w3*/   + +   io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); +   io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); +   io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); +   io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + +   clip_ptr0 = draw_jit_header_clip(builder, io0_ptr); +   clip_ptr1 = draw_jit_header_clip(builder, io1_ptr); +   clip_ptr2 = draw_jit_header_clip(builder, io2_ptr); +   clip_ptr3 = draw_jit_header_clip(builder, io3_ptr); + +   for (int i = 0; i<4; i++){ +      clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, +                               indices, 2, ""); //x1 +      clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, +                               indices, 2, ""); //y1 +      clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, +                               indices, 2, ""); //z1 +      clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, +                               indices, 2, ""); //w1 + +      out0elem = LLVMBuildExtractElement(builder, out[0], +                                         indices[1], ""); //x1 +      out1elem = LLVMBuildExtractElement(builder, out[1], +                                         indices[1], ""); //y1 +      out2elem = LLVMBuildExtractElement(builder, out[2], +                                         indices[1], ""); //z1 +      out3elem = LLVMBuildExtractElement(builder, out[3], +                                         indices[1], ""); //w1 +   +      LLVMBuildStore(builder, out0elem, clip0_ptr); +      LLVMBuildStore(builder, out1elem, clip1_ptr); +      LLVMBuildStore(builder, out2elem, clip2_ptr); +      LLVMBuildStore(builder, out3elem, clip3_ptr); + +      indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); +   } + +} + +/* + * Transforms the outputs for viewport mapping + */ +static void +generate_viewport(struct draw_llvm *llvm, +                  LLVMBuilderRef builder, +                  LLVMValueRef (*outputs)[NUM_CHANNELS]) +{ +   int i; +   const float *scaleA = llvm->draw->viewport.scale; +   const float *transA = llvm->draw->viewport.translate; +   struct lp_type f32_type = lp_type_float_vec(32); +   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/    +   LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/  +    +   /* for 1/w convention*/ +   out3 = LLVMBuildFDiv(builder, const1, out3, ""); + +   /* Viewport Mapping */ +   for (i=0; i<4; i++){ +      LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ +      LLVMValueRef scale = lp_build_const_vec(f32_type, scaleA[i]); /*sx sx sx sx*/      +      LLVMValueRef trans = lp_build_const_vec(f32_type, transA[i]); /*tx tx tx tx*/ +       +      /* divide by w */ +      out = LLVMBuildMul(builder, out, out3, ""); +      /* mult by scale */ +      out = LLVMBuildMul(builder, out, scale, ""); +      /* add translation */ +      out = LLVMBuildAdd(builder, out, trans, ""); + +      /* store transformed outputs */ +      LLVMBuildStore(builder, out, outputs[0][i]); +   } +    +} + +/* + * Returns clipmask as 4xi32 bitmask for the 4 vertices + */ +static LLVMValueRef  +generate_clipmask(LLVMBuilderRef builder, +                  LLVMValueRef (*outputs)[NUM_CHANNELS]) +{ +   LLVMValueRef mask; /* stores the <4xi32> clipmasks */      +   LLVMValueRef test, temp;  +   LLVMValueRef zero, shift; +   LLVMValueRef pos_x, pos_y, pos_z, pos_w; + +   struct lp_type f32_type = lp_type_float_vec(32);  + +   zero = lp_build_const_vec(f32_type, 0);                    /* 0.0f 0.0f 0.0f 0.0f */ +   shift = lp_build_const_int_vec(lp_type_int_vec(32), 1);    /* 1 1 1 1 */ +   +   /* Assuming position stored at output[0] */ +   pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ +   pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ +   pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ +   pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/    + +   /* Cliptest, for hardwired planes */ +   /* plane 1 */ +   test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); +   temp = shift; +   test = LLVMBuildAnd(builder, test, temp, "");  +   mask = test; +    +   /* plane 2 */ +   test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); +   test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); +   temp = LLVMBuildShl(builder, temp, shift, ""); +   test = LLVMBuildAnd(builder, test, temp, "");  +   mask = LLVMBuildOr(builder, mask, test, ""); +    +   /* plane 3 */ +   test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); +   temp = LLVMBuildShl(builder, temp, shift, ""); +   test = LLVMBuildAnd(builder, test, temp, "");  +   mask = LLVMBuildOr(builder, mask, test, ""); + +   /* plane 4 */ +   test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); +   test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); +   temp = LLVMBuildShl(builder, temp, shift, ""); +   test = LLVMBuildAnd(builder, test, temp, "");  +   mask = LLVMBuildOr(builder, mask, test, ""); +    +   /* plane 5 */ +   test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); +   test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); +   temp = LLVMBuildShl(builder, temp, shift, ""); +   test = LLVMBuildAnd(builder, test, temp, "");  +   mask = LLVMBuildOr(builder, mask, test, ""); +    +   /* plane 6 */ +   test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); +   temp = LLVMBuildShl(builder, temp, shift, ""); +   test = LLVMBuildAnd(builder, test, temp, "");  +   mask = LLVMBuildOr(builder, mask, test, "");              +                 +   return mask; +  +} + +/* + * Returns boolean if any clipping has occurred + * Used zero/non-zero i32 value to represent boolean  + */ +static void +clipmask_bool(LLVMBuilderRef builder,  +              LLVMValueRef clipmask, +              LLVMValueRef ret_ptr) +{ +   LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, "");    +   LLVMValueRef temp; +   int i; + +   LLVMDumpValue(clipmask); + +   for (i=0; i<4; i++){    +      temp = LLVMBuildExtractElement(builder, clipmask, +                                     LLVMConstInt(LLVMInt32Type(), i, 0) , ""); +      ret = LLVMBuildOr(builder, ret, temp, ""); +      LLVMDumpValue(ret);   +   } +    +   LLVMBuildStore(builder, ret, ret_ptr); +   LLVMDumpValue(ret_ptr);  + +} +  static void  draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)  { @@ -706,6 +931,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)     void *code;     struct lp_build_sampler_soa *sampler = 0; +   LLVMValueRef ret, ret_ptr; +        arg_types[0] = llvm->context_ptr_type;           /* context */     arg_types[1] = llvm->vertex_header_ptr_type;     /* vertex_header */     arg_types[2] = llvm->buffer_ptr_type;            /* vbuffers */ @@ -715,7 +942,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)     arg_types[6] = llvm->vb_ptr_type;                /* pipe_vertex_buffer's */     arg_types[7] = LLVMInt32Type();                  /* instance_id */ -   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); +   func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);     variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);     LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); @@ -755,6 +982,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)     step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); +   /* function will return non-zero i32 value if any clipped vertices */      +   ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");    +   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); +     /* code generated texture sampling */     sampler = draw_llvm_sampler_soa_create(        draw_llvm_variant_key_samplers(&variant->key), @@ -769,6 +1000,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)        LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];        LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };        LLVMValueRef io; +      LLVMValueRef clipmask;   /* holds the clipmask value */        const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];        io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); @@ -805,10 +1037,22 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)                    context_ptr,                    sampler); -      convert_to_aos(builder, io, outputs, +      /* store original positions in clip before further manipulation */ +      store_clip(builder, io, outputs); + +      /* allocate clipmask, assign it integer type */ +      clipmask = generate_clipmask(builder, outputs);            +      clipmask_bool(builder, clipmask, ret_ptr); +       +      /* do viewport mapping */ +      generate_viewport(llvm, builder, outputs); +       +      /* store clipmask in vertex header and positions in data */ +      convert_to_aos(builder, io, outputs, clipmask,                       draw->vs.vertex_shader->info.num_outputs,                       max_vertices);     } +     lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);     sampler->destroy(sampler); @@ -818,8 +1062,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)     lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);  #endif -   LLVMBuildRetVoid(builder); - +   ret = LLVMBuildLoad(builder, ret_ptr,""); +   LLVMBuildRet(builder, ret); +           LLVMDisposeBuilder(builder);     /* @@ -869,6 +1114,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian     LLVMValueRef fetch_max;     void *code;     struct lp_build_sampler_soa *sampler = 0; +   LLVMValueRef ret, ret_ptr;     arg_types[0] = llvm->context_ptr_type;               /* context */     arg_types[1] = llvm->vertex_header_ptr_type;         /* vertex_header */ @@ -879,10 +1125,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian     arg_types[6] = llvm->vb_ptr_type;                    /* pipe_vertex_buffer's */     arg_types[7] = LLVMInt32Type();                      /* instance_id */ -   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); +   func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); -   variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", -                                            func_type); +   variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);     LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);     for(i = 0; i < Elements(arg_types); ++i)        if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) @@ -928,11 +1173,16 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian                              LLVMConstInt(LLVMInt32Type(), 1, 0),                              "fetch_max"); +   /* function returns non-zero i32 value if any clipped vertices */ +   ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");  +   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); +     lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);     {        LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];        LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };        LLVMValueRef io; +      LLVMValueRef clipmask;   /* holds the clipmask value */        const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];        io_itr = lp_loop.counter; @@ -979,10 +1229,25 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian                    context_ptr,                    sampler); -      convert_to_aos(builder, io, outputs, +      /* store original positions in clip before further manipulation */ +      store_clip(builder, io, outputs); + +      /* allocate clipmask, assign it integer type */ +      clipmask = generate_clipmask(builder, outputs); +      clipmask_bool(builder, clipmask, ret_ptr); + +      /* do viewport mapping */ +      generate_viewport(llvm, builder, outputs); +       +      /* store clipmask in vertex header,  +       * original positions in clip  +       * and transformed positions in data  +       */    +      convert_to_aos(builder, io, outputs, clipmask,                       draw->vs.vertex_shader->info.num_outputs,                       max_vertices);     } +     lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);     sampler->destroy(sampler); @@ -992,8 +1257,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian     lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);  #endif -   LLVMBuildRetVoid(builder); - +   ret = LLVMBuildLoad(builder, ret_ptr,"");    +   LLVMBuildRet(builder, ret); +        LLVMDisposeBuilder(builder);     /* diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index b881ef6113..1142ea51cf 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -120,7 +120,7 @@ struct draw_jit_context     lp_build_struct_get_ptr(_builder, _ptr, 0, "id")  #define draw_jit_header_clip(_builder, _ptr) \ -   lp_build_struct_get(_builder, _ptr, 1, "clip") +   lp_build_struct_get_ptr(_builder, _ptr, 1, "clip")  #define draw_jit_header_data(_builder, _ptr)            \     lp_build_struct_get_ptr(_builder, _ptr, 2, "data") @@ -136,7 +136,7 @@ struct draw_jit_context     lp_build_struct_get(_builder, _ptr, 2, "buffer_offset") -typedef void +typedef int  (*draw_jit_vert_func)(struct draw_jit_context *context,                        struct vertex_header *io,                        const char *vbuffers[PIPE_MAX_ATTRIBS], @@ -147,7 +147,7 @@ typedef void                        unsigned instance_id); -typedef void +typedef int  (*draw_jit_vert_func_elts)(struct draw_jit_context *context,                             struct vertex_header *io,                             const char *vbuffers[PIPE_MAX_ATTRIBS], | 
