summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/util
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/util')
-rw-r--r--src/gallium/auxiliary/util/u_atomic.h47
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c47
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h6
-rw-r--r--src/gallium/auxiliary/util/u_format.csv3
-rw-r--r--src/gallium/auxiliary/util/u_format_zs.c53
-rw-r--r--src/gallium/auxiliary/util/u_format_zs.h16
-rw-r--r--src/gallium/auxiliary/util/u_index_modify.c127
-rw-r--r--src/gallium/auxiliary/util/u_index_modify.h41
-rw-r--r--src/gallium/auxiliary/util/u_math.h5
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h52
-rw-r--r--src/gallium/auxiliary/util/u_simple_list.h2
-rw-r--r--src/gallium/auxiliary/util/u_sse.h156
-rw-r--r--src/gallium/auxiliary/util/u_surface.c2
-rw-r--r--src/gallium/auxiliary/util/u_tile.c84
14 files changed, 599 insertions, 42 deletions
diff --git a/src/gallium/auxiliary/util/u_atomic.h b/src/gallium/auxiliary/util/u_atomic.h
index a156823390..8434491a42 100644
--- a/src/gallium/auxiliary/util/u_atomic.h
+++ b/src/gallium/auxiliary/util/u_atomic.h
@@ -29,6 +29,8 @@
#define PIPE_ATOMIC_ASM_MSVC_X86
#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86))
#define PIPE_ATOMIC_ASM_GCC_X86
+#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64))
+#define PIPE_ATOMIC_ASM_GCC_X86_64
#elif defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 401)
#define PIPE_ATOMIC_GCC_INTRINSIC
#else
@@ -36,6 +38,51 @@
#endif
+#if defined(PIPE_ATOMIC_ASM_GCC_X86_64)
+#define PIPE_ATOMIC "GCC x86_64 assembly"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define p_atomic_set(_v, _i) (*(_v) = (_i))
+#define p_atomic_read(_v) (*(_v))
+
+static INLINE boolean
+p_atomic_dec_zero(int32_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__("lock; decl %0; sete %1":"+m"(*v), "=qm"(c)
+ ::"memory");
+
+ return c != 0;
+}
+
+static INLINE void
+p_atomic_inc(int32_t *v)
+{
+ __asm__ __volatile__("lock; incl %0":"+m"(*v));
+}
+
+static INLINE void
+p_atomic_dec(int32_t *v)
+{
+ __asm__ __volatile__("lock; decl %0":"+m"(*v));
+}
+
+static INLINE int32_t
+p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new)
+{
+ return __sync_val_compare_and_swap(v, old, _new);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PIPE_ATOMIC_ASM_GCC_X86_64 */
+
#if defined(PIPE_ATOMIC_ASM_GCC_X86)
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index f93ef26ae7..a163f93cb8 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -268,7 +268,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
}
- for (i = 0; i <= PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++)
+ for (i = 0; i <= PIPE_MAX_COLOR_BUFS; i++)
if (ctx->fs_col[i])
pipe->delete_fs_state(pipe, ctx->fs_col[i]);
@@ -964,16 +964,18 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
blitter_restore_CSOs(ctx);
}
-/* Clear a region of a depth stencil surface. */
-void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
- struct pipe_surface *dstsurf)
+/* draw a rectangle across a region using a custom dsa stage - for r600g */
+void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *zsurf,
+ struct pipe_surface *cbsurf,
+ void *dsa_stage, float depth)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
struct pipe_framebuffer_state fb_state;
- assert(dstsurf->texture);
- if (!dstsurf->texture)
+ assert(zsurf->texture);
+ if (!zsurf->texture)
return;
/* check the saved state */
@@ -981,8 +983,8 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
assert(blitter->saved_fb_state.nr_cbufs != ~0);
/* bind CSOs */
- pipe->bind_blend_state(pipe, ctx->blend_keep_color);
- pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
+ pipe->bind_blend_state(pipe, ctx->blend_write_color);
+ pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0));
@@ -990,15 +992,30 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
- fb_state.width = dstsurf->width;
- fb_state.height = dstsurf->height;
- fb_state.nr_cbufs = 0;
- fb_state.cbufs[0] = 0;
- fb_state.zsbuf = dstsurf;
+ fb_state.width = zsurf->width;
+ fb_state.height = zsurf->height;
+ fb_state.nr_cbufs = 1;
+ if (cbsurf) {
+ fb_state.cbufs[0] = cbsurf;
+ fb_state.nr_cbufs = 1;
+ } else {
+ fb_state.cbufs[0] = NULL;
+ fb_state.nr_cbufs = 0;
+ }
+ fb_state.zsbuf = zsurf;
pipe->set_framebuffer_state(pipe, &fb_state);
- blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
- blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0,
+ blitter_set_dst_dimensions(ctx, zsurf->width, zsurf->height);
+ blitter->draw_rectangle(blitter, 0, 0, zsurf->width, zsurf->height, depth,
UTIL_BLITTER_ATTRIB_NONE, NULL);
blitter_restore_CSOs(ctx);
}
+
+/* flush a region of a depth stencil surface for r300g */
+void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *dstsurf)
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ util_blitter_custom_depth_stencil(blitter, dstsurf, NULL,
+ ctx->dsa_flush_depth_stencil, 0.0f);
+}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index e33d2e283f..f9f96f25c7 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -203,6 +203,12 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
struct pipe_surface *dstsurf);
+
+void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *zsurf,
+ struct pipe_surface *cbsurf,
+ void *dsa_stage, float depth);
+
/* The functions below should be used to save currently bound constant state
* objects inside a driver. The objects are automatically restored at the end
* of the util_blitter_{clear, copy_region, fill_region} functions and then
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 016e73c4a1..1fbd83841c 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___,
PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs
PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs
PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs
+PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs
+PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs
PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs
PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs
PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs
+PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs
# YUV formats
# http://www.fourcc.org/yuv.php#UYVY
diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c
index 792d69c214..80081e22f7 100644
--- a/src/gallium/auxiliary/util/u_format_zs.c
+++ b/src/gallium/auxiliary/util/u_format_zs.c
@@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d
}
}
+
+void
+util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+
+}
+
+void
+util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h
index 650db4b95f..1604cc3eee 100644
--- a/src/gallium/auxiliary/util/u_format_zs.h
+++ b/src/gallium/auxiliary/util/u_format_zs.h
@@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned
void
util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+void
+util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+void
+util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_sride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
#endif /* U_FORMAT_ZS_H_ */
diff --git a/src/gallium/auxiliary/util/u_index_modify.c b/src/gallium/auxiliary/util/u_index_modify.c
new file mode 100644
index 0000000000..65b079ed53
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_index_modify.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "pipe/p_context.h"
+#include "util/u_index_modify.h"
+#include "util/u_inlines.h"
+
+void util_shorten_ubyte_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start,
+ unsigned count)
+{
+ struct pipe_screen* screen = context->screen;
+ struct pipe_resource* new_elts;
+ unsigned char *in_map;
+ unsigned short *out_map;
+ struct pipe_transfer *src_transfer, *dst_transfer;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer);
+ out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer);
+
+ in_map += start;
+
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, src_transfer);
+ pipe_buffer_unmap(context, new_elts, dst_transfer);
+
+ *elts = new_elts;
+}
+
+void util_rebuild_ushort_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count)
+{
+ struct pipe_transfer *in_transfer = NULL;
+ struct pipe_transfer *out_transfer = NULL;
+ struct pipe_resource *new_elts;
+ unsigned short *in_map;
+ unsigned short *out_map;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(context->screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts,
+ PIPE_TRANSFER_READ, &in_transfer);
+ out_map = pipe_buffer_map(context, new_elts,
+ PIPE_TRANSFER_WRITE, &out_transfer);
+
+ in_map += start;
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, in_transfer);
+ pipe_buffer_unmap(context, new_elts, out_transfer);
+
+ *elts = new_elts;
+}
+
+void util_rebuild_uint_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count)
+{
+ struct pipe_transfer *in_transfer = NULL;
+ struct pipe_transfer *out_transfer = NULL;
+ struct pipe_resource *new_elts;
+ unsigned int *in_map;
+ unsigned int *out_map;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(context->screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts,
+ PIPE_TRANSFER_READ, &in_transfer);
+ out_map = pipe_buffer_map(context, new_elts,
+ PIPE_TRANSFER_WRITE, &out_transfer);
+
+ in_map += start;
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned int)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, in_transfer);
+ pipe_buffer_unmap(context, new_elts, out_transfer);
+
+ *elts = new_elts;
+}
diff --git a/src/gallium/auxiliary/util/u_index_modify.h b/src/gallium/auxiliary/util/u_index_modify.h
new file mode 100644
index 0000000000..01a6cae94f
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_index_modify.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef UTIL_INDEX_MODIFY_H
+#define UTIL_INDEX_MODIFY_H
+
+void util_shorten_ubyte_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start,
+ unsigned count);
+
+void util_rebuild_ushort_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count);
+
+void util_rebuild_uint_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count);
+#endif
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 69a7681494..37294b7203 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val)
#endif
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237309504880
+#endif
+
+
#if defined(_MSC_VER)
#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index aae8b8bdf1..5378f2d782 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -394,7 +394,7 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
return;
case PIPE_FORMAT_B4G4R4A4_UNORM:
{
- uc->ub = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
+ uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
}
return;
case PIPE_FORMAT_A8_UNORM:
@@ -434,8 +434,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
/* Integer versions of util_pack_z and util_pack_z_stencil - useful for
* constructing clear masks.
*/
-static INLINE uint
-util_pack_uint_z(enum pipe_format format, unsigned z)
+static INLINE uint32_t
+util_pack_mask_z(enum pipe_format format, uint32_t z)
{
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
@@ -452,29 +452,32 @@ util_pack_uint_z(enum pipe_format format, unsigned z)
case PIPE_FORMAT_S8_USCALED:
return 0;
default:
- debug_print_format("gallium: unhandled format in util_pack_z()", format);
+ debug_print_format("gallium: unhandled format in util_pack_mask_z()", format);
assert(0);
return 0;
}
}
-static INLINE uint
-util_pack_uint_z_stencil(enum pipe_format format, double z, uint s)
+static INLINE uint32_t
+util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
{
- unsigned packed = util_pack_uint_z(format, z);
-
- s &= 0xff;
+ uint32_t packed = util_pack_mask_z(format, z);
switch (format) {
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- return packed | (s << 24);
+ packed |= (uint32_t)s << 24;
+ break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- return packed | s;
+ packed |= s;
+ break;
case PIPE_FORMAT_S8_USCALED:
- return packed | s;
+ packed |= s;
+ break;
default:
- return packed;
+ break;
}
+
+ return packed;
}
@@ -482,9 +485,11 @@ util_pack_uint_z_stencil(enum pipe_format format, double z, uint s)
/**
* Note: it's assumed that z is in [0,1]
*/
-static INLINE uint
+static INLINE uint32_t
util_pack_z(enum pipe_format format, double z)
{
+ union fi fui;
+
if (z == 0.0)
return 0;
@@ -492,24 +497,25 @@ util_pack_z(enum pipe_format format, double z)
case PIPE_FORMAT_Z16_UNORM:
if (z == 1.0)
return 0xffff;
- return (uint) (z * 0xffff);
+ return (uint32_t) (z * 0xffff);
case PIPE_FORMAT_Z32_UNORM:
/* special-case to avoid overflow */
if (z == 1.0)
return 0xffffffff;
- return (uint) (z * 0xffffffff);
+ return (uint32_t) (z * 0xffffffff);
case PIPE_FORMAT_Z32_FLOAT:
- return (uint)z;
+ fui.f = (float)z;
+ return fui.ui;
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z24X8_UNORM:
if (z == 1.0)
return 0xffffff;
- return (uint) (z * 0xffffff);
+ return (uint32_t) (z * 0xffffff);
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
if (z == 1.0)
return 0xffffff00;
- return ((uint) (z * 0xffffff)) << 8;
+ return ((uint32_t) (z * 0xffffff)) << 8;
case PIPE_FORMAT_S8_USCALED:
/* this case can get it via util_pack_z_stencil() */
return 0;
@@ -525,14 +531,14 @@ util_pack_z(enum pipe_format format, double z)
* Pack Z and/or stencil values into a 32-bit value described by format.
* Note: it's assumed that z is in [0,1] and s in [0,255]
*/
-static INLINE uint
-util_pack_z_stencil(enum pipe_format format, double z, uint s)
+static INLINE uint32_t
+util_pack_z_stencil(enum pipe_format format, double z, uint8_t s)
{
- unsigned packed = util_pack_z(format, z);
+ uint32_t packed = util_pack_z(format, z);
switch (format) {
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- packed |= s << 24;
+ packed |= (uint32_t)s << 24;
break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
packed |= s;
diff --git a/src/gallium/auxiliary/util/u_simple_list.h b/src/gallium/auxiliary/util/u_simple_list.h
index f5f43b0faa..fe59771371 100644
--- a/src/gallium/auxiliary/util/u_simple_list.h
+++ b/src/gallium/auxiliary/util/u_simple_list.h
@@ -46,6 +46,8 @@
do { \
(elem)->next->prev = (elem)->prev; \
(elem)->prev->next = (elem)->next; \
+ (elem)->next = elem; \
+ (elem)->prev = elem; \
} while (0)
/**
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index 87959ab0aa..1df6c87267 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -71,6 +71,96 @@ _mm_castps_si128(__m128 a)
#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */
+union m128i {
+ __m128i m;
+ ubyte ub[16];
+ ushort us[8];
+ uint ui[4];
+};
+
+static INLINE void u_print_epi8(const char *name, __m128i r)
+{
+ union { __m128i m; ubyte ub[16]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x\n",
+ name,
+ u.ub[0], u.ub[1], u.ub[2], u.ub[3],
+ u.ub[4], u.ub[5], u.ub[6], u.ub[7],
+ u.ub[8], u.ub[9], u.ub[10], u.ub[11],
+ u.ub[12], u.ub[13], u.ub[14], u.ub[15]);
+}
+
+static INLINE void u_print_epi16(const char *name, __m128i r)
+{
+ union { __m128i m; ushort us[8]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x\n",
+ name,
+ u.us[0], u.us[1], u.us[2], u.us[3],
+ u.us[4], u.us[5], u.us[6], u.us[7]);
+}
+
+static INLINE void u_print_epi32(const char *name, __m128i r)
+{
+ union { __m128i m; uint ui[4]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%08x/"
+ "%08x/"
+ "%08x/"
+ "%08x\n",
+ name,
+ u.ui[0], u.ui[1], u.ui[2], u.ui[3]);
+}
+
+static INLINE void u_print_ps(const char *name, __m128 r)
+{
+ union { __m128 m; float f[4]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%f/"
+ "%f/"
+ "%f/"
+ "%f\n",
+ name,
+ u.f[0], u.f[1], u.f[2], u.f[3]);
+}
+
+
+#define U_DUMP_EPI32(a) u_print_epi32(#a, a)
+#define U_DUMP_EPI16(a) u_print_epi16(#a, a)
+#define U_DUMP_EPI8(a) u_print_epi8(#a, a)
+#define U_DUMP_PS(a) u_print_ps(#a, a)
+
+
#if defined(PIPE_ARCH_SSSE3)
@@ -78,8 +168,6 @@ _mm_castps_si128(__m128 a)
#else /* !PIPE_ARCH_SSSE3 */
-#include <emmintrin.h>
-
/**
* Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
* where -mssse3 is not supported/enabled.
@@ -100,6 +188,68 @@ _mm_shuffle_epi8(__m128i a, __m128i mask)
#endif /* !PIPE_ARCH_SSSE3 */
-#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
+
+
+/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of
+ * _mm_mul_epu32().
+ *
+ * I suspect this works fine for us because one of our operands is
+ * always positive, but not sure that this can be used for general
+ * signed integer multiplication.
+ *
+ * This seems close enough to the speed of SSE4 and the real
+ * _mm_mullo_epi32() intrinsic as to not justify adding an sse4
+ * dependency at this point.
+ */
+static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
+{
+ __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */
+ __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */
+ __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */
+ __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */
+
+ /* Interleave the results, either with shuffles or (slightly
+ * faster) direct bit operations:
+ */
+#if 0
+ __m128i ba8 = _mm_shuffle_epi32(ba, 8);
+ __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8);
+ __m128i result = _mm_unpacklo_epi32(ba8, b4a48);
+#else
+ __m128i mask = _mm_setr_epi32(~0,0,~0,0);
+ __m128i ba_mask = _mm_and_si128(ba, mask);
+ __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32);
+ __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift);
+#endif
+
+ return result;
+}
+
+
+static INLINE void
+transpose4_epi32(const __m128i * restrict a,
+ const __m128i * restrict b,
+ const __m128i * restrict c,
+ const __m128i * restrict d,
+ __m128i * restrict o,
+ __m128i * restrict p,
+ __m128i * restrict q,
+ __m128i * restrict r)
+{
+ __m128i t0 = _mm_unpacklo_epi32(*a, *b);
+ __m128i t1 = _mm_unpacklo_epi32(*c, *d);
+ __m128i t2 = _mm_unpackhi_epi32(*a, *b);
+ __m128i t3 = _mm_unpackhi_epi32(*c, *d);
+
+ *o = _mm_unpacklo_epi64(t0, t1);
+ *p = _mm_unpackhi_epi64(t0, t1);
+ *q = _mm_unpacklo_epi64(t2, t3);
+ *r = _mm_unpackhi_epi64(t2, t3);
+}
+
+#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i))
+
+
+#endif /* PIPE_ARCH_SSE */
#endif /* U_SSE_H_ */
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index af99163b2e..f78b6838a7 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -332,7 +332,7 @@ util_clear_depth_stencil(struct pipe_context *pipe,
uint32_t *row = (uint32_t *)dst_map;
for (j = 0; j < width; j++) {
uint32_t tmp = *row & dst_mask;
- *row++ = tmp & (zstencil & ~dst_mask);
+ *row++ = tmp | (zstencil & ~dst_mask);
}
dst_map += dst_stride;
}
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index f7aa1403d0..44cadbfcdd 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -217,6 +217,81 @@ z24s8_get_tile_rgba(const unsigned *src,
}
}
+/*** PIPE_FORMAT_S8X24_USCALED ***/
+
+/**
+ * Return S component as four uint32_t in [0..255]. Z part ignored.
+ */
+static void
+s8x24_get_tile_rgba(const unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float)((*src++ >> 24) & 0xff);
+ }
+
+ p += dst_stride;
+ }
+}
+
+/*** PIPE_FORMAT_X24S8_USCALED ***/
+
+/**
+ * Return S component as four uint32_t in [0..255]. Z part ignored.
+ */
+static void
+x24s8_get_tile_rgba(const unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float)(*src++ & 0xff);
+ }
+ p += dst_stride;
+ }
+}
+
+
+/**
+ * Return S component as four uint32_t in [0..255]. Z part ignored.
+ */
+static void
+s8_get_tile_rgba(const unsigned char *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float)(*src++ & 0xff);
+ }
+ p += dst_stride;
+ }
+}
/*** PIPE_FORMAT_Z32_FLOAT ***/
@@ -261,10 +336,19 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
case PIPE_FORMAT_Z24X8_UNORM:
s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
+ case PIPE_FORMAT_S8_USCALED:
+ s8_get_tile_rgba((unsigned char *) src, w, h, dst, dst_stride);
+ break;
+ case PIPE_FORMAT_X24S8_USCALED:
+ s8x24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+ break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
+ case PIPE_FORMAT_S8X24_USCALED:
+ x24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+ break;
case PIPE_FORMAT_Z32_FLOAT:
z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride);
break;