summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/util/u_cpu_detect.c
diff options
context:
space:
mode:
authorJosé Fonseca <jfonseca@vmware.com>2009-09-29 13:25:08 +0100
committerJosé Fonseca <jfonseca@vmware.com>2009-09-29 13:59:16 +0100
commita81fb2a0d2c9a94fa362705edd1281fa7699d093 (patch)
tree76cef41b41721b15e100a706ec7f6832b2c9a0ef /src/gallium/auxiliary/util/u_cpu_detect.c
parent8210abb113462c781a8f3ffee3406493c108a2f0 (diff)
util: Cleanup u_cpu_detect, build. Support X86_64 and detect SSE4.1 too.
I was waiting for the need to use this code to arise, and it finally came. I've tested building this on Linux and Windows, both x86 and x64_64. But it might break other platforms. Please bear with me and help me fix it. Many thanks to Dennis Smit who submitted this, and Eric Anholt whose work this was based on.
Diffstat (limited to 'src/gallium/auxiliary/util/u_cpu_detect.c')
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c741
1 files changed, 368 insertions, 373 deletions
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index d9f2f8fc28..ecfb96138d 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -24,23 +24,21 @@
*
**************************************************************************/
-/*
- * Based on the work of Eric Anholt <anholt@FreeBSD.org>
+/**
+ * @file
+ * CPU feature detection.
+ *
+ * @author Dennis Smit
+ * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
*/
-/* FIXME: clean this entire file up */
+#include "pipe/p_config.h"
+#include "u_debug.h"
#include "u_cpu_detect.h"
-#ifdef __linux__
-#define OS_LINUX
-#endif
-#ifdef WIN32
-#define OS_WIN32
-#endif
-
-#if defined(ARCH_POWERPC)
-#if defined(OS_DARWIN)
+#if defined(PIPE_ARCH_PPC)
+#if defined(PIPE_OS_DARWIN)
#include <sys/sysctl.h>
#else
#include <signal.h>
@@ -48,137 +46,140 @@
#endif
#endif
-#if defined(OS_NETBSD) || defined(OS_OPENBSD)
+#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
#include <sys/param.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#endif
-#if defined(OS_FREEBSD)
+#if defined(PIPE_OS_FREEBSD)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
-#if defined(OS_LINUX)
+#if defined(PIPE_OS_LINUX)
#include <signal.h>
#endif
-#if defined(OS_WIN32)
-#include <windows.h>
+#ifdef PIPE_OS_UNIX
+#include <unistd.h>
#endif
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
+#if defined(PIPE_OS_WINDOWS)
+#include <windows.h>
+#endif
-static struct cpu_detect_caps __cpu_detect_caps;
-static int __cpu_detect_initialized = 0;
+struct util_cpu_caps util_cpu_caps;
static int has_cpuid(void);
static int cpuid(unsigned int ax, unsigned int *p);
+#if defined(PIPE_ARCH_X86)
+
/* The sigill handlers */
-#if defined(ARCH_X86) /* x86 (linux katmai handler check thing) */
-#if defined(OS_LINUX) && defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)
-static void sigill_handler_sse(int signal, struct sigcontext sc)
+#if defined(PIPE_OS_LINUX) //&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)
+static void
+sigill_handler_sse(int signal, struct sigcontext sc)
{
- /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
- * instructions are 3 bytes long. We must increment the instruction
- * pointer manually to avoid repeated execution of the offending
- * instruction.
- *
- * If the SIGILL is caused by a divide-by-zero when unmasked
- * exceptions aren't supported, the SIMD FPU status and control
- * word will be restored at the end of the test, so we don't need
- * to worry about doing it here. Besides, we may not be able to...
- */
- sc.eip += 3;
-
- __cpu_detect_caps.hasSSE=0;
+ /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
+ * instructions are 3 bytes long. We must increment the instruction
+ * pointer manually to avoid repeated execution of the offending
+ * instruction.
+ *
+ * If the SIGILL is caused by a divide-by-zero when unmasked
+ * exceptions aren't supported, the SIMD FPU status and control
+ * word will be restored at the end of the test, so we don't need
+ * to worry about doing it here. Besides, we may not be able to...
+ */
+ sc.eip += 3;
+
+ util_cpu_caps.has_sse=0;
}
-static void sigfpe_handler_sse(int signal, struct sigcontext sc)
+static void
+sigfpe_handler_sse(int signal, struct sigcontext sc)
{
- if (sc.fpstate->magic != 0xffff) {
- /* Our signal context has the extended FPU state, so reset the
- * divide-by-zero exception mask and clear the divide-by-zero
- * exception bit.
- */
- sc.fpstate->mxcsr |= 0x00000200;
- sc.fpstate->mxcsr &= 0xfffffffb;
- } else {
- /* If we ever get here, we're completely hosed.
- */
- }
+ if (sc.fpstate->magic != 0xffff) {
+ /* Our signal context has the extended FPU state, so reset the
+ * divide-by-zero exception mask and clear the divide-by-zero
+ * exception bit.
+ */
+ sc.fpstate->mxcsr |= 0x00000200;
+ sc.fpstate->mxcsr &= 0xfffffffb;
+ } else {
+ /* If we ever get here, we're completely hosed.
+ */
+ }
}
-#endif
-#endif /* OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
+#endif /* PIPE_OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
-#if defined(OS_WIN32)
-LONG CALLBACK win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
+#if defined(PIPE_OS_WINDOWS)
+static LONG CALLBACK
+win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
{
- if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){
- ep->ContextRecord->Eip +=3;
- __cpu_detect_caps.hasSSE=0;
- return EXCEPTION_CONTINUE_EXECUTION;
- }
- return EXCEPTION_CONTINUE_SEARCH;
+ if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){
+ ep->ContextRecord->Eip +=3;
+ util_cpu_caps.has_sse=0;
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ return EXCEPTION_CONTINUE_SEARCH;
}
-#endif /* OS_WIN32 */
+#endif /* PIPE_OS_WINDOWS */
+
+#endif /* PIPE_ARCH_X86 */
-#if defined(ARCH_POWERPC) && !defined(OS_DARWIN)
+#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN)
static sigjmp_buf __lv_powerpc_jmpbuf;
static volatile sig_atomic_t __lv_powerpc_canjump = 0;
-static void sigill_handler (int sig);
-
-static void sigill_handler (int sig)
+static void
+sigill_handler(int sig)
{
- if (!__lv_powerpc_canjump) {
- signal (sig, SIG_DFL);
- raise (sig);
- }
+ if (!__lv_powerpc_canjump) {
+ signal (sig, SIG_DFL);
+ raise (sig);
+ }
- __lv_powerpc_canjump = 0;
- siglongjmp(__lv_powerpc_jmpbuf, 1);
+ __lv_powerpc_canjump = 0;
+ siglongjmp(__lv_powerpc_jmpbuf, 1);
}
-static void check_os_altivec_support(void)
+static void
+check_os_altivec_support(void)
{
-#if defined(OS_DARWIN)
- int sels[2] = {CTL_HW, HW_VECTORUNIT};
- int has_vu = 0;
- int len = sizeof (has_vu);
- int err;
-
- err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
-
- if (err == 0) {
- if (has_vu != 0) {
- __cpu_detect_caps.hasAltiVec = 1;
- }
- }
-#else /* !OS_DARWIN */
- /* no Darwin, do it the brute-force way */
- /* this is borrowed from the libmpeg2 library */
- signal(SIGILL, sigill_handler);
- if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) {
- signal(SIGILL, SIG_DFL);
- } else {
- __lv_powerpc_canjump = 1;
-
- __asm __volatile
- ("mtspr 256, %0\n\t"
- "vand %%v0, %%v0, %%v0"
- :
- : "r" (-1));
-
- signal(SIGILL, SIG_DFL);
- __cpu_detect_caps.hasAltiVec = 1;
- }
+#if defined(PIPE_OS_DARWIN)
+ int sels[2] = {CTL_HW, HW_VECTORUNIT};
+ int has_vu = 0;
+ int len = sizeof (has_vu);
+ int err;
+
+ err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
+
+ if (err == 0) {
+ if (has_vu != 0) {
+ util_cpu_caps.has_altivec = 1;
+ }
+ }
+#else /* !PIPE_OS_DARWIN */
+ /* no Darwin, do it the brute-force way */
+ /* this is borrowed from the libmpeg2 library */
+ signal(SIGILL, sigill_handler);
+ if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) {
+ signal(SIGILL, SIG_DFL);
+ } else {
+ __lv_powerpc_canjump = 1;
+
+ __asm __volatile
+ ("mtspr 256, %0\n\t"
+ "vand %%v0, %%v0, %%v0"
+ :
+ : "r" (-1));
+
+ signal(SIGILL, SIG_DFL);
+ util_cpu_caps.has_altivec = 1;
+ }
#endif
}
#endif
@@ -189,318 +190,312 @@ static void check_os_altivec_support(void)
* and RedHat patched 2.2 kernels that have broken exception handling
* support for user space apps that do SSE.
*/
-static void check_os_katmai_support(void)
+static void
+check_os_katmai_support(void)
{
-#if defined(ARCH_X86)
-#if defined(OS_FREEBSD)
- int has_sse=0, ret;
- int len = sizeof (has_sse);
-
- ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
- if (ret || !has_sse)
- __cpu_detect_caps.hasSSE=0;
-
-#elif defined(OS_NETBSD) || defined(OS_OPENBSD)
- int has_sse, has_sse2, ret, mib[2];
- int varlen;
-
- mib[0] = CTL_MACHDEP;
- mib[1] = CPU_SSE;
- varlen = sizeof (has_sse);
-
- ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
- if (ret < 0 || !has_sse) {
- __cpu_detect_caps.hasSSE = 0;
- } else {
- __cpu_detect_caps.hasSSE = 1;
- }
-
- mib[1] = CPU_SSE2;
- varlen = sizeof (has_sse2);
- ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
- if (ret < 0 || !has_sse2) {
- __cpu_detect_caps.hasSSE2 = 0;
- } else {
- __cpu_detect_caps.hasSSE2 = 1;
- }
- __cpu_detect_caps.hasSSE = 0; /* FIXME ?!?!? */
-
-#elif defined(OS_WIN32)
- LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
- if (__cpu_detect_caps.hasSSE) {
- exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
- __asm __volatile ("xorps %xmm0, %xmm0");
- SetUnhandledExceptionFilter(exc_fil);
- }
-#elif defined(OS_LINUX)
- struct sigaction saved_sigill;
- struct sigaction saved_sigfpe;
-
- /* Save the original signal handlers.
- */
- sigaction(SIGILL, NULL, &saved_sigill);
- sigaction(SIGFPE, NULL, &saved_sigfpe);
-
- signal(SIGILL, (void (*)(int))sigill_handler_sse);
- signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
-
- /* Emulate test for OSFXSR in CR4. The OS will set this bit if it
- * supports the extended FPU save and restore required for SSE. If
- * we execute an SSE instruction on a PIII and get a SIGILL, the OS
- * doesn't support Streaming SIMD Exceptions, even if the processor
- * does.
- */
- if (__cpu_detect_caps.hasSSE) {
- __asm __volatile ("xorps %xmm1, %xmm0");
- }
-
- /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if
- * it supports unmasked SIMD FPU exceptions. If we unmask the
- * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
- * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE
- * as expected, we're okay but we need to clean up after it.
- *
- * Are we being too stringent in our requirement that the OS support
- * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by
- * setting CR4.OSFXSR but don't support unmasked exceptions. Win98
- * doesn't even support them. We at least know the user-space SSE
- * support is good in kernels that do support unmasked exceptions,
- * and therefore to be safe I'm going to leave this test in here.
- */
- if (__cpu_detect_caps.hasSSE) {
- // test_os_katmai_exception_support();
- }
-
- /* Restore the original signal handlers.
- */
- sigaction(SIGILL, &saved_sigill, NULL);
- sigaction(SIGFPE, &saved_sigfpe, NULL);
+#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_OS_FREEBSD)
+ int has_sse=0, ret;
+ int len = sizeof (has_sse);
+
+ ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
+ if (ret || !has_sse)
+ util_cpu_caps.has_sse=0;
+
+#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
+ int has_sse, has_sse2, ret, mib[2];
+ int varlen;
+
+ mib[0] = CTL_MACHDEP;
+ mib[1] = CPU_SSE;
+ varlen = sizeof (has_sse);
+
+ ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
+ if (ret < 0 || !has_sse) {
+ util_cpu_caps.has_sse = 0;
+ } else {
+ util_cpu_caps.has_sse = 1;
+ }
+
+ mib[1] = CPU_SSE2;
+ varlen = sizeof (has_sse2);
+ ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
+ if (ret < 0 || !has_sse2) {
+ util_cpu_caps.has_sse2 = 0;
+ } else {
+ util_cpu_caps.has_sse2 = 1;
+ }
+ util_cpu_caps.has_sse = 0; /* FIXME ?!?!? */
+
+#elif defined(PIPE_OS_WINDOWS)
+ LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
+ if (util_cpu_caps.has_sse) {
+ exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
+#if defined(PIPE_CC_GCC)
+ __asm __volatile ("xorps %xmm0, %xmm0");
+#elif defined(PIPE_CC_MSVC)
+ __asm {
+ xorps xmm0, xmm0 // executing SSE instruction
+ }
+#else
+#error Unsupported compiler
+#endif
+ SetUnhandledExceptionFilter(exc_fil);
+ }
+#elif defined(PIPE_OS_LINUX)
+ struct sigaction saved_sigill;
+ struct sigaction saved_sigfpe;
+
+ /* Save the original signal handlers.
+ */
+ sigaction(SIGILL, NULL, &saved_sigill);
+ sigaction(SIGFPE, NULL, &saved_sigfpe);
+
+ signal(SIGILL, (void (*)(int))sigill_handler_sse);
+ signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
+
+ /* Emulate test for OSFXSR in CR4. The OS will set this bit if it
+ * supports the extended FPU save and restore required for SSE. If
+ * we execute an SSE instruction on a PIII and get a SIGILL, the OS
+ * doesn't support Streaming SIMD Exceptions, even if the processor
+ * does.
+ */
+ if (util_cpu_caps.has_sse) {
+ __asm __volatile ("xorps %xmm1, %xmm0");
+ }
+
+ /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if
+ * it supports unmasked SIMD FPU exceptions. If we unmask the
+ * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
+ * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE
+ * as expected, we're okay but we need to clean up after it.
+ *
+ * Are we being too stringent in our requirement that the OS support
+ * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by
+ * setting CR4.OSFXSR but don't support unmasked exceptions. Win98
+ * doesn't even support them. We at least know the user-space SSE
+ * support is good in kernels that do support unmasked exceptions,
+ * and therefore to be safe I'm going to leave this test in here.
+ */
+ if (util_cpu_caps.has_sse) {
+ // test_os_katmai_exception_support();
+ }
+
+ /* Restore the original signal handlers.
+ */
+ sigaction(SIGILL, &saved_sigill, NULL);
+ sigaction(SIGFPE, &saved_sigfpe, NULL);
#else
- /* We can't use POSIX signal handling to test the availability of
- * SSE, so we disable it by default.
- */
- __cpu_detect_caps.hasSSE = 0;
+ /* We can't use POSIX signal handling to test the availability of
+ * SSE, so we disable it by default.
+ */
+ util_cpu_caps.has_sse = 0;
#endif /* __linux__ */
#endif
+
+#if defined(PIPE_ARCH_X86_64)
+ util_cpu_caps.has_sse = 1;
+#endif
}
static int has_cpuid(void)
{
-#if defined(ARCH_X86)
- int a, c;
-
- __asm __volatile
- ("pushf\n"
- "popl %0\n"
- "movl %0, %1\n"
- "xorl $0x200000, %0\n"
- "push %0\n"
- "popf\n"
- "pushf\n"
- "popl %0\n"
- : "=a" (a), "=c" (c)
- :
- : "cc");
-
- return a != c;
+#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_OS_GCC)
+ int a, c;
+
+ __asm __volatile
+ ("pushf\n"
+ "popl %0\n"
+ "movl %0, %1\n"
+ "xorl $0x200000, %0\n"
+ "push %0\n"
+ "popf\n"
+ "pushf\n"
+ "popl %0\n"
+ : "=a" (a), "=c" (c)
+ :
+ : "cc");
+
+ return a != c;
+#else
+ /* FIXME */
+ return 1;
+#endif
+#elif defined(PIPE_ARCH_X86_64)
+ return 1;
#else
- return 0;
+ return 0;
#endif
}
-static int cpuid(unsigned int ax, unsigned int *p)
+static INLINE int
+cpuid(unsigned int ax, unsigned int *p)
{
-#if defined(ARCH_X86)
- unsigned int flags;
-
- __asm __volatile
- ("movl %%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl %%ebx, %%esi"
- : "=a" (p[0]), "=S" (p[1]),
- "=c" (p[2]), "=d" (p[3])
- : "0" (ax));
-
- return 0;
-#else
- return -1;
+ int ret = -1;
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if defined(PIPE_CC_GCC)
+ __asm __volatile
+ ("movl %%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl %%ebx, %%esi"
+ : "=a" (p[0]), "=S" (p[1]),
+ "=c" (p[2]), "=d" (p[3])
+ : "0" (ax));
+
+ ret = 0;
+#elif defined(PIPE_CC_MSVC)
+ __cpuid(ax, p);
+
+ ret = 0;
+#endif
#endif
+
+ return ret;
}
-void cpu_detect_initialize()
+void
+util_cpu_detect(void)
{
- unsigned int regs[4];
- unsigned int regs2[4];
-
- int mib[2], ncpu;
- int len;
-
- memset(&__cpu_detect_caps, 0, sizeof (struct cpu_detect_caps));
-
- /* Check for arch type */
-#if defined(ARCH_MIPS)
- __cpu_detect_caps.type = CPU_DETECT_TYPE_MIPS;
-#elif defined(ARCH_ALPHA)
- __cpu_detect_caps.type = CPU_DETECT_TYPE_ALPHA;
-#elif defined(ARCH_SPARC)
- __cpu_detect_caps.type = CPU_DETECT_TYPE_SPARC;
-#elif defined(ARCH_X86)
- __cpu_detect_caps.type = CPU_DETECT_TYPE_X86;
-#elif defined(ARCH_POWERPC)
- __cpu_detect_caps.type = CPU_DETECT_TYPE_POWERPC;
+ static boolean util_cpu_detect_initialized = FALSE;
+
+ if(util_cpu_detect_initialized)
+ return;
+
+ memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
+
+ /* Check for arch type */
+#if defined(PIPE_ARCH_MIPS)
+ util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS;
+#elif defined(PIPE_ARCH_ALPHA)
+ util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA;
+#elif defined(PIPE_ARCH_SPARC)
+ util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC;
+#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ util_cpu_caps.arch = UTIL_CPU_ARCH_X86;
+#elif defined(PIPE_ARCH_PPC)
+ util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC;
#else
- __cpu_detect_caps.type = CPU_DETECT_TYPE_OTHER;
+ util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN;
#endif
- /* Count the number of CPUs in system */
-#if !defined(OS_WIN32) && !defined(OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN)
- __cpu_detect_caps.nrcpu = sysconf(_SC_NPROCESSORS_ONLN);
- if (__cpu_detect_caps.nrcpu == -1)
- __cpu_detect_caps.nrcpu = 1;
-
-#elif defined(OS_NETBSD) || defined(OS_FREEBSD) || defined(OS_OPENBSD)
+ /* Count the number of CPUs in system */
+#if !defined(PIPE_OS_WINDOWS) && !defined(PIPE_OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN)
+ util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (util_cpu_caps.nr_cpus == -1)
+ util_cpu_caps.nr_cpus = 1;
- mib[0] = CTL_HW;
- mib[1] = HW_NCPU;
+#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_OPENBSD)
+ {
+ int mib[2], ncpu;
+ int len;
- len = sizeof (ncpu);
- sysctl(mib, 2, &ncpu, &len, NULL, 0);
- __cpu_detect_caps.nrcpu = ncpu;
+ mib[0] = CTL_HW;
+ mib[1] = HW_NCPU;
+ len = sizeof (ncpu);
+ sysctl(mib, 2, &ncpu, &len, NULL, 0);
+ util_cpu_caps.nr_cpus = ncpu;
+ }
#else
- __cpu_detect_caps.nrcpu = 1;
+ util_cpu_caps.nr_cpus = 1;
#endif
-#if defined(ARCH_X86)
- /* No cpuid, old 486 or lower */
- if (has_cpuid() == 0)
- return;
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ if (has_cpuid()) {
+ unsigned int regs[4];
+ unsigned int regs2[4];
- __cpu_detect_caps.cacheline = 32;
+ util_cpu_caps.cacheline = 32;
- /* Get max cpuid level */
- cpuid(0x00000000, regs);
+ /* Get max cpuid level */
+ cpuid(0x00000000, regs);
- if (regs[0] >= 0x00000001) {
- unsigned int cacheline;
+ if (regs[0] >= 0x00000001) {
+ unsigned int cacheline;
- cpuid (0x00000001, regs2);
+ cpuid (0x00000001, regs2);
- __cpu_detect_caps.x86cpuType = (regs2[0] >> 8) & 0xf;
- if (__cpu_detect_caps.x86cpuType == 0xf)
- __cpu_detect_caps.x86cpuType = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */
+ util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf;
+ if (util_cpu_caps.x86_cpu_type == 0xf)
+ util_cpu_caps.x86_cpu_type = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */
- /* general feature flags */
- __cpu_detect_caps.hasTSC = (regs2[3] & (1 << 8 )) >> 8; /* 0x0000010 */
- __cpu_detect_caps.hasMMX = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */
- __cpu_detect_caps.hasSSE = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */
- __cpu_detect_caps.hasSSE2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */
- __cpu_detect_caps.hasSSE3 = (regs2[2] & (1)); /* 0x0000001 */
- __cpu_detect_caps.hasSSSE3 = (regs2[2] & (1 << 9 )) >> 9; /* 0x0000020 */
- __cpu_detect_caps.hasMMX2 = __cpu_detect_caps.hasSSE; /* SSE cpus supports mmxext too */
+ /* general feature flags */
+ util_cpu_caps.has_tsc = (regs2[3] & (1 << 8 )) >> 8; /* 0x0000010 */
+ util_cpu_caps.has_mmx = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */
+ util_cpu_caps.has_sse = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */
+ util_cpu_caps.has_sse2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */
+ util_cpu_caps.has_sse3 = (regs2[2] & (1)); /* 0x0000001 */
+ util_cpu_caps.has_ssse3 = (regs2[2] & (1 << 9 )) >> 9; /* 0x0000020 */
+ util_cpu_caps.has_sse4_1 = (regs2[2] & (1 << 19)) >> 19;
+ util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
- cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
- if (cacheline > 0)
- __cpu_detect_caps.cacheline = cacheline;
- }
+ cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
+ if (cacheline > 0)
+ util_cpu_caps.cacheline = cacheline;
+ }
- cpuid(0x80000000, regs);
+ cpuid(0x80000000, regs);
- if (regs[0] >= 0x80000001) {
+ if (regs[0] >= 0x80000001) {
- cpuid(0x80000001, regs2);
+ cpuid(0x80000001, regs2);
- __cpu_detect_caps.hasMMX |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */
- __cpu_detect_caps.hasMMX2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */
- __cpu_detect_caps.has3DNow = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */
- __cpu_detect_caps.has3DNowExt = (regs2[3] & (1 << 30 )) >> 30;
- }
+ util_cpu_caps.has_mmx |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */
+ util_cpu_caps.has_mmx2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */
+ util_cpu_caps.has_3dnow = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */
+ util_cpu_caps.has_3dnow_ext = (regs2[3] & (1 << 30 )) >> 30;
+ }
- if (regs[0] >= 0x80000006) {
- cpuid(0x80000006, regs2);
- __cpu_detect_caps.cacheline = regs2[2] & 0xFF;
- }
+ if (regs[0] >= 0x80000006) {
+ cpuid(0x80000006, regs2);
+ util_cpu_caps.cacheline = regs2[2] & 0xFF;
+ }
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_NETBSD) || defined(PIPE_OS_CYGWIN) || defined(PIPE_OS_OPENBSD)
+ if (util_cpu_caps.has_sse)
+ check_os_katmai_support();
-#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_CYGWIN) || defined(OS_OPENBSD)
- if (__cpu_detect_caps.hasSSE)
- check_os_katmai_support();
-
- if (!__cpu_detect_caps.hasSSE) {
- __cpu_detect_caps.hasSSE2 = 0;
- __cpu_detect_caps.hasSSE3 = 0;
- __cpu_detect_caps.hasSSSE3 = 0;
- }
+ if (!util_cpu_caps.has_sse) {
+ util_cpu_caps.has_sse2 = 0;
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_ssse3 = 0;
+ }
#else
- __cpu_detect_caps.hasSSE = 0;
- __cpu_detect_caps.hasSSE2 = 0;
- __cpu_detect_caps.hasSSE3 = 0;
- __cpu_detect_caps.hasSSSE3 = 0;
+ util_cpu_caps.has_sse = 0;
+ util_cpu_caps.has_sse2 = 0;
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_ssse3 = 0;
+#endif
+ }
+#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
+
+#if defined(PIPE_ARCH_PPC)
+ check_os_altivec_support();
+#endif /* PIPE_ARCH_PPC */
+
+#ifdef DEBUG
+ debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch);
+ debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
+
+ debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
+ debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
+
+ debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc);
+ debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
+ debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
+ debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
+ debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
+ debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
+ debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
+ debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
+ debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
+ debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
+ debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
#endif
-#endif /* ARCH_X86 */
-
-#if defined(ARCH_POWERPC)
- check_os_altivec_support();
-#endif /* ARCH_POWERPC */
-
- __cpu_detect_initialized = 1;
-}
-
-struct cpu_detect_caps *cpu_detect_get_caps()
-{
- return &__cpu_detect_caps;
-}
-
-/* The getters and setters for feature flags */
-int cpu_detect_get_tsc()
-{
- return __cpu_detect_caps.hasTSC;
-}
-
-int cpu_detect_get_mmx()
-{
- return __cpu_detect_caps.hasMMX;
-}
-
-int cpu_detect_get_mmx2()
-{
- return __cpu_detect_caps.hasMMX2;
-}
-
-int cpu_detect_get_sse()
-{
- return __cpu_detect_caps.hasSSE;
-}
-
-int cpu_detect_get_sse2()
-{
- return __cpu_detect_caps.hasSSE2;
-}
-
-int cpu_detect_get_sse3()
-{
- return __cpu_detect_caps.hasSSE3;
-}
-
-int cpu_detect_get_ssse3()
-{
- return __cpu_detect_caps.hasSSSE3;
-}
-
-int cpu_detect_get_3dnow()
-{
- return __cpu_detect_caps.has3DNow;
-}
-
-int cpu_detect_get_3dnow2()
-{
- return __cpu_detect_caps.has3DNowExt;
-}
-int cpu_detect_get_altivec()
-{
- return __cpu_detect_caps.hasAltiVec;
+ util_cpu_detect_initialized = TRUE;
}
-