kwave  18.07.70
cpu_accel.cpp
Go to the documentation of this file.
1 /*
2  * cpu_accel.c
3  * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
4  *
5  * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
6  *
7  * mpeg2dec is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * mpeg2dec is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
20  *
21  * 2014-05-26
22  * Copied this source into the Kwave project and adapted it to compile
23  * cleanly within this new environment
24  * by Thomas Eschenbacher <Thomas.Eschenbacher@gmx.de>
25  * marked most changes with "#ifdef XINE_COMPILE"
26  */
27 
28 #include "config.h"
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <inttypes.h>
33 
34 #if defined(HAVE_MLIB) && defined(MLIB_LAZYLOAD)
35 #include <dlfcn.h>
36 #endif
37 
38 #if defined (__SVR4) && defined (__sun)
39 #include <sys/systeminfo.h>
40 #endif
41 
42 #ifdef XINE_COMPILE
43 #define LOG_MODULE "cpu_accel"
44 #define LOG_VERBOSE
45 #endif /* XINE_COMPILE */
46 
47 /*
48 #define LOG
49 */
50 
51 #ifdef XINE_COMPILE
52 
53 #include <xine/xineutils.h>
54 
55 #else /* XINE_COMPILE */
56 
57 #include "cputest.h"
58 #define lprintf printf
59 
60 #endif /* XINE_COMPILE */
61 
62 #if defined(PIC) && ! defined(__PIC__)
63 #define __PIC__
64 #endif
65 
66 #if defined(__i386__) || defined(__x86_64__)
67 
68 #include <signal.h>
69 #include <setjmp.h>
70 
71 static jmp_buf sigill_return;
72 
73 static __attribute__((noreturn)) void sigill_handler (int n) {
74  (void)n;
75  longjmp(sigill_return, 1);
76 }
77 
/*
 * Probe the x86 CPU (and, for AVX and SSE, the operating system) for SIMD
 * support and return the result as a bitmask of MM_ACCEL_X86_* flags.
 *
 * The probe uses CPUID leaves 0, 1, 0x80000000 and 0x80000001.  For AVX
 * and SSE a test instruction is executed while sigill_handler is installed
 * for SIGILL; the previous handler is put back afterwards.
 *
 * On 32-bit builds 0 is returned early when CPUID is not available or
 * only reports the vendor string.
 *
 * NOTE(review): the line numbers embedded in this listing skip 87, 89, 173
 * and 240-241, so a few statements are not visible here; the affected
 * places are marked below.
 */
78 static uint32_t arch_accel (void)
79 {
80  /* made static to avoid clobbering by longjmp (THE, 2014-05-30) */
81  static uint32_t caps = 0;
82 
/* NOTE(review): embedded lines 87 and 89 are missing from this listing; the
 * statement terminated by the lone ';' below is therefore not visible. */
83 #if defined(__x86_64__) || \
84  ( defined(__SSE__) && defined(__SSE2__) && defined(__MMX__) )
85  /* No need to test for this on AMD64, we know what the
86  platform has. */
88 # if defined(__3dNOW__)
90 # endif
91  ;
92 #endif
93 
94  void (*old_sigill_handler)(int);
95  uint32_t eax, ebx, ecx, edx;
96 
/*
 * cpuid(op, eax, ebx, ecx, edx): execute CPUID with `op` in EAX and store
 * the four result registers in the given variables.
 * The x86_64 and PIC variants push/pop rbx (ebx) around the instruction and
 * copy the EBX result into a general register operand instead of binding
 * the output to "b" directly.
 */
97 #if defined(__x86_64__)
98 #define cpuid(op,eax,ebx,ecx,edx) \
99  __asm__ ("push %%rbx\n\t" \
100  "cpuid\n\t" \
101  "movl %%ebx,%1\n\t" \
102  "pop %%rbx" \
103  : "=a" (eax), \
104  "=r" (ebx), \
105  "=c" (ecx), \
106  "=d" (edx) \
107  : "a" (op) \
108  : "cc")
109 #elif !defined(__PIC__)
110 #define cpuid(op,eax,ebx,ecx,edx) \
111  __asm__ ("cpuid" \
112  : "=a" (eax), \
113  "=b" (ebx), \
114  "=c" (ecx), \
115  "=d" (edx) \
116  : "a" (op) \
117  : "cc")
118 #else /* PIC version : save ebx */
119 #define cpuid(op,eax,ebx,ecx,edx) \
120  __asm__ ("pushl %%ebx\n\t" \
121  "cpuid\n\t" \
122  "movl %%ebx,%1\n\t" \
123  "popl %%ebx" \
124  : "=a" (eax), \
125  "=r" (ebx), \
126  "=c" (ecx), \
127  "=d" (edx) \
128  : "a" (op) \
129  : "cc")
130 #endif
131 
/*
 * 32-bit only: try to flip bit 0x200000 of EFLAGS.  Afterwards ebx holds
 * the original flags value and eax the value read back after the attempt;
 * the original flags are restored by the final popfl.  If the bit could
 * not be changed the CPU is treated as having no CPUID instruction.
 */
132 #ifndef __x86_64__
133  __asm__ ("pushfl\n\t"
134  "pushfl\n\t"
135  "popl %0\n\t"
136  "movl %0,%1\n\t"
137  "xorl $0x200000,%0\n\t"
138  "pushl %0\n\t"
139  "popfl\n\t"
140  "pushfl\n\t"
141  "popl %0\n\t"
142  "popfl"
143  : "=r" (eax),
144  "=r" (ebx)
145  :
146  : "cc");
147 
148  if (eax == ebx) {
149  /* no cpuid */
150  return 0;
151  }
152 
153  cpuid (0x00000000, eax, ebx, ecx, edx);
154  if (!eax) {
155  /* vendor string only */
156  return 0;
157  }
158 
/* the three constants are the ASCII bytes "Auth", "cAMD" and "enti",
 * i.e. the vendor string "AuthenticAMD" split over EBX, ECX and EDX */
159  int AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65);
160 
161 #endif /* __x86_64__ */
162 
/* leaf 1: the feature bits tested below are taken from EDX and ECX */
163  cpuid (0x00000001, eax, ebx, ecx, edx);
164 
165 #ifndef __x86_64__
166  if (edx & 0x00800000) {
167  /* MMX */
168  caps |= MM_ACCEL_X86_MMX;
169  }
170 
171  if (edx & 0x02000000) {
172  /* SSE - identical to AMD MMX extensions */
/* NOTE(review): embedded line 173 is missing from this listing, so the
 * statement executed in this branch is not visible here. */
174  }
175 
176  if (edx & 0x04000000) {
177  /* SSE2 */
178  caps |= MM_ACCEL_X86_SSE2;
179  }
180 #endif /* __x86_64__ */
181 
182  if (ecx & 0x00000001) {
183  caps |= MM_ACCEL_X86_SSE3;
184  }
185  if (ecx & 0x00000200) {
186  caps |= MM_ACCEL_X86_SSSE3;
187  }
188  if (ecx & 0x00080000) {
189  caps |= MM_ACCEL_X86_SSE4;
190  }
191  if (ecx & 0x00100000) {
192  caps |= MM_ACCEL_X86_SSE42;
193  }
194 
195  /* Check OXSAVE and AVX bits */
196  if ((ecx & 0x18000000) == 0x18000000) {
197  /* test OS support for AVX */
198 
/* sigill_handler longjmp()s back to the setjmp() below if the probe
 * instruction raises SIGILL */
199  old_sigill_handler = signal (SIGILL, sigill_handler);
200 
201  if (setjmp(sigill_return)) {
202  lprintf("OS doesn't support AVX instructions.\n");
203  } else {
204  /* Get value of extended control register 0 */
/* the instruction is emitted as raw opcode bytes with ECX = 0 as input;
 * AVX is only reported when bits 1 and 2 of the EAX result are both set */
205  __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (0));
206  if ((eax & 0x6) == 0x6) {
207  caps |= MM_ACCEL_X86_AVX;
208  }
209 
210  }
211 
212  signal(SIGILL, old_sigill_handler);
213  }
214 
/* 32-bit only: extended leaves; leaf 0x80000001 is queried only if leaf
 * 0x80000000 reports (in EAX) that it exists */
215 #ifndef __x86_64__
216  cpuid (0x80000000, eax, ebx, ecx, edx);
217  if (eax >= 0x80000001) {
218  cpuid (0x80000001, eax, ebx, ecx, edx);
219 
220  if (edx & 0x80000000) {
221  /* AMD 3DNow extensions */
222  caps |= MM_ACCEL_X86_3DNOW;
223  }
224 
225  if (AMD && (edx & 0x00400000)) {
226  /* AMD MMX extensions */
227  caps |= MM_ACCEL_X86_MMXEXT;
228  }
229  }
230 #endif /* __x86_64__ */
231 
232 #ifndef __x86_64__
233  /* test OS support for SSE */
234  if (caps & MM_ACCEL_X86_SSE) {
235  old_sigill_handler = signal (SIGILL, sigill_handler);
236 
237  if (setjmp(sigill_return)) {
238  lprintf("OS doesn't support SSE instructions.\n");
/* NOTE(review): embedded lines 240-241 are missing from this listing; the
 * rest of the mask that is cleared from caps is not visible here. */
239  caps &= ~(MM_ACCEL_X86_SSE|MM_ACCEL_X86_SSE2|
242  } else {
/* probe: execute one SSE instruction; SIGILL lands in the branch above */
243  __asm__ volatile ("xorps %xmm0, %xmm0");
244  }
245 
246  signal(SIGILL, old_sigill_handler);
247  }
248 
249 #endif /* x86_64 */
250 
251  return caps;
252 }
253 
254 #endif /* i386 or x86_64 */
255 
256 #if defined(ARCH_PPC) && defined(ENABLE_ALTIVEC)
257 #include <signal.h>
258 #include <setjmp.h>
259 
260 static sigjmp_buf jmpbuf;
261 static volatile sig_atomic_t canjump = 0;
262 
263 static void sigill_handler (int sig)
264 {
265  if (!canjump) {
266  signal (sig, SIG_DFL);
267  raise (sig);
268  }
269 
270  canjump = 0;
271  siglongjmp (jmpbuf, 1);
272 }
273 
274 static uint32_t arch_accel (void)
275 {
276  /* FIXME: Autodetect cache line size via AUX ELF vector or otherwise */
277  uint32_t flags = 0;
278 
279  signal (SIGILL, sigill_handler);
280  if (sigsetjmp (jmpbuf, 1)) {
281  signal (SIGILL, SIG_DFL);
282  return flags;
283  }
284 
285  canjump = 1;
286 #ifndef HOST_OS_DARWIN
287  __asm__ volatile ("mtspr 256, %0\n\t"
288  "vand %%v0, %%v0, %%v0"
289  :
290  : "r" (-1));
291 #else
292  __asm__ volatile ("mtspr 256, r0\n\t"
293  "vand v0, v0, v0"
294  :
295  : "r" (-1));
296 #endif
297 
298  signal (SIGILL, SIG_DFL);
299  return flags|MM_ACCEL_PPC_ALTIVEC;
300 }
301 #endif /* ARCH_PPC */
302 
303 #if defined(ARCH_SPARC) && defined(ENABLE_VIS)
304 #if defined (__SVR4) && defined (__sun)
305 static uint32_t arch_accel (void)
306 {
307  uint32_t flags = 0;
308  long len;
309  char isalist_[257], *isalist, *s1, *last, *token;
310 
311  len = sysinfo(SI_ISALIST, isalist_, 257);
312 
313  if (len > 257) {
314  isalist = malloc(len);
315  sysinfo(SI_ISALIST, isalist, len);
316  }
317  else {
318  isalist = isalist_;
319  }
320 
321  s1 = isalist;
322  while (token = strtok_r(s1, " ", &last)) {
323  if (strlen(token) > 4) {
324  if (strcmp(token + (strlen(token) - 4), "+vis") == 0) {
325  flags |= MM_ACCEL_SPARC_VIS;
326  }
327  }
328 
329  if (strlen(token) > 5) {
330  if (strcmp(token + (strlen(token) - 5), "+vis2") == 0) {
331  flags |= MM_ACCEL_SPARC_VIS2;
332  }
333  }
334 
335  s1 = NULL;
336  }
337 
338  if (isalist != isalist_) {
339  free(isalist);
340  }
341  return flags;
342 }
343 #else
344 #include <signal.h>
345 #include <setjmp.h>
346 
347 static sigjmp_buf jmpbuf;
348 static volatile sig_atomic_t canjump = 0;
349 
350 static void sigill_handler (int sig)
351 {
352  if (!canjump) {
353  signal(sig, SIG_DFL);
354  raise(sig);
355  }
356 
357  canjump = 0;
358  siglongjmp(jmpbuf, 1);
359 }
360 
361 static uint32_t arch_accel (void)
362 {
363  uint32_t flags = 0;
364 
365  signal(SIGILL, sigill_handler);
366  if (sigsetjmp(jmpbuf, 1)) {
367  signal(SIGILL, SIG_DFL);
368  return flags;
369  }
370 
371  canjump = 1;
372 
373  /* pdist %f0, %f0, %f0 */
374  __asm__ __volatile__(".word\t0x81b007c0");
375 
376  canjump = 0;
377  flags |= MM_ACCEL_SPARC_VIS;
378 
379  if (sigsetjmp(jmpbuf, 1)) {
380  signal(SIGILL, SIG_DFL);
381  return flags;
382  }
383 
384  canjump = 1;
385 
386  /* edge8n %g0, %g0, %g0 */
387  __asm__ __volatile__(".word\t0x81b00020");
388 
389  canjump = 0;
390  flags |= MM_ACCEL_SPARC_VIS2;
391 
392  signal(SIGILL, SIG_DFL);
393  return flags;
394 }
395 #endif
396 #endif /* ARCH_SPARC */
397 
/*
 * Return the bitmask of MM_ACCEL_* acceleration flags for this machine.
 *
 * Detection runs on the first call only; the result is kept in a
 * function-local static and handed out unchanged on every later call.
 * With HAVE_MLIB the MM_ACCEL_MLIB flag is added (with MLIB_LAZYLOAD only
 * if "libmlib.so.2" can be opened); on the supported architectures the
 * result of arch_accel() is merged in.
 */
uint32_t xine_mm_accel (void)
{
    static int probed = 0;
    static uint32_t cached_caps = 0;

    /* fast path: everything was determined by an earlier call */
    if (probed) {
        return cached_caps;
    }

#ifdef HAVE_MLIB
#ifdef MLIB_LAZYLOAD
    {
        void *hndl = dlopen("libmlib.so.2", RTLD_LAZY | RTLD_GLOBAL | RTLD_NODELETE);

        if (hndl != NULL) {
            dlclose(hndl);
            cached_caps |= MM_ACCEL_MLIB;
        }
    }
#else
    cached_caps |= MM_ACCEL_MLIB;
#endif
#endif

#if defined(__i386__) || defined(__x86_64__) || (defined(ARCH_PPC) && defined(ENABLE_ALTIVEC)) || (defined(ARCH_SPARC) && defined(ENABLE_VIS))
    cached_caps |= arch_accel();
#endif

    probed = 1;
    return cached_caps;
}
#define MM_ACCEL_X86_SSE2
Definition: cputest.h:26
#define MM_ACCEL_X86_3DNOW
Definition: cputest.h:23
#define MM_ACCEL_X86_MMX
Definition: cputest.h:22
#define MM_ACCEL_X86_SSE4
Definition: cputest.h:29
#define lprintf
Definition: cpu_accel.cpp:58
#define MM_ACCEL_X86_SSSE3
Definition: cputest.h:28
#define MM_ACCEL_X86_SSE3
Definition: cputest.h:27
#define MM_ACCEL_X86_AVX
Definition: cputest.h:31
#define MM_ACCEL_X86_MMXEXT
Definition: cputest.h:24
#define MM_ACCEL_X86_SSE42
Definition: cputest.h:30
#define MM_ACCEL_X86_SSE
Definition: cputest.h:25
uint32_t xine_mm_accel(void)
Definition: cpu_accel.cpp:398