|
18 | 18 |
|
19 | 19 | #include "weightmatrix.h"
|
20 | 20 |
|
21 |
| -#undef X86_BUILD |
22 |
| -#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32) |
23 |
| -# if !defined(ANDROID_BUILD) |
24 |
| -# define X86_BUILD 1 |
25 |
| -# endif // !ANDROID_BUILD |
26 |
| -#endif // x86 target |
27 |
| - |
28 |
| -#if defined(X86_BUILD) |
29 |
| -# if defined(__linux__) || defined(__MINGW32__) |
30 |
| -# include <cpuid.h> |
31 |
| -# elif defined(_WIN32) |
32 |
| -# include <intrin.h> |
33 |
| -# endif |
34 |
| -#endif |
35 | 21 | #include "dotproductavx.h"
|
36 | 22 | #include "dotproductsse.h"
|
| 23 | +#include "simddetect.h" |
37 | 24 | #include "statistc.h"
|
38 |
| -#include "svutil.h" |
39 | 25 | #include "tprintf.h"
|
40 | 26 |
|
41 | 27 | namespace tesseract {
|
42 | 28 |
|
43 |
| -// Architecture detector. Add code here to detect any other architectures for |
44 |
| -// SIMD-based faster dot product functions. Intended to be a single static |
45 |
| -// object, but it does no real harm to have more than one. |
46 |
| -class SIMDDetect { |
47 |
| - public: |
48 |
| - SIMDDetect() |
49 |
| - : arch_tested_(false), avx_available_(false), sse_available_(false) {} |
50 |
| - |
51 |
| - // Returns true if AVX is available on this system. |
52 |
| - bool IsAVXAvailable() { |
53 |
| - if (!arch_tested_) TestArchitecture(); |
54 |
| - return avx_available_; |
55 |
| - } |
56 |
| - // Returns true if SSE4.1 is available on this system. |
57 |
| - bool IsSSEAvailable() { |
58 |
| - if (!arch_tested_) TestArchitecture(); |
59 |
| - return sse_available_; |
60 |
| - } |
61 |
| - |
62 |
| - private: |
63 |
| - // Tests the architecture in a system-dependent way to detect AVX, SSE and |
64 |
| - // any other available SIMD equipment. |
65 |
| - void TestArchitecture() { |
66 |
| - SVAutoLock lock(&arch_mutex_); |
67 |
| - if (!arch_tested_) { |
68 |
| -#if defined(X86_BUILD) |
69 |
| -# if defined(__linux__) || defined(__MINGW32__) |
70 |
| - unsigned int eax, ebx, ecx, edx; |
71 |
| - if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) { |
72 |
| - sse_available_ = (ecx & 0x00080000) != 0; |
73 |
| - avx_available_ = (ecx & 0x10000000) != 0; |
74 |
| - } |
75 |
| -# elif defined(_WIN32) |
76 |
| - int cpuInfo[4]; |
77 |
| - __cpuid(cpuInfo, 0); |
78 |
| - if (cpuInfo[0] >= 1) { |
79 |
| - __cpuid(cpuInfo, 1); |
80 |
| - sse_available_ = (cpuInfo[2] & 0x00080000) != 0; |
81 |
| - avx_available_ = (cpuInfo[2] & 0x10000000) != 0; |
82 |
| - } |
83 |
| -# endif |
84 |
| - if (avx_available_) tprintf("Found AVX\n"); |
85 |
| - if (sse_available_) tprintf("Found SSE\n"); |
86 |
| -#endif // X86_BUILD |
87 |
| - arch_tested_ = true; |
88 |
| - } |
89 |
| - } |
90 |
| - |
91 |
| - private: |
92 |
| - // Detect architecture in only a single thread. |
93 |
| - SVMutex arch_mutex_; |
94 |
| - // Flag set to true after TestArchitecture has been called. |
95 |
| - bool arch_tested_; |
96 |
| - // If true, then AVX has been detected. |
97 |
| - bool avx_available_; |
98 |
| - // If true, then SSe4.1 has been detected. |
99 |
| - bool sse_available_; |
100 |
| -}; |
101 |
| - |
102 |
| -static SIMDDetect detector; |
103 |
| - |
104 | 29 | // Copies the whole input transposed, converted to double, into *this.
|
105 | 30 | void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double>& input) {
|
106 | 31 | int width = input.dim1();
|
@@ -258,7 +183,7 @@ void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {
|
258 | 183 | for (int i = 0; i < num_out; ++i) {
|
259 | 184 | const inT8* Wi = wi_[i];
|
260 | 185 | int total = 0;
|
261 |
| - if (detector.IsSSEAvailable()) { |
| 186 | + if (SIMDDetect::IsSSEAvailable()) { |
262 | 187 | total = IntDotProductSSE(u, Wi, num_in);
|
263 | 188 | } else {
|
264 | 189 | for (int j = 0; j < num_in; ++j) total += Wi[j] * u[j];
|
@@ -410,8 +335,8 @@ double WeightMatrix::DotProduct(const double* u, const double* v, int n) {
|
410 | 335 | // is about 8% faster than sse. This suggests that the time is memory
|
411 | 336 | // bandwidth constrained and could benefit from holding the reused vector
|
412 | 337 | // in AVX registers.
|
413 |
| - if (detector.IsAVXAvailable()) return DotProductAVX(u, v, n); |
414 |
| - if (detector.IsSSEAvailable()) return DotProductSSE(u, v, n); |
| 338 | + if (SIMDDetect::IsAVXAvailable()) return DotProductAVX(u, v, n); |
| 339 | + if (SIMDDetect::IsSSEAvailable()) return DotProductSSE(u, v, n); |
415 | 340 | double total = 0.0;
|
416 | 341 | for (int k = 0; k < n; ++k) total += u[k] * v[k];
|
417 | 342 | return total;
|
|
0 commit comments