@@ -47,9 +47,9 @@ constexpr int kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup;
47
47
// weights and reps are scratch registers.
48
48
// This function must be inlined with references in order for the compiler to
49
49
// correctly use the registers declared in the caller.
50
- inline void MultiplyGroup (const __m256i& rep_input, const __m256i& ones,
51
- const int8_t *& wi, __m256i& weights, __m256i& reps ,
52
- __m256i& result) {
50
+ static inline void MultiplyGroup (const __m256i& rep_input, const __m256i& ones,
51
+ const int8_t *& wi, __m256i& weights,
52
+ __m256i& reps, __m256i& result) {
53
53
// Load a 4x8 block of weights.
54
54
weights = _mm256_loadu_si256 (reinterpret_cast <const __m256i*>(wi));
55
55
wi += kNumInputsPerRegister ;
@@ -71,9 +71,9 @@ inline void MultiplyGroup(const __m256i& rep_input, const __m256i& ones,
71
71
// Extracts and converts 8x32-bit results from result, adding the bias from wi
72
72
// and scaling by scales, before storing in *v. Note that wi, scales and v are
73
73
// expected to contain 8 consecutive elements, or num_out elements if num_out < 8.
74
- inline void ExtractResults (__m256i& result, __m256i& shift_id,
75
- const int8_t *& wi, const double *& scales,
76
- int num_out, double *& v) {
74
+ static inline void ExtractResults (__m256i& result, __m256i& shift_id,
75
+ const int8_t *& wi, const double *& scales,
76
+ int num_out, double *& v) {
77
77
for (int out = 0 ; out < num_out; ++out) {
78
78
int32_t res =
79
79
#ifndef _MSC_VER
0 commit comments