From 7e1f0176f4fe9ff9d5b668dde6cb584123d0e501 Mon Sep 17 00:00:00 2001
From: Carles Fernandez <carles.fernandez@gmail.com>
Date: Fri, 20 Nov 2020 16:44:06 +0100
Subject: [PATCH] Make SIMD instructions work on MS Windows

---
 docs/changelog.md                             |  2 +
 .../volk_gnsssdr/CMakeLists.txt               |  3 ++
 .../volk_gnsssdr/README.md                    | 18 +++++++
 .../volk_gnsssdr/gen/archs.xml                |  3 --
 ...nsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h | 17 +++++--
 ...dr_32fc_32f_rotator_dotprodxnpuppet_32fc.h | 10 +++-
 ...gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h | 34 +++++++-------
 .../volk_gnsssdr/lib/CMakeLists.txt           | 15 ++++--
 .../lib/volk_gnsssdr_rank_archs.c             | 47 +++++++------------
 9 files changed, 87 insertions(+), 62 deletions(-)
diff --git a/docs/changelog.md b/docs/changelog.md
index 2defafb3d..5196141a5 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -57,6 +57,8 @@ SPDX-FileCopyrightText: 2011-2020 Carles Fernandez-Prades <carles.fernandez@cttc
   GNSS-SDR, the old method is still used in old development environments. No
   extra dependency is needed. This change is transparent to the user, since
   everything is managed by the CMake scripts.
+- The `volk_gnsssdr` library can be built on Microsoft Windows and can execute
+  SIMD instructions on that OS.
 - Fix building with `-DENABLE_CUDA=ON` for blocks implemented with CUDA.
 
 ### Improvements in Usability:
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt
index 68a16a26b..b30a166ba 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt
@@ -247,7 +247,10 @@ if(CMAKE_VERSION VERSION_GREATER 3.0 AND SUPPORTED_CPU_FEATURES_ARCH)
         FORCE
     )
     set(USE_CPU_FEATURES ON)
+    set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}")
+    set(BUILD_SHARED_LIBS OFF)
     add_subdirectory(cpu_features)
+    set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}")
 endif()
 
 # Python
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/README.md b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/README.md
index cd2eec38e..46f1a8c62 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/README.md
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/README.md
@@ -139,6 +139,24 @@ $ volk_gnsssdr_profile
 $ sudo ldconfig
 ```
 
+### Building on Microsoft Windows
+
+With Visual Studio 2019, in a Powershell run by administrator:
+
+```
+$ git clone https://github.com/gnss-sdr/gnss-sdr
+$ cd gnss-sdr
+$ git checkout next
+$ cd build
+$ cmake -G "Visual Studio 16 2019" ..\src\algorithms\libs\volk_gnsssdr_module\volk_gnsssdr
+$ cd ..
+$ cmake --build build --config Release
+$ cd build
+$ ctest -C Release
+$ cd ..
+$ cmake --install build
+```
+
 ## References
 
 If you use VOLK_GNSSSDR in your research and/or software, please cite the
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/archs.xml b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/archs.xml
index cde79793b..35d33f174 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/archs.xml
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/archs.xml
@@ -68,7 +68,6 @@
   <check name="mmx"></check>
   <flag compiler="gnu">-mmmx</flag>
   <flag compiler="clang">-mmmx</flag>
-  <flag compiler="msvc">/arch:SSE</flag>
   <alignment>8</alignment>
 </arch>
 
@@ -84,7 +83,6 @@
   <check name="sse"></check>
   <flag compiler="gnu">-msse</flag>
   <flag compiler="clang">-msse</flag>
-  <flag compiler="msvc">/arch:SSE</flag>
   <environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment>
   <include>xmmintrin.h</include>
   <alignment>16</alignment>
@@ -94,7 +92,6 @@
   <check name="sse2"></check>
   <flag compiler="gnu">-msse2</flag>
   <flag compiler="clang">-msse2</flag>
-  <flag compiler="msvc">/arch:SSE2</flag>
   <alignment>16</alignment>
 </arch>
 
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h
index bb149a55c..2888cc679 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h
@@ -63,7 +63,6 @@
 #include <volk_gnsssdr/volk_gnsssdr_complex.h>
 #include <volk_gnsssdr/volk_gnsssdr_malloc.h>
 #include <math.h>
-// #include <stdio.h>
 
 #ifdef LV_HAVE_GENERIC
 
@@ -74,7 +73,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic(lv_32f
     unsigned int n;
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
-            result[n_vec] = lv_cmake(0, 0);
+            result[n_vec] = lv_cmake(0.0f, 0.0f);
         }
     for (n = 0; n < num_points; n++)
         {
@@ -115,7 +114,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload
     unsigned int j;
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
-            result[n_vec] = lv_cmake(0, 0);
+            result[n_vec] = lv_cmake(0.0f, 0.0f);
         }
 
     for (n = 0; n < num_points / ROTATOR_RELOAD; n++)
@@ -158,6 +157,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload
 #include <immintrin.h>
 static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
 {
+#ifndef WIN32
     unsigned int number = 0;
     int vec_ind = 0;
     unsigned int i = 0;
@@ -287,7 +287,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
 
             _mm256_store_ps((float*)dotProductVector, dotProdVal0[vec_ind]);  // Store the results back into the dot product vector
 
-            result[vec_ind] = lv_cmake(0, 0);
+            result[vec_ind] = lv_cmake(0.0f, 0.0f);
             for (i = 0; i < 4; ++i)
                 {
                     result[vec_ind] += dotProductVector[i];
@@ -312,6 +312,9 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
         }
 
     *phase = _phase;
+#else
+    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, in_common, phase_inc, phase, in_a, num_a_vectors, num_points);
+#endif
 }
 
 #endif /* LV_HAVE_AVX */
@@ -322,6 +325,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
 #include <immintrin.h>
 static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
 {
+#ifndef WIN32
     unsigned int number = 0;
     int vec_ind = 0;
     unsigned int i = 0;
@@ -451,7 +455,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
 
             _mm256_store_ps((float*)dotProductVector, dotProdVal0[vec_ind]);  // Store the results back into the dot product vector
 
-            result[vec_ind] = lv_cmake(0, 0);
+            result[vec_ind] = lv_cmake(0.0f, 0.0f);
             for (i = 0; i < 4; ++i)
                 {
                     result[vec_ind] += dotProductVector[i];
@@ -476,6 +480,9 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
         }
 
     *phase = _phase;
+#else
+    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, in_common, phase_inc, phase, in_a, num_a_vectors, num_points);
+#endif
 }
 
 
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc.h
index 85eb7bf5c..a73bac9cd 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc.h
@@ -106,8 +106,11 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_u_avx(lv_3
             in_a[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
             memcpy((float*)in_a[n], (float*)in, sizeof(float) * num_points);
         }
+#ifndef WIN32
     volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(result, local_code, phase_inc[0], phase, (const float**)in_a, num_a_vectors, num_points);
-
+#else
+    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, local_code, phase_inc[0], phase, (const float**)in_a, num_a_vectors, num_points);
+#endif
     for (n = 0; n < num_a_vectors; n++)
         {
             volk_gnsssdr_free(in_a[n]);
@@ -136,8 +139,11 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_a_avx(lv_3
             in_a[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
             memcpy((float*)in_a[n], (float*)in, sizeof(float) * num_points);
         }
+#ifndef WIN32
     volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(result, local_code, phase_inc[0], phase, (const float**)in_a, num_a_vectors, num_points);
-
+#else
+    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, local_code, phase_inc[0], phase, (const float**)in_a, num_a_vectors, num_points);
+#endif
     for (n = 0; n < num_a_vectors; n++)
         {
             volk_gnsssdr_free(in_a[n]);
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h
index 9b9ced9b1..45f0b7452 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h
@@ -74,7 +74,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic(lv_32fc
     unsigned int n;
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
-            result[n_vec] = lv_cmake(0, 0);
+            result[n_vec] = lv_cmake(0.0f, 0.0f);
         }
     for (n = 0; n < num_points; n++)
         {
@@ -115,7 +115,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic_reload(
     unsigned int j;
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
-            result[n_vec] = lv_cmake(0, 0);
+            result[n_vec] = lv_cmake(0.0f, 0.0f);
         }
 
     for (n = 0; n < num_points / ROTATOR_RELOAD; n++)
@@ -158,7 +158,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic_reload(
 #include <pmmintrin.h>
 static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points)
 {
-    lv_32fc_t dotProduct = lv_cmake(0, 0);
+    lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
     lv_32fc_t tmp32_1, tmp32_2;
     const unsigned int sse_iters = num_points / 2;
     int n_vec;
@@ -240,7 +240,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
             _mm_store_ps((float*)dotProductVector, acc[n_vec]);  // Store the results back into the dot product vector
-            dotProduct = lv_cmake(0, 0);
+            dotProduct = lv_cmake(0.0f, 0.0f);
             for (i = 0; i < 2; ++i)
                 {
                     dotProduct = dotProduct + dotProductVector[i];
@@ -270,7 +270,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
 #include <pmmintrin.h>
 static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points)
 {
-    lv_32fc_t dotProduct = lv_cmake(0, 0);
+    lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
     lv_32fc_t tmp32_1, tmp32_2;
     const unsigned int sse_iters = num_points / 2;
     int n_vec;
@@ -352,7 +352,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
             _mm_store_ps((float*)dotProductVector, acc[n_vec]);  // Store the results back into the dot product vector
-            dotProduct = lv_cmake(0, 0);
+            dotProduct = lv_cmake(0.0f, 0.0f);
             for (i = 0; i < 2; ++i)
                 {
                     dotProduct = dotProduct + dotProductVector[i];
@@ -382,7 +382,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
 #include <immintrin.h>
 static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points)
 {
-    lv_32fc_t dotProduct = lv_cmake(0, 0);
+    lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
     lv_32fc_t tmp32_1, tmp32_2;
     const unsigned int avx_iters = num_points / 4;
     int n_vec;
@@ -401,13 +401,14 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
             acc[n_vec] = _mm256_setzero_ps();
-            result[n_vec] = lv_cmake(0, 0);
+            result[n_vec] = lv_cmake(0.0f, 0.0f);
         }
 
     // phase rotation registers
     __m256 a, four_phase_acc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z;
 
-    __attribute__((aligned(32))) lv_32fc_t four_phase_inc[4];
+    __VOLK_ATTR_ALIGNED(32)
+    lv_32fc_t four_phase_inc[4];
     const lv_32fc_t phase_inc2 = phase_inc * phase_inc;
     const lv_32fc_t phase_inc3 = phase_inc2 * phase_inc;
     const lv_32fc_t phase_inc4 = phase_inc3 * phase_inc;
@@ -417,7 +418,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
     four_phase_inc[3] = phase_inc4;
     const __m256 four_phase_inc_reg = _mm256_load_ps((float*)four_phase_inc);
 
-    __attribute__((aligned(32))) lv_32fc_t four_phase_acc[4];
+    __VOLK_ATTR_ALIGNED(32)
+    lv_32fc_t four_phase_acc[4];
     four_phase_acc[0] = _phase;
     four_phase_acc[1] = _phase * phase_inc;
     four_phase_acc[2] = _phase * phase_inc2;
@@ -472,7 +474,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
             _mm256_store_ps((float*)dotProductVector, acc[n_vec]);  // Store the results back into the dot product vector
-            dotProduct = lv_cmake(0, 0);
+            dotProduct = lv_cmake(0.0f, 0.0f);
             for (i = 0; i < 4; ++i)
                 {
                     dotProduct = dotProduct + dotProductVector[i];
@@ -510,7 +512,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
 #include <immintrin.h>
 static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points)
 {
-    lv_32fc_t dotProduct = lv_cmake(0, 0);
+    lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
     lv_32fc_t tmp32_1, tmp32_2;
     const unsigned int avx_iters = num_points / 4;
     int n_vec;
@@ -529,7 +531,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
             acc[n_vec] = _mm256_setzero_ps();
-            result[n_vec] = lv_cmake(0, 0);
+            result[n_vec] = lv_cmake(0.0f, 0.0f);
         }
 
     // phase rotation registers
@@ -602,7 +604,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
     for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
         {
             _mm256_store_ps((float*)dotProductVector, acc[n_vec]);  // Store the results back into the dot product vector
-            dotProduct = lv_cmake(0, 0);
+            dotProduct = lv_cmake(0.0f, 0.0f);
             for (i = 0; i < 4; ++i)
                 {
                     dotProduct = dotProduct + dotProductVector[i];
@@ -655,7 +657,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
 
     if (neon_iters > 0)
         {
-            lv_32fc_t dotProduct = lv_cmake(0, 0);
+            lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
             float32_t arg_phase0 = cargf(_phase);
             float32_t arg_phase_inc = cargf(phase_inc);
             float32_t phase_est;
@@ -760,7 +762,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
             for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
                 {
                     vst2q_f32((float32_t*)dotProductVector, accumulator1[n_vec]);  // Store the results back into the dot product vector
-                    dotProduct = lv_cmake(0, 0);
+                    dotProduct = lv_cmake(0.0f, 0.0f);
                     for (i = 0; i < 4; ++i)
                         {
                             dotProduct = dotProduct + dotProductVector[i];
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt
index 68afbf718..2fe6e04ab 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt
@@ -145,6 +145,11 @@ macro(OVERRULE_ARCH arch reason)
     list(REMOVE_ITEM available_archs ${arch})
 endmacro()
 
+macro(FORCE_ARCH arch reason)
+    message(STATUS "${reason}, Forced arch ${arch}")
+    list(APPEND available_archs ${arch})
+endmacro()
+
 ########################################################################
 # eliminate AVX on if not on x86, or if the compiler does not accept
 # the xgetbv instruction, or {if not cross-compiling and the xgetbv
@@ -264,13 +269,13 @@ if(NOT CROSSCOMPILE_MULTILIB AND CPU_IS_x86)
     endif()
 
     # MSVC 64 bit does not have MMX, overrule it
-    if(${SIZEOF_CPU} EQUAL 64 AND MSVC)
-        overrule_arch(mmx "No MMX for Win64")
-        if(MSVC_VERSION GREATER 1700)
-            overrule_arch(sse "No SSE for Win64 Visual Studio 2013")
+    if(MSVC)
+        if(${SIZEOF_CPU} EQUAL 64)
+            overrule_arch(mmx "No MMX for Win64")
         endif()
+        force_arch(sse "Built-in for MSVC > 2013")
+        force_arch(sse2 "Built-in for MSVC > 2013")
     endif()
-
 endif()
 
 ########################################################################
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.c b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.c
index db5b931d1..dca205524 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.c
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.c
@@ -15,25 +15,10 @@
 #include <string.h>
 // clang-format on
 
-#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4
-#define __popcnt __builtin_popcount
-#else
-inline unsigned __popcnt(unsigned num)
-{
-    unsigned pop = 0;
-    while (num)
-        {
-            if (num & 0x1) pop++;
-            num >>= 1;
-        }
-    return pop;
-}
-#endif
-
 int volk_gnsssdr_get_index(
-    const char *impl_names[],  //list of implementations by name
-    const size_t n_impls,      //number of implementations available
-    const char *impl_name      //the implementation name to find
+    const char *impl_names[],  // list of implementations by name
+    const size_t n_impls,      // number of implementations available
+    const char *impl_name      // the implementation name to find
 )
 {
     unsigned int i;
@@ -44,20 +29,20 @@ int volk_gnsssdr_get_index(
                     return i;
                 }
         }
-    //TODO return -1;
-    //something terrible should happen here
+    // TODO return -1;
+    // something terrible should happen here
     fprintf(stderr, "VOLK_GNSSSDR warning: no arch found, returning generic impl\n");
     return volk_gnsssdr_get_index(impl_names, n_impls, "generic");  //but we'll fake it for now
 }
 
 
 int volk_gnsssdr_rank_archs(
-    const char *kern_name,     //name of the kernel to rank
-    const char *impl_names[],  //list of implementations by name
-    const int *impl_deps,      //requirement mask per implementation
-    const bool *alignment,     //alignment status of each implementation
-    size_t n_impls,            //number of implementations available
-    const bool align           //if false, filter aligned implementations
+    const char *kern_name,     // name of the kernel to rank
+    const char *impl_names[],  // list of implementations by name
+    const int *impl_deps,      // requirement mask per implementation
+    const bool *alignment,     // alignment status of each implementation
+    size_t n_impls,            // number of implementations available
+    const bool align           // if false, filter aligned implementations
 )
 {
     size_t i;
@@ -78,7 +63,7 @@ int volk_gnsssdr_rank_archs(
             return volk_gnsssdr_get_index(impl_names, n_impls, "generic");
         }
 
-    //now look for the function name in the prefs list
+    // now look for the function name in the prefs list
     for (i = 0; i < n_arch_prefs; i++)
         {
             if (!strncmp(kern_name, volk_gnsssdr_arch_prefs[i].name, sizeof(volk_gnsssdr_arch_prefs[i].name)))  //found it
@@ -88,14 +73,14 @@ int volk_gnsssdr_rank_archs(
                 }
         }
 
-    //return the best index with the largest deps
+    // return the best index with the largest deps
     size_t best_index_a = 0;
     size_t best_index_u = 0;
     int best_value_a = -1;
     int best_value_u = -1;
     for (i = 0; i < n_impls; i++)
         {
-            const signed val = __popcnt(impl_deps[i]);
+            const signed val = impl_deps[i];
             if (alignment[i] && val > best_value_a)
                 {
                     best_index_a = i;
@@ -108,9 +93,9 @@ int volk_gnsssdr_rank_archs(
                 }
         }
 
-    //when align and we found a best aligned, use it
+    // when align and we found a best aligned, use it
     if (align && best_value_a != -1) return best_index_a;
 
-    //otherwise return the best unaligned
+    // otherwise return the best unaligned
     return best_index_u;
 }