Skip to content
Snippets Groups Projects
Commit dce3c2b4 authored by Martin Reinecke
Browse files

fixes

parent f30d99cb
Branches
Tags
1 merge request: !16 "Pol ispack"
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
#endif #endif
#include "sharp_announce.h" #include "sharp_announce.h"
#include "sharp_vecutil.h" #include "sharp_core.h"
static void OpenMP_status(void) static void OpenMP_status(void)
{ {
...@@ -70,7 +70,7 @@ static void MPI_status(void) ...@@ -70,7 +70,7 @@ static void MPI_status(void)
} }
static void vecmath_status(void) static void vecmath_status(void)
{ printf("Supported vector length: %d\n",VLEN); } { printf("Supported vector length: %d\n",sharp_veclen()); }
void sharp_announce (const char *name) void sharp_announce (const char *name)
{ {
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
#include "sharp_core_inc0.c" #include "sharp_core_inc0.c"
#undef ARCH #undef ARCH
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6) #if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
void inner_loop_avx (sharp_job *job, const int *ispair,const double *cth, void inner_loop_avx (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *mlim); const int *mlim);
...@@ -43,7 +43,7 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth, ...@@ -43,7 +43,7 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *mlim) const int *mlim)
{ {
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6) #if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
__builtin_cpu_init(); __builtin_cpu_init();
if (__builtin_cpu_supports("avx")) if (__builtin_cpu_supports("avx"))
inner_loop_avx (job, ispair, cth, sth, llim, ulim, gen, mi, mlim); inner_loop_avx (job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
...@@ -51,3 +51,14 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth, ...@@ -51,3 +51,14 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
#endif #endif
inner_loop_default (job, ispair, cth, sth, llim, ulim, gen, mi, mlim); inner_loop_default (job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
} }
/* Report the vector length that is in effect at run time.
 *
 * When compiled for x86_64 by a compiler defining __GNUC__ >= 5 and AVX is
 * not already enabled at compile time, the CPU is probed at run time and 4
 * is returned if it reports AVX support.  In every other case the
 * compile-time constant VLEN is returned. */
int sharp_veclen(void)
{
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
  /* Initialise the compiler's CPU feature detection, then query it. */
  __builtin_cpu_init();
  return __builtin_cpu_supports("avx") ? 4 : VLEN;
#else
  return VLEN;
#endif
}
...@@ -43,6 +43,8 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth, ...@@ -43,6 +43,8 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *mlim); const int *mlim);
int sharp_veclen(void);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
......
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6) #if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
// if we arrive here, we can benefit from an additional AVX version // if we arrive here, we can benefit from an additional AVX version
#warning entering gcc and x86_64 specific code branch // #warning entering gcc and x86_64 specific code branch
#define ARCH _avx #define ARCH _avx
#define __AVX__ //#define __AVX__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx") #pragma GCC target("avx")
#include "sharp_core_inc0.c" #include "sharp_core_inc0.c"
#pragma GCC pop_options #pragma GCC pop_options
#undef __AVX__ //#undef __AVX__
#undef ARCH #undef ARCH
#endif #endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment