Commit 4b034659 authored by Luka Stanisic

rel2: code development

parent 254d53db
...@@ -29,7 +29,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") ...@@ -29,7 +29,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
include_directories(include) include_directories(include)
set (BIOEM_ICC_FLAGS "-xHost -O3 -fno-alias -fno-fnalias -unroll -g0 -ipo") set (BIOEM_ICC_FLAGS "-O3 -fno-alias -fno-fnalias -unroll -g0 -ip")
set (BIOEM_GCC_FLAGS "-O3 -march=native -fweb -mfpmath=sse -frename-registers -minline-all-stringops -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -ggdb") set (BIOEM_GCC_FLAGS "-O3 -march=native -fweb -mfpmath=sse -frename-registers -minline-all-stringops -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -ggdb")
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
...@@ -50,11 +50,6 @@ if (NOT FFTW_FOUND) ...@@ -50,11 +50,6 @@ if (NOT FFTW_FOUND)
endif() endif()
include_directories(${FFTW_INCLUDE_DIRS}) include_directories(${FFTW_INCLUDE_DIRS})
find_package( Boost 1.43 REQUIRED COMPONENTS program_options )
include_directories( ${Boost_INCLUDE_DIRS} )
###Find Optional Packages ###Find Optional Packages
###Find CUDA ###Find CUDA
...@@ -163,7 +158,6 @@ if (FFTWF_LIBRARIES) ...@@ -163,7 +158,6 @@ if (FFTWF_LIBRARIES)
else() else()
target_link_libraries(bioEM -L${FFTW_LIBDIR} -lfftw3 -lfftw3f) target_link_libraries(bioEM -L${FFTW_LIBDIR} -lfftw3 -lfftw3f)
endif() endif()
target_link_libraries(bioEM ${Boost_PROGRAM_OPTIONS_LIBRARY})
if (MPI_FOUND) if (MPI_FOUND)
target_link_libraries(bioEM ${MPI_LIBRARIES}) target_link_libraries(bioEM ${MPI_LIBRARIES})
...@@ -172,7 +166,6 @@ endif() ...@@ -172,7 +166,6 @@ endif()
###Show Status ###Show Status
message(STATUS "Build Status") message(STATUS "Build Status")
message(STATUS "FFTW library: ${FFTW_LIBDIR}") message(STATUS "FFTW library: ${FFTW_LIBDIR}")
message(STATUS "Boost directory: ${Boost_LIBRARY_DIRS}")
message(STATUS "FFTW includedir: ${FFTW_INCLUDEDIR}") message(STATUS "FFTW includedir: ${FFTW_INCLUDEDIR}")
message(STATUS "CUDA libraries: ${CUDA_CUDA_LIBRARY}") message(STATUS "CUDA libraries: ${CUDA_CUDA_LIBRARY}")
message(STATUS "CUDART libraries: ${CUDA_LIBRARIES}") message(STATUS "CUDART libraries: ${CUDA_LIBRARIES}")
......
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2017 Pilar Cossio, Markus Rampp, Luka Stanisic and Gerhard
Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3.
See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
#include "autotuner.h" #include "autotuner.h"
void Autotuner::Reset() void Autotuner::Reset()
...@@ -16,64 +28,75 @@ void Autotuner::Reset() ...@@ -16,64 +28,75 @@ void Autotuner::Reset()
fb = 0.; fb = 0.;
fx = 0.; fx = 0.;
if (algo == 3) workload = 50; if (algo == 3)
workload = 50;
} }
bool Autotuner::Needed(int iteration) bool Autotuner::Needed(int iteration)
{ {
if (stopTuning) return false; if (stopTuning)
return false;
switch (algo) switch (algo)
{ {
case 1: case 1:
case 3: case 3:
return iteration % (stable + 1) == stable; return iteration % (stable + 1) == stable;
case 2: return (iteration == (int) stable / 2 ) || (iteration == stable); case 2:
return (iteration == (int) stable / 2) || (iteration == stable);
default: /* Should never happen */; default: /* Should never happen */;
} }
return false; return false;
} }
bool Autotuner::Finished() bool Autotuner::Finished()
{ {
switch (algo) switch (algo)
{ {
case 1: case 1:
if (workload < 30) if (workload < 30)
{ {
workload = best_workload; workload = best_workload;
return stopTuning = true; return stopTuning = true;
} }
break; break;
case 2: case 2:
if (best_workload != 0) return stopTuning = true; if (best_workload != 0)
return stopTuning = true;
break; break;
case 3: case 3:
if ((c - b == limit) && (b - a == limit)) return stopTuning = true; if ((c - b == limit) && (b - a == limit))
return stopTuning = true;
break; break;
default: /* Should never happen */; default: /* Should never happen */;
} }
return false; return false;
} }
void Autotuner::Tune(double compTime) void Autotuner::Tune(double compTime)
{ {
switch (algo) switch (algo)
{ {
case 1: AlgoSimple(compTime); break; case 1:
case 2: AlgoRatio(compTime); break; AlgoSimple(compTime);
case 3: AlgoBisection(compTime); break; break;
case 2:
AlgoRatio(compTime);
break;
case 3:
AlgoBisection(compTime);
break;
default: /* Should never happen */; default: /* Should never happen */;
} }
} }
void Autotuner::AlgoSimple(double compTime) void Autotuner::AlgoSimple(double compTime)
{ {
if (best_time == 0. || compTime < best_time) if (best_time == 0. || compTime < best_time)
{ {
best_time = compTime; best_time = compTime;
best_workload = workload; best_workload = workload;
} }
workload -= 5; workload -= 5;
} }
...@@ -81,46 +104,46 @@ void Autotuner::AlgoSimple(double compTime) ...@@ -81,46 +104,46 @@ void Autotuner::AlgoSimple(double compTime)
void Autotuner::AlgoRatio(double compTime) void Autotuner::AlgoRatio(double compTime)
{ {
if (best_time == 0.) if (best_time == 0.)
{ {
best_time = compTime; best_time = compTime;
workload = 1; workload = 1;
} }
else else
{ {
best_workload = (int) 100 * (compTime / (best_time + compTime)); best_workload = (int) 100 * (compTime / (best_time + compTime));
workload = best_workload; workload = best_workload;
} }
} }
void Autotuner::AlgoBisection(double compTime) void Autotuner::AlgoBisection(double compTime)
{ {
if (fb == 0.) if (fb == 0.)
{ {
fb = compTime; fb = compTime;
x = 75; x = 75;
workload = x; workload = x;
return; return;
} }
fx = compTime; fx = compTime;
if (fx < fb) if (fx < fb)
{ {
if (x < b) if (x < b)
c = b; c = b;
else else
a = b; a = b;
b = x; b = x;
fb = fx; fb = fx;
} }
else else
{ {
if (x < b) if (x < b)
a = x; a = x;
else else
c = x; c = x;
} }
x = (c-b > b-a) ? (int)(b+(c-b)/2) : (int)(a+(b-a+1)/2); x = (c - b > b - a) ? (int) (b + (c - b) / 2) : (int) (a + (b - a + 1) / 2);
workload = x; workload = x;
} }
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images> < BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, Copyright (C) 2017 Pilar Cossio, Markus Rampp, Luka Stanisic and Gerhard
Volker Lindenstruth and Gerhard Hummer. Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany. Max Planck Institute of Biophysics, Frankfurt, Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3.
See license statement for terms of distribution. See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
...@@ -11,52 +13,58 @@ ...@@ -11,52 +13,58 @@
#ifndef AUTOTUNER_H #ifndef AUTOTUNER_H
#define AUTOTUNER_H #define AUTOTUNER_H
/* Autotuner: searches for a workload value by timing comparisons.
 *
 * Typical use: Initialize(), then for each iteration call Needed(); when it
 * returns true pass the measured time to Tune(), read the next value with
 * Workload(), and stop once Finished() returns true.
 */
class Autotuner
{
public:
  /* Build an inactive tuner (stopTuning == true).
   *
   * Every member is given a defined value so that Workload() and the other
   * accessors never read indeterminate memory before Initialize() is called.
   * The zero values are placeholders only; Initialize() sets the real state.
   * The initialiser order matches the declaration order below.
   */
  Autotuner()
      : algo(0), stable(0), stopTuning(true), workload(0), best_time(0.),
        best_workload(0), a(0), b(0), c(0), x(0), limit(0), fb(0.), fx(0.)
  {
  }

  /* Setting variables to initial values.
   *
   * alg: autotuning algorithm (1 = AlgoSimple, 2 = AlgoRatio,
   *      3 = AlgoBisection; default 3).
   * st:  value stored in `stable` and used by Needed() to decide at which
   *      iterations a tuning step happens (default 7).
   */
  inline void Initialize(int alg = 3, int st = 7)
  {
    algo = alg;
    stable = st;
    Reset();
  }

  /* Resetting variables to initial values */
  void Reset();

  /* Check if autotuning is needed, depending on which comparison is finished */
  bool Needed(int iteration);

  /* Check if optimal workload value has been computed */
  bool Finished();

  /* Set a new workload value to test, depending on the algorithm */
  void Tune(double compTime);

  /* Return workload value */
  inline int Workload() { return workload; }

private:
  int algo;        // Selected algorithm: 1, 2 or 3
  int stable;      // Iteration count used by Needed()
  bool stopTuning; // True once tuning is finished (or never started)
  int workload;    // Workload value currently being tested

  /* Variables needed for AlgoSimple and AlgoRatio */
  double best_time;
  int best_workload;

  /* Variables needed for AlgoBisection */
  int a;
  int b;
  int c;
  int x;
  int limit;
  double fb, fx;

  /* Autotuning algorithms */
  void AlgoSimple(double compTime);
  void AlgoRatio(double compTime);
  void AlgoBisection(double compTime);
};
#endif #endif
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images> < BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer. Luka Stanisic, Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany. Max Planck Institute of Biophysics, Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. Frankfurt Institute for Advanced Studies, Goethe University Frankfurt,
Max Planck Computing and Data Facility, Garching, Germany. Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3. Released under the GNU Public License, v3.
See license statement for terms of distribution. See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
...@@ -14,66 +15,87 @@ ...@@ -14,66 +15,87 @@
#ifndef BIOEM_H #ifndef BIOEM_H
#define BIOEM_H #define BIOEM_H
#include "defs.h"
#include "bioem.h" #include "bioem.h"
#include "model.h" #include "defs.h"
#include "map.h" #include "map.h"
#include "model.h"
#include "param.h" #include "param.h"
class bioem class bioem
{ {
friend class bioem_RefMap; friend class bioem_RefMap;
friend class bioem_Probability; friend class bioem_Probability;
public: public:
bioem(); bioem();
virtual ~bioem(); virtual ~bioem();
int configure(int ac, char* av[]); void printOptions(myoption_t *myoptions, int myoptions_length);
void cleanup(); //Cleanup everything happening during configure int readOptions(int ac, char *av[]);
int configure(int ac, char *av[]);
int precalculate(); // Is it better to pass directly the input File names? void cleanup(); // Cleanup everything happening during configure
int dopreCalCrossCorrelation(int iRefMap, int iRefMapLocal);
int run(); int precalculate(); // Is it better to pass directly the input File names?
int doProjections(int iMap); inline int needToPrintModel() { return param.printModel; }
int createConvolutedProjectionMap(int iOreint, int iMap, mycomplex_t* lproj, myfloat_t* Mapconv, mycomplex_t* localmultFFT, myfloat_t& sumC, myfloat_t& sumsquareC); int printModel();
int run();
virtual int compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); int doProjections(int iMap);
int createConvolutedProjectionMap(int iOreint, int iMap, mycomplex_t *lproj,
virtual void* malloc_device_host(size_t size); mycomplex_t *localmultFFT, myfloat_t &sumC,
virtual void free_device_host(void* ptr); myfloat_t &sumsquareC);
virtual void rebalance(int workload); //Rebalance GPUWorkload int createConvolutedProjectionMap_noFFT(mycomplex_t *lproj,
void rebalanceWrapper(int workload); //Rebalance wrapper myfloat_t *Mapconv,
mycomplex_t *localmultFFT,
int createProjection(int iMap, mycomplex_t* map); myfloat_t &sumC,
int calcross_cor(myfloat_t* localmap, myfloat_t& sum, myfloat_t& sumsquare); myfloat_t &sumsquareC);
void calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT, mycomplex_t* localCCT, myfloat_t* lCC);
virtual int compareRefMaps(int iPipeline, int iOrient, int iConv,
bioem_Probability pProb; int maxParallelConv, mycomplex_t *localmultFFT,
myparam5_t *comp_params, const int startMap = 0);
string OutfileName;
bool yesoutfilename; virtual void *malloc_device_host(size_t size);
virtual void free_device_host(void *ptr);
virtual void rebalance(int workload); // Rebalance GPUWorkload
void rebalanceWrapper(int workload); // Rebalance wrapper
int createProjection(int iMap, mycomplex_t *map);
int calcross_cor(myfloat_t *localmap, myfloat_t &sum, myfloat_t &sumsquare);
void calculateCCFFT(int iMap, mycomplex_t *localConvFFT,
mycomplex_t *localCCT, myfloat_t *lCC);
void doRefMap_CPU_Parallel(int iRefMap, int iOrient, int iConv,
myfloat_t *lCC, myparam5_t *comp_params,
myblockCPU_t *comp_block);
void doRefMap_CPU_Reduce(int iRefMap, int iOrient, int iConvStart,
int maxParallelConv, myparam5_t *comp_params,
myblockCPU_t *comp_block);
bioem_Probability pProb;
string OutfileName;
protected: protected:
virtual int deviceInit(); virtual int deviceInit();
virtual int deviceStartRun(); virtual int deviceStartRun();
virtual int deviceFinishRun(); virtual int deviceFinishRun();
bioem_param param; bioem_param param;
bioem_model Model; bioem_model Model;
bioem_RefMap RefMap; bioem_RefMap RefMap;
int nReferenceMaps; //Maps in memory at a time int nReferenceMaps; // Maps in memory at a time
int nReferenceMapsTotal; //Maps in total int nReferenceMapsTotal; // Maps in total
int nProjectionMaps; //Maps in memory at a time int nProjectionMaps; // Maps in memory at a time
int nProjectionMapsTotal; //Maps in total int nProjectionMapsTotal; // Maps in total
int FFTAlgo; //Use the FFT Algorithm (Default 1) int BioEMAlgo; // BioEM algorithm used to do comparison (Default 1)
int DebugOutput; //Debug Output Level (Default 2) int CudaThreadCount; // Number of CUDA threads used in each block (Default
int nProjectionsAtOnce; //Number of projections to do at once via OpenMP (Default 1) // depends on the BioEM algorithm)
bool Autotuning; //Do the autotuning of the load-balancing between CPUs and GPUs int DebugOutput; // Debug Output Level (Default 0)
int nProjectionsAtOnce; // Number of projections to do at once via OpenMP
// (Default number of OMP threads)
bool Autotuning; // Do the autotuning of the load-balancing between CPUs and
// GPUs (Default 1, if GPUs are used and GPUWORKLOAD is not specified)
}; };
#endif #endif
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images> < BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer. Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany. Max Planck Institute of Biophysics, Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. Frankfurt Institute for Advanced Studies, Goethe University Frankfurt,
Max Planck Computing and Data Facility, Garching, Germany. Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3. Released under the GNU Public License, v3.
See license statement for terms of distribution. See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
...@@ -16,6 +17,6 @@ ...@@ -16,6 +17,6 @@
#include "bioem.h" #include "bioem.h"
extern bioem* bioem_cuda_create(); extern bioem *bioem_cuda_create();
#endif #endif
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images> < BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer. Luka Stanisic, Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany. Max Planck Institute of Biophysics, Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. Frankfurt Institute for Advanced Studies, Goethe University Frankfurt,
Max Planck Computing and Data Facility, Garching, Germany. Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3. Released under the GNU Public License, v3.
See license statement for terms of distribution. See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
...@@ -17,7 +18,7 @@ ...@@ -17,7 +18,7 @@
#include <cuda.h> #include <cuda.h>
#include <cufft.h> #include <cufft.h>
//Hack to make nvcc compiler accept fftw.h, float128 is not used anyway // Hack to make nvcc compiler accept fftw.h, float128 is not used anyway
#define __float128 double #define __float128 double
#include <fftw3.h> #include <fftw3.h>
#undef __float128 #undef __float128
...@@ -27,51 +28,60 @@ ...@@ -27,51 +28,60 @@
class bioem_cuda : public bioem class bioem_cuda : public bioem
{ {
public: public:
bioem_cuda(); bioem_cuda();
virtual ~bioem_cuda(); virtual ~bioem_cuda();
virtual int compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); virtual int compareRefMaps(int iPipeline, int iOrient, int iConv,
virtual void* malloc_device_host(size_t size); int maxParallelConv, mycomplex_t *localmultFFT,
virtual void free_device_host(void* ptr); myparam5_t *comp_params, const int startMap = 0);
virtual void rebalance(int workload); //Rebalance GPUWorkload virtual void *malloc_device_host(size_t size);
virtual void free_device_host(void *ptr);
virtual void rebalance(int workload); // Rebalance GPUWorkload
protected: protected:
virtual int deviceInit(); virtual int deviceInit();
virtual int deviceStartRun(); virtual int deviceStartRun();
virtual int deviceFinishRun(); virtual int deviceFinishRun();
int deviceExit(); int deviceExit();
private: private:
int selectCudaDevice(); int selectCudaDevice();
int deviceInitialized; int deviceInitialized;
cudaStream_t cudaStream[3]; cudaStream_t cudaStream[PIPELINE_LVL + 1]; // Streams are used for both
cudaEvent_t cudaEvent[3]; // PIPELINE and MULTISTREAM control
cudaEvent_t cudaFFTEvent[2]; cudaEvent_t cudaEvent[PIPELINE_LVL + 1];
bioem_RefMap_Mod* pRefMap_device_Mod; cudaEvent_t cudaFFTEvent[MULTISTREAM_LVL];
bioem_RefMap* gpumap; bioem_RefMap *gpumap;
bioem_Probability* pProb_host; bioem_Probability *pProb_host;
bioem_Probability pProb_device; bioem_Probability pProb_device;
void* pProb_memory; void *pProb_memory;
myfloat_t* pConvMap_device[2];
mycomplex_t *pRefMapsFFT;
mycomplex_t* pRefMapsFFT; mycomplex_t *pConvMapFFT;
mycomplex_t* pConvMapFFT; mycomplex_t *pConvMapFFT_Host;
mycomplex_t* pConvMapFFT_Host; mycuComplex_t *pFFTtmp2[MULTISTREAM_LVL];
mycuComplex_t* pFFTtmp2[2]; myfloat_t *pFFTtmp[MULTISTREAM_LVL];
myfloat_t* pFFTtmp[2]; cufftHandle plan[SPLIT_MAPS_LVL][MULTISTREAM_LVL];
cufftHandle plan[2][2];
myparam5_t *pTmp_comp_params;
myfloat_t *maps, *sum, *sumsquare;
myblockGPU_t *pTmp_comp_blocks;
int GPUAlgo; //GPU Algorithm to use, 0: parallelize over maps, 1: as 0 but work split in multiple kernels (better), 2: also parallelize over shifts (best) int Ncomp_blocks;
int GPUAsync; //Run GPU Asynchronously, do the convolutions on the host in parallel.
int GPUDualStream; //Use two streams to improve paralelism bool *initialized_const; // In order to make sure Constoadd is initialized to
int GPUWorkload; //Percentage of workload to perform on GPU. Default 100. Rest is done on processor in parallel. // the first value
int maxRef; myfloat_t *sum, *sumsquare;
int GPUAsync; // Run GPU Asynchronously, do the convolutions on the host in
// parallel.
int GPUDualStream; // Use two streams to improve paralelism
int GPUWorkload; // Percentage of workload to perform on GPU. Default 100.
// Rest is done on processor in parallel.
int maxRef;
}; };
#endif #endif
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images> < BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer. Luka Stanisic, Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany. Max Planck Institute of Biophysics, Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. Frankfurt Institute for Advanced Studies, Goethe University Frankfurt,
<