Commit 4b034659 authored by Luka Stanisic's avatar Luka Stanisic

rel2: code development

parent 254d53db
......@@ -29,7 +29,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
include_directories(include)
set (BIOEM_ICC_FLAGS "-xHost -O3 -fno-alias -fno-fnalias -unroll -g0 -ipo")
set (BIOEM_ICC_FLAGS "-O3 -fno-alias -fno-fnalias -unroll -g0 -ip")
set (BIOEM_GCC_FLAGS "-O3 -march=native -fweb -mfpmath=sse -frename-registers -minline-all-stringops -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -ggdb")
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
......@@ -50,11 +50,6 @@ if (NOT FFTW_FOUND)
endif()
include_directories(${FFTW_INCLUDE_DIRS})
find_package( Boost 1.43 REQUIRED COMPONENTS program_options )
include_directories( ${Boost_INCLUDE_DIRS} )
###Find Optional Packages
###Find CUDA
......@@ -163,7 +158,6 @@ if (FFTWF_LIBRARIES)
else()
target_link_libraries(bioEM -L${FFTW_LIBDIR} -lfftw3 -lfftw3f)
endif()
target_link_libraries(bioEM ${Boost_PROGRAM_OPTIONS_LIBRARY})
if (MPI_FOUND)
target_link_libraries(bioEM ${MPI_LIBRARIES})
......@@ -172,7 +166,6 @@ endif()
###Show Status
message(STATUS "Build Status")
message(STATUS "FFTW library: ${FFTW_LIBDIR}")
message(STATUS "Boost directory: ${Boost_LIBRARY_DIRS}")
message(STATUS "FFTW includedir: ${FFTW_INCLUDEDIR}")
message(STATUS "CUDA libraries: ${CUDA_CUDA_LIBRARY}")
message(STATUS "CUDART libraries: ${CUDA_LIBRARIES}")
......
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2017 Pilar Cossio, Markus Rampp, Luka Stanisic and Gerhard
Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3.
See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
#include "autotuner.h"
void Autotuner::Reset()
......@@ -16,19 +28,22 @@ void Autotuner::Reset()
fb = 0.;
fx = 0.;
if (algo == 3) workload = 50;
if (algo == 3)
workload = 50;
}
bool Autotuner::Needed(int iteration)
{
if (stopTuning) return false;
if (stopTuning)
return false;
switch (algo)
{
case 1:
case 3:
return iteration % (stable + 1) == stable;
case 2: return (iteration == (int) stable / 2 ) || (iteration == stable);
case 2:
return (iteration == (int) stable / 2) || (iteration == stable);
default: /* Should never happen */;
}
return false;
......@@ -46,10 +61,12 @@ bool Autotuner::Finished()
}
break;
case 2:
if (best_workload != 0) return stopTuning = true;
if (best_workload != 0)
return stopTuning = true;
break;
case 3:
if ((c - b == limit) && (b - a == limit)) return stopTuning = true;
if ((c - b == limit) && (b - a == limit))
return stopTuning = true;
break;
default: /* Should never happen */;
}
......@@ -60,9 +77,15 @@ void Autotuner::Tune(double compTime)
{
switch (algo)
{
case 1: AlgoSimple(compTime); break;
case 2: AlgoRatio(compTime); break;
case 3: AlgoBisection(compTime); break;
case 1:
AlgoSimple(compTime);
break;
case 2:
AlgoRatio(compTime);
break;
case 3:
AlgoBisection(compTime);
break;
default: /* Should never happen */;
}
}
......@@ -121,6 +144,6 @@ void Autotuner::AlgoBisection(double compTime)
c = x;
}
x = (c-b > b-a) ? (int)(b+(c-b)/2) : (int)(a+(b-a+1)/2);
x = (c - b > b - a) ? (int) (b + (c - b) / 2) : (int) (a + (b - a + 1) / 2);
workload = x;
}
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer.
Copyright (C) 2017 Pilar Cossio, Markus Rampp, Luka Stanisic and Gerhard
Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3.
See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
......@@ -11,13 +13,19 @@
#ifndef AUTOTUNER_H
#define AUTOTUNER_H
class Autotuner {
class Autotuner
{
public:
Autotuner() {stopTuning = true;}
Autotuner() { stopTuning = true; }
/* Setting variables to initial values */
inline void Initialize(int alg=3, int st=7) {algo = alg; stable=st; Reset(); }
inline void Initialize(int alg = 3, int st = 7)
{
algo = alg;
stable = st;
Reset();
}
/* Resetting variables to initial values */
void Reset();
......@@ -32,7 +40,7 @@ public:
void Tune(double compTime);
/* Return workload value */
inline int Workload() {return workload;}
inline int Workload() { return workload; }
private:
int algo;
......
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer.
Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Luka Stanisic, Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt,
Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3.
......@@ -14,10 +15,10 @@
#ifndef BIOEM_H
#define BIOEM_H
#include "defs.h"
#include "bioem.h"
#include "model.h"
#include "defs.h"
#include "map.h"
#include "model.h"
#include "param.h"
class bioem
......@@ -29,31 +30,48 @@ public:
bioem();
virtual ~bioem();
int configure(int ac, char* av[]);
void cleanup(); //Cleanup everything happening during configure
void printOptions(myoption_t *myoptions, int myoptions_length);
int readOptions(int ac, char *av[]);
int configure(int ac, char *av[]);
void cleanup(); // Cleanup everything happening during configure
int precalculate(); // Is it better to pass directly the input File names?
int dopreCalCrossCorrelation(int iRefMap, int iRefMapLocal);
inline int needToPrintModel() { return param.printModel; }
int printModel();
int run();
int doProjections(int iMap);
int createConvolutedProjectionMap(int iOreint, int iMap, mycomplex_t* lproj, myfloat_t* Mapconv, mycomplex_t* localmultFFT, myfloat_t& sumC, myfloat_t& sumsquareC);
virtual int compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0);
virtual void* malloc_device_host(size_t size);
virtual void free_device_host(void* ptr);
virtual void rebalance(int workload); //Rebalance GPUWorkload
void rebalanceWrapper(int workload); //Rebalance wrapper
int createProjection(int iMap, mycomplex_t* map);
int calcross_cor(myfloat_t* localmap, myfloat_t& sum, myfloat_t& sumsquare);
void calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT, mycomplex_t* localCCT, myfloat_t* lCC);
int createConvolutedProjectionMap(int iOreint, int iMap, mycomplex_t *lproj,
mycomplex_t *localmultFFT, myfloat_t &sumC,
myfloat_t &sumsquareC);
int createConvolutedProjectionMap_noFFT(mycomplex_t *lproj,
myfloat_t *Mapconv,
mycomplex_t *localmultFFT,
myfloat_t &sumC,
myfloat_t &sumsquareC);
virtual int compareRefMaps(int iPipeline, int iOrient, int iConv,
int maxParallelConv, mycomplex_t *localmultFFT,
myparam5_t *comp_params, const int startMap = 0);
virtual void *malloc_device_host(size_t size);
virtual void free_device_host(void *ptr);
virtual void rebalance(int workload); // Rebalance GPUWorkload
void rebalanceWrapper(int workload); // Rebalance wrapper
int createProjection(int iMap, mycomplex_t *map);
int calcross_cor(myfloat_t *localmap, myfloat_t &sum, myfloat_t &sumsquare);
void calculateCCFFT(int iMap, mycomplex_t *localConvFFT,
mycomplex_t *localCCT, myfloat_t *lCC);
void doRefMap_CPU_Parallel(int iRefMap, int iOrient, int iConv,
myfloat_t *lCC, myparam5_t *comp_params,
myblockCPU_t *comp_block);
void doRefMap_CPU_Reduce(int iRefMap, int iOrient, int iConvStart,
int maxParallelConv, myparam5_t *comp_params,
myblockCPU_t *comp_block);
bioem_Probability pProb;
string OutfileName;
bool yesoutfilename;
protected:
virtual int deviceInit();
......@@ -64,16 +82,20 @@ protected:
bioem_model Model;
bioem_RefMap RefMap;
int nReferenceMaps; //Maps in memory at a time
int nReferenceMapsTotal; //Maps in total
int nReferenceMaps; // Maps in memory at a time
int nReferenceMapsTotal; // Maps in total
int nProjectionMaps; //Maps in memory at a time
int nProjectionMapsTotal; //Maps in total
int nProjectionMaps; // Maps in memory at a time
int nProjectionMapsTotal; // Maps in total
int FFTAlgo; //Use the FFT Algorithm (Default 1)
int DebugOutput; //Debug Output Level (Default 2)
int nProjectionsAtOnce; //Number of projections to do at once via OpenMP (Default 1)
bool Autotuning; //Do the autotuning of the load-balancing between CPUs and GPUs
int BioEMAlgo; // BioEM algorithm used to do comparison (Default 1)
int CudaThreadCount; // Number of CUDA threads used in each block (Default
// depends on the BioEM algorithm)
int DebugOutput; // Debug Output Level (Default 0)
int nProjectionsAtOnce; // Number of projections to do at once via OpenMP
// (Default number of OMP threads)
bool Autotuning; // Do the autotuning of the load-balancing between CPUs and
// GPUs (Default 1, if GPUs are used and GPUWORKLOAD is not specified)
};
#endif
......@@ -3,7 +3,8 @@
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt,
Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3.
......@@ -16,6 +17,6 @@
#include "bioem.h"
extern bioem* bioem_cuda_create();
extern bioem *bioem_cuda_create();
#endif
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer.
Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Luka Stanisic, Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt,
Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3.
......@@ -17,7 +18,7 @@
#include <cuda.h>
#include <cufft.h>
//Hack to make nvcc compiler accept fftw.h, float128 is not used anyway
// Hack to make nvcc compiler accept fftw.h, float128 is not used anyway
#define __float128 double
#include <fftw3.h>
#undef __float128
......@@ -30,10 +31,12 @@ public:
bioem_cuda();
virtual ~bioem_cuda();
virtual int compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0);
virtual void* malloc_device_host(size_t size);
virtual void free_device_host(void* ptr);
virtual void rebalance(int workload); //Rebalance GPUWorkload
virtual int compareRefMaps(int iPipeline, int iOrient, int iConv,
int maxParallelConv, mycomplex_t *localmultFFT,
myparam5_t *comp_params, const int startMap = 0);
virtual void *malloc_device_host(size_t size);
virtual void free_device_host(void *ptr);
virtual void rebalance(int workload); // Rebalance GPUWorkload
protected:
virtual int deviceInit();
......@@ -46,32 +49,39 @@ private:
int deviceInitialized;
cudaStream_t cudaStream[3];
cudaEvent_t cudaEvent[3];
cudaEvent_t cudaFFTEvent[2];
bioem_RefMap_Mod* pRefMap_device_Mod;
bioem_RefMap* gpumap;
bioem_Probability* pProb_host;
cudaStream_t cudaStream[PIPELINE_LVL + 1]; // Streams are used for both
// PIPELINE and MULTISTREAM control
cudaEvent_t cudaEvent[PIPELINE_LVL + 1];
cudaEvent_t cudaFFTEvent[MULTISTREAM_LVL];
bioem_RefMap *gpumap;
bioem_Probability *pProb_host;
bioem_Probability pProb_device;
void* pProb_memory;
myfloat_t* pConvMap_device[2];
void *pProb_memory;
mycomplex_t* pRefMapsFFT;
mycomplex_t* pConvMapFFT;
mycomplex_t* pConvMapFFT_Host;
mycuComplex_t* pFFTtmp2[2];
myfloat_t* pFFTtmp[2];
cufftHandle plan[2][2];
mycomplex_t *pRefMapsFFT;
mycomplex_t *pConvMapFFT;
mycomplex_t *pConvMapFFT_Host;
mycuComplex_t *pFFTtmp2[MULTISTREAM_LVL];
myfloat_t *pFFTtmp[MULTISTREAM_LVL];
cufftHandle plan[SPLIT_MAPS_LVL][MULTISTREAM_LVL];
myfloat_t *maps, *sum, *sumsquare;
myparam5_t *pTmp_comp_params;
int GPUAlgo; //GPU Algorithm to use, 0: parallelize over maps, 1: as 0 but work split in multiple kernels (better), 2: also parallelize over shifts (best)
int GPUAsync; //Run GPU Asynchronously, do the convolutions on the host in parallel.
int GPUDualStream; //Use two streams to improve paralelism
int GPUWorkload; //Percentage of workload to perform on GPU. Default 100. Rest is done on processor in parallel.
myblockGPU_t *pTmp_comp_blocks;
int Ncomp_blocks;
bool *initialized_const; // In order to make sure Constoadd is initialized to
// the first value
myfloat_t *sum, *sumsquare;
int GPUAsync; // Run GPU Asynchronously, do the convolutions on the host in
// parallel.
int GPUDualStream; // Use two streams to improve parallelism
int GPUWorkload; // Percentage of workload to perform on GPU. Default 100.
// Rest is done on processor in parallel.
int maxRef;
};
#endif
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer.
Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Luka Stanisic, Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt,
Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3.
......@@ -14,11 +15,22 @@
#ifndef BIOEM_DEFS_H
#define BIOEM_DEFS_H
#define BIOEM_PROB_DOUBLE
//#define BIOEM_USE_DOUBLE
//#define DEBUG
//#define DEBUG_GPU
//#define DEBUG_PROB
//#define PILAR_DEBUG
#ifndef BIOEM_PROB_DOUBLE
typedef float myprob_t;
#define MY_MPI_FLOAT MPI_FLOAT
#else
typedef double myprob_t;
#define MY_MPI_FLOAT MPI_DOUBLE
#endif
#ifndef BIOEM_USE_DOUBLE
#define MIN_PROB -999999.
typedef float myfloat_t;
#define myfftw_malloc fftwf_malloc
#define myfftw_free fftwf_free
......@@ -35,9 +47,9 @@ typedef float myfloat_t;
#define MY_CUFFT_C2R CUFFT_C2R
#define mycufftExecC2R cufftExecC2R
#define mycuComplex_t cuComplex
#define MY_MPI_FLOAT MPI_FLOAT
#else
typedef double myfloat_t;
#define MIN_PROB -999999.
#define myfftw_malloc fftw_malloc
#define myfftw_free fftw_free
#define myfftw_destroy_plan fftw_destroy_plan
......@@ -53,11 +65,10 @@ typedef double myfloat_t;
#define mycufftExecC2R cufftExecZ2D
#define mycuComplex_t cuDoubleComplex
#define MY_CUFFT_C2R CUFFT_Z2D
#define MY_MPI_FLOAT MPI_DOUBLE
#endif
typedef myfloat_t mycomplex_t[2];
#define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU
#define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 // Possibly set to 4 for GPU
struct myfloat3_t
{
......@@ -66,6 +77,52 @@ struct myfloat3_t
// myfloat_t prior;
};
/* myoptions
Structure for saving options, in order to mimic old Boost program_options
behaviour
*/
// One entry of an option table. bioem::printOptions() takes a pointer to an
// array of these together with the array length. Introduced to mimic the old
// Boost program_options behaviour.
struct myoption_t
{
// presumably the option's name as given on the command line -- TODO confirm
const char *name;
// NOTE(review): looks like a getopt-style "takes an argument" indicator --
// confirm against readOptions()
int arg;
// presumably the help text shown for this option -- TODO confirm
const char *desc;
// presumably true when the option should be left out of the printed help --
// TODO confirm against printOptions()
bool hidden;
};
/* comp_params
Put all parameters needed for each comparison in a single structure
This makes code cleaner and requires less GPU transfers
*/
// Bundle of the five myfloat_t values needed for one comparison.
// bioem::compareRefMaps() receives these through its comp_params pointer.
struct myparam5_t
{
// amp, pha, env: presumably the CTF amplitude, phase and envelope of the
// convolution being compared -- TODO confirm against the caller
myfloat_t amp;
myfloat_t pha;
myfloat_t env;
// sumC, sumsquareC: names match the reference outputs of
// bioem::createConvolutedProjectionMap(); presumably the sum and the sum of
// squares of the convoluted projection -- TODO confirm
myfloat_t sumC;
myfloat_t sumsquareC;
};
/* comp_block
Put all parameters created by each inside-block comparison
This makes code cleaner
*/
// For GPUs
// Values produced by one inside-block comparison on the GPU side.
// The myprob_t fields are float or double depending on BIOEM_PROB_DOUBLE.
struct myblockGPU_t
{
// presumably the log-probability found by this block -- TODO confirm
myprob_t logpro;
// NOTE(review): looks like an index identifying where logpro was found --
// confirm what it indexes
int id;
// presumably an accumulated sum of exponentials for this block -- TODO confirm
myprob_t sumExp;
// presumably the per-angle accumulation counterpart of sumExp -- TODO confirm
myprob_t sumAngles;
};
// For CPUs (easier to save value as well)
// Values produced by one inside-block comparison on the CPU side; passed as
// comp_block to bioem::doRefMap_CPU_Parallel() and doRefMap_CPU_Reduce().
// Differs from the GPU variant by carrying `value` instead of sumAngles.
struct myblockCPU_t
{
// presumably the log-probability found by this block -- TODO confirm
myprob_t logpro;
// NOTE(review): looks like an index identifying where logpro was found --
// confirm what it indexes
int id;
// presumably an accumulated sum of exponentials for this block -- TODO confirm
myprob_t sumExp;
// stored in myfloat_t precision (not myprob_t); NOTE(review): meaning of the
// saved value is not visible here -- confirm against doRefMap_CPU_Parallel()
myfloat_t value;
};
#ifdef BIOEM_GPUCODE
#define myThreadIdxX threadIdx.x
#define myThreadIdxY threadIdx.y
......@@ -85,44 +142,53 @@ struct myfloat3_t
#define myBlockIdxY 0
#endif
#define CUDA_THREAD_COUNT 256
#define CUDA_BLOCK_COUNT 1024 * 16
#define CUDA_MAX_SHIFT_REDUCE 1024
#define OUTPUT_PRECISION 4
#define CUDA_THREAD_COUNT_ALGO1 256
#define CUDA_THREAD_COUNT_ALGO2 512
#define CUDA_THREAD_MAX 1024
#define CUDA_FFTS_AT_ONCE 1024
//#define BIOEM_USE_NVTX
#define PIPELINE_LVL 2
#define MULTISTREAM_LVL 2
#define SPLIT_MAPS_LVL 2
/* Autotuning
Autotuning algorithms:
1. AlgoSimple = 1; Testing workload values between 100 and 30, all multiples of 5. Taking the value with the best timing.
2. AlgoRatio = 2; Comparisons where GPU handles 100% or only 1% of the workload are timed, and then the optimal workload balance is computed.
3. AlgoBisection = 3; Based on bisection, multiple workload values are tested until the optimal one is found.
1. AlgoSimple = 1; Testing workload values between 100 and 30, all multiples
of 5. Taking the value with the best timing.
2. AlgoRatio = 2; Comparisons where GPU handles 100% or only 1% of the
workload are timed, and then the optimal workload balance is computed.
3. AlgoBisection = 3; Based on bisection, multiple workload values are
tested until the optimal one is found.
*/
#define AUTOTUNING_ALGORITHM 3
/* Recalibrate every X projections. Put to a very high value, i.e., 99999, to de facto disable recalibration */
/* Recalibrate every X projections. Set to a very high value, e.g., 99999, to
* de facto disable recalibration */
#define RECALIB_FACTOR 200
/* After how many comparison iterations, comparison duration becomes stable */
#define FIRST_STABLE 7
static inline void* mallocchk(size_t size)
static inline void *mallocchk(size_t size)
{
void* ptr = malloc(size);
void *ptr = malloc(size);
if (ptr == 0)
{
std::cout << "Memory allocation error\n";
exit(1);
}
return(ptr);
return (ptr);
}
static inline void* reallocchk(void* oldptr, size_t size)
static inline void *reallocchk(void *oldptr, size_t size)
{
void* ptr = realloc(oldptr, size);
void *ptr = realloc(oldptr, size);
if (ptr == 0)
{
std::cout << "Memory allocation error\n";
exit(1);
}
return(ptr);
return (ptr);
}
#ifndef WITH_OPENMP
......
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer.
Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Luka Stanisic, Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany.
Frankfurt Institute for Advanced Studies, Goethe University Frankfurt,
Germany.
Max Planck Computing and Data Facility, Garching, Germany.
Released under the GNU Public License, v3.
......@@ -18,7 +19,6 @@
#include "param.h"
#include <complex>
#include <math.h>
#include <boost/concept_check.hpp>
class bioem_param;
class bioem;
......@@ -36,51 +36,68 @@ public:
void freePointers()
{
if (maps) free(maps);
if (sum_RefMap) free(sum_RefMap);
if (sumsquare_RefMap) free(sumsquare_RefMap);
if (RefMapsFFT) delete[] RefMapsFFT;
if (maps)
free(maps);
if (sum_RefMap)
free(sum_RefMap);
if (sumsquare_RefMap)
free(sumsquare_RefMap);
if (RefMapsFFT)
delete[] RefMapsFFT;
maps = NULL;
sum_RefMap = NULL;
sumsquare_RefMap = NULL;
RefMapsFFT = NULL;
}
int readRefMaps(bioem_param& param, const char* filemap);
int precalculate(bioem_param& param, bioem& bio);
int PreCalculateMapsFFT(bioem_param& param);
int readRefMaps(bioem_param &param, const char *filemap);
int precalculate(bioem_param &param, bioem &bio);
int PreCalculateMapsFFT(bioem_param &param);
int read_int(int *currlong, FILE *fin, int swap);
int read_float(float *currfloat, FILE *fin, int swap);
int read_float_empty (FILE *fin);
int read_char_float (float *currfloat, FILE *fin) ;
int test_mrc (const char *vol_file, int swap);
int read_MRC(const char* filename,bioem_param& param);
int read_float_empty(FILE *fin);
int read_char_float(float *currfloat, FILE *fin);
int test_mrc(const char *vol_file, int swap);
int read_MRC(const char *filename, bioem_param &param);
mycomplex_t* RefMapsFFT;
mycomplex_t *RefMapsFFT;
bool readMRC,readMultMRC;
bool readMRC, readMultMRC;
int ntotRefMap;
int numPixels;
int refMapSize;
myfloat_t* maps;
myfloat_t* sum_RefMap;
myfloat_t* sumsquare_RefMap;
myfloat_t *maps;
myfloat_t *sum_RefMap;
myfloat_t *sumsquare_RefMap;
__host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(maps[map * refMapSize + x * numPixels + y]);}
__host__ __device__ inline const myfloat_t* getp(int map, int x, int y) const {return(&maps[map * refMapSize + x * numPixels]);}
__host__ __device__ inline myfloat_t* getmap(int map) {return(&maps[map * refMapSize]);}
__host__ __device__ inline myfloat_t get(int map, int x, int y) const
{
return (maps[map * refMapSize + x * numPixels + y]);
}
__host__ __device__ inline const myfloat_t *getp(int map, int x, int y) const
{
return (&maps[map * refMapSize + x * numPixels]);
}
__host__ __device__ inline myfloat_t *getmap(int map)
{
return (&maps[map * refMapSize]);
}
};
class bioem_RefMap_Mod : public bioem_RefMap
{
public:
__host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(maps[(x * numPixels + y) * ntotRefMap + map]);}
__host__ __device__ inline myfloat_t get(int map, int x, int y) const
{
return (maps[(x * numPixels + y) * ntotRefMap + map]);
}
void init(const bioem_RefMap& map)
void init(const bioem_RefMap &map)
{
maps = (myfloat_t*) malloc(map.refMapSize * map.ntotRefMap * sizeof(myfloat_t));
#pragma omp parallel for
maps = (myfloat_t *) malloc(map.refMapSize * map.ntotRefMap *
sizeof(myfloat_t));
#pragma omp parallel for
for (int i = 0; i < map.ntotRefMap; i++)
{
for (int j = 0; j < map.numPixels; j++)
......@@ -97,30 +114,22 @@ public:
class bioem_Probability_map
{
public:
myfloat_t Total;
myfloat_t Constoadd;
myprob_t Total;
myprob_t Constoadd;
class bioem_Probability_map_max
{
public:
int max_prob_cent_x, max_prob_cent_y, max_prob_orient, max_prob_conv;
myfloat_t max_prob_norm,max_prob_mu;
myfloat_t max_prob_norm, max_prob_mu;
} max;
};
class bioem_Probability_angle
{
public:
myfloat_t forAngles;
myfloat_t ConstAngle;
myfloat_t priorang;
};
class bioem_Probability_cc
{
public:
myfloat_t forCC;
myfloat_t ConstCC;
myprob_t forAngles;
myprob_t ConstAngle;
};
class bioem_Probability
......@@ -128,34 +137,35 @@ class bioem_Probability
public:
int nMaps;
int nAngles;
int nCC;
__device__ __host__ bioem_Probability_map& getProbMap(int map) {return(ptr_map[map]);}
__device__ __host__ bioem_Probability_angle& getProbAngle(int map, int angle) {return(ptr_angle[angle * nMaps + map]);}
__device__ __host__ bioem_Probability_cc& getProbCC(int map, int cc) {return(ptr_cc[cc * nMaps + map]);}