diff --git a/CMakeLists.txt b/CMakeLists.txt index fd731f9a7896cfebf6d49a81eba7bd93c5c78da6..3b077b8d7681ce5beee3bc66b2e03d47206662e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") include_directories(include) -set (BIOEM_ICC_FLAGS "-xHost -O3 -fno-alias -fno-fnalias -unroll -g0 -ipo") +set (BIOEM_ICC_FLAGS "-O3 -fno-alias -fno-fnalias -unroll -g0 -ip") set (BIOEM_GCC_FLAGS "-O3 -march=native -fweb -mfpmath=sse -frename-registers -minline-all-stringops -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -ggdb") if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") @@ -50,11 +50,6 @@ if (NOT FFTW_FOUND) endif() include_directories(${FFTW_INCLUDE_DIRS}) -find_package( Boost 1.43 REQUIRED COMPONENTS program_options ) -include_directories( ${Boost_INCLUDE_DIRS} ) - - - ###Find Optional Packages ###Find CUDA @@ -163,7 +158,6 @@ if (FFTWF_LIBRARIES) else() target_link_libraries(bioEM -L${FFTW_LIBDIR} -lfftw3 -lfftw3f) endif() -target_link_libraries(bioEM ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (MPI_FOUND) target_link_libraries(bioEM ${MPI_LIBRARIES}) @@ -172,7 +166,6 @@ endif() ###Show Status message(STATUS "Build Status") message(STATUS "FFTW library: ${FFTW_LIBDIR}") -message(STATUS "Boost directory: ${Boost_LIBRARY_DIRS}") message(STATUS "FFTW includedir: ${FFTW_INCLUDEDIR}") message(STATUS "CUDA libraries: ${CUDA_CUDA_LIBRARY}") message(STATUS "CUDART libraries: ${CUDA_LIBRARIES}") diff --git a/autotuner.cpp b/autotuner.cpp index 125fc6c651579f8c0259a773a1326d410a716273..c31cd427d4ce7e6116e23a9390d39647356609c3 100644 --- a/autotuner.cpp +++ b/autotuner.cpp @@ -1,3 +1,15 @@ +/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + < BioEM software for Bayesian inference of Electron Microscopy images> + Copyright (C) 2017 Pilar Cossio, Markus Rampp, Luka Stanisic and Gerhard + Hummer. + Max Planck Institute of Biophysics, Frankfurt, Germany. 
+ Max Planck Computing and Data Facility, Garching, Germany. + + Released under the GNU Public License, v3. + See license statement for terms of distribution. + + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ + #include "autotuner.h" void Autotuner::Reset() @@ -16,64 +28,75 @@ void Autotuner::Reset() fb = 0.; fx = 0.; - if (algo == 3) workload = 50; + if (algo == 3) + workload = 50; } bool Autotuner::Needed(int iteration) { - if (stopTuning) return false; + if (stopTuning) + return false; switch (algo) - { + { case 1: case 3: return iteration % (stable + 1) == stable; - case 2: return (iteration == (int) stable / 2 ) || (iteration == stable); + case 2: + return (iteration == (int) stable / 2) || (iteration == stable); default: /* Should never happen */; - } + } return false; } bool Autotuner::Finished() { switch (algo) - { + { case 1: if (workload < 30) - { - workload = best_workload; - return stopTuning = true; - } + { + workload = best_workload; + return stopTuning = true; + } break; case 2: - if (best_workload != 0) return stopTuning = true; + if (best_workload != 0) + return stopTuning = true; break; case 3: - if ((c - b == limit) && (b - a == limit)) return stopTuning = true; + if ((c - b == limit) && (b - a == limit)) + return stopTuning = true; break; default: /* Should never happen */; - } + } return false; } void Autotuner::Tune(double compTime) { switch (algo) - { - case 1: AlgoSimple(compTime); break; - case 2: AlgoRatio(compTime); break; - case 3: AlgoBisection(compTime); break; + { + case 1: + AlgoSimple(compTime); + break; + case 2: + AlgoRatio(compTime); + break; + case 3: + AlgoBisection(compTime); + break; default: /* Should never happen */; - } + } } void Autotuner::AlgoSimple(double compTime) { if (best_time == 0. 
|| compTime < best_time) - { - best_time = compTime; - best_workload = workload; - } + { + best_time = compTime; + best_workload = workload; + } workload -= 5; } @@ -81,46 +104,46 @@ void Autotuner::AlgoSimple(double compTime) void Autotuner::AlgoRatio(double compTime) { if (best_time == 0.) - { - best_time = compTime; - workload = 1; - } + { + best_time = compTime; + workload = 1; + } else - { - best_workload = (int) 100 * (compTime / (best_time + compTime)); - workload = best_workload; - } + { + best_workload = (int) 100 * (compTime / (best_time + compTime)); + workload = best_workload; + } } void Autotuner::AlgoBisection(double compTime) { if (fb == 0.) - { - fb = compTime; - x = 75; - workload = x; - return; - } + { + fb = compTime; + x = 75; + workload = x; + return; + } fx = compTime; if (fx < fb) - { - if (x < b) - c = b; - else - a = b; - b = x; - fb = fx; - } + { + if (x < b) + c = b; + else + a = b; + b = x; + fb = fx; + } else - { - if (x < b) - a = x; - else - c = x; - } - - x = (c-b > b-a) ? (int)(b+(c-b)/2) : (int)(a+(b-a+1)/2); + { + if (x < b) + a = x; + else + c = x; + } + + x = (c - b > b - a) ? (int) (b + (c - b) / 2) : (int) (a + (b - a + 1) / 2); workload = x; } diff --git a/bioem.cpp b/bioem.cpp index 8b5d284dd4017df08d3a40dfd01022681bb6b547..1cf3cd02b2b1df89836a110a01fe46eaf7b41b6b 100644 --- a/bioem.cpp +++ b/bioem.cpp @@ -1,414 +1,538 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. - + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. 
+ Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ + #ifdef WITH_MPI #include <mpi.h> -#define MPI_CHK(expr) \ - if (expr != MPI_SUCCESS) \ - { \ - fprintf(stderr, "Error in MPI function %s: %d\n", __FILE__, __LINE__); \ - } +#define MPI_CHK(expr) \ + if (expr != MPI_SUCCESS) \ + { \ + fprintf(stderr, "Error in MPI function %s: %d\n", __FILE__, __LINE__); \ + } #endif +#include "MersenneTwister.h" +#include <algorithm> +#include <cmath> #include <fstream> -#include <boost/program_options.hpp> -#include <boost/random/normal_distribution.hpp> -#include <boost/random/uniform_int_distribution.hpp> -#include <boost/random/mersenne_twister.hpp> +#include <getopt.h> #include <iostream> -#include <algorithm> #include <iterator> +#include <queue> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <string> -#include <cmath> +#include <vector> #ifdef WITH_OPENMP #include <omp.h> #endif +#include "autotuner.h" +#include "timer.h" #include <fftw3.h> #include <math.h> -#include "timer.h" -#include "autotuner.h" -#include "param.h" #include "bioem.h" -#include "model.h" #include "map.h" +#include "model.h" +#include "param.h" #ifdef BIOEM_USE_NVTX #include "nvToolsExt.h" -const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff }; -const int num_colors = sizeof(colors)/sizeof(colors[0]); -enum myColor { COLOR_PROJECTION, COLOR_CONVOLUTION, COLOR_COMPARISON, COLOR_WORKLOAD, COLOR_INIT }; +const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, + 0x0000ffff, 0x00ff0000, 0x00ffffff}; +const int num_colors = sizeof(colors) / sizeof(colors[0]); +enum myColor +{ + COLOR_PROJECTION, + 
COLOR_CONVOLUTION, + COLOR_COMPARISON, + COLOR_WORKLOAD, + COLOR_INIT +}; // Projection number is stored in category attribute // Convolution number is stored in payload attribute -#define cuda_custom_timeslot(name,iMap,iConv,cid) { \ - int color_id = cid; \ - color_id = color_id%num_colors; \ - nvtxEventAttributes_t eventAttrib = {0}; \ - eventAttrib.version = NVTX_VERSION; \ - eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ - eventAttrib.colorType = NVTX_COLOR_ARGB; \ - eventAttrib.color = colors[color_id]; \ - eventAttrib.category = iMap; \ - eventAttrib.payloadType = NVTX_PAYLOAD_TYPE_UNSIGNED_INT64; \ - eventAttrib.payload.llValue = iConv; \ - eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ - eventAttrib.message.ascii = name; \ - nvtxRangePushEx(&eventAttrib); \ +#define cuda_custom_timeslot(name, iMap, iConv, cid) \ + { \ + int color_id = cid; \ + color_id = color_id % num_colors; \ + nvtxEventAttributes_t eventAttrib = {0}; \ + eventAttrib.version = NVTX_VERSION; \ + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ + eventAttrib.colorType = NVTX_COLOR_ARGB; \ + eventAttrib.color = colors[color_id]; \ + eventAttrib.category = iMap; \ + eventAttrib.payloadType = NVTX_PAYLOAD_TYPE_UNSIGNED_INT64; \ + eventAttrib.payload.llValue = iConv; \ + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ + eventAttrib.message.ascii = name; \ + nvtxRangePushEx(&eventAttrib); \ } #define cuda_custom_timeslot_end nvtxRangePop(); #else -#define cuda_custom_timeslot(name,iMap,iConv,cid) +#define cuda_custom_timeslot(name, iMap, iConv, cid) #define cuda_custom_timeslot_end #endif #include "bioem_algorithm.h" -using namespace boost; -namespace po = boost::program_options; -namespace bran= boost::random; - using namespace std; -/* For dvl nodes in hydra with problem in boost - namespace std { - typedef decltype(nullptr) nullptr_t; - }*/ - -// A helper function of Boost -template<class T> -ostream& operator<<(ostream& os, const vector<T>& v) -{ - copy(v.begin(), 
v.end(), ostream_iterator<T>(os, " ")); - return os; -} - bioem::bioem() { - FFTAlgo = getenv("FFTALGO") == NULL ? 1 : atoi(getenv("FFTALGO")); - DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 0 : atoi(getenv("BIOEM_DEBUG_OUTPUT")); - nProjectionsAtOnce = getenv("BIOEM_PROJECTIONS_AT_ONCE") == NULL ? 1 : atoi(getenv("BIOEM_PROJECTIONS_AT_ONCE")); + BioEMAlgo = getenv("BIOEM_ALGO") == NULL ? 1 : atoi(getenv("BIOEM_ALGO")); + + DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? + 0 : + atoi(getenv("BIOEM_DEBUG_OUTPUT")); + + if (getenv("BIOEM_PROJ_CONV_AT_ONCE") != NULL) + { + nProjectionsAtOnce = atoi(getenv("BIOEM_PROJ_CONV_AT_ONCE")); + if (BioEMAlgo == 1 && getenv("GPU") && atoi(getenv("GPU")) && + nProjectionsAtOnce > 1) + { + printf("Warning: using parallel convolutions with GPUs can create race " + "condition and lead to inaccurate results. " + "BIOEM_PROJ_CONV_AT_ONCE is going to be set 1.\n"); + nProjectionsAtOnce = 1; + } + } + else if (BioEMAlgo == 1) + nProjectionsAtOnce = 1; + else + nProjectionsAtOnce = + getenv("OMP_NUM_THREADS") == NULL ? 
1 : atoi(getenv("OMP_NUM_THREADS")); + + if (getenv("BIOEM_CUDA_THREAD_COUNT") != NULL) + CudaThreadCount = atoi(getenv("BIOEM_CUDA_THREAD_COUNT")); + else if (BioEMAlgo == 1) + CudaThreadCount = CUDA_THREAD_COUNT_ALGO1; + else + CudaThreadCount = CUDA_THREAD_COUNT_ALGO2; + Autotuning = false; } -bioem::~bioem() +bioem::~bioem() {} + +void bioem::printOptions(myoption_t *myoptions, int myoptions_length) { + printf("\nCommand line inputs:\n"); + + // Find longest column width + int maxlen = 0; + for (int i = 0; i < myoptions_length; i++) + { + if (myoptions[i].hidden) + continue; + if (maxlen < strlen(myoptions[i].name)) + maxlen = strlen(myoptions[i].name); + } + + for (int i = 0; i < myoptions_length; i++) + { + if (myoptions[i].hidden) + continue; + printf(" --%-*s", maxlen, myoptions[i].name); + if (myoptions[i].arg == required_argument) + printf(" arg"); + else + printf(" "); + printf(" %s\n", myoptions[i].desc); + } + printf("\n"); } -int bioem::configure(int ac, char* av[]) +int bioem::readOptions(int ac, char *av[]) { - // ************************************************************************************** - // **** Configuration Routine using boost for extracting parameters, models and maps **** - // ************************************************************************************** - // ****** And Precalculating necessary grids, map crosscorrelations and kernels ******** - // ************************************************************************************* - HighResTimer timer; + // *** Inizialzing default variables *** + std::string infile, modelfile, mapfile, Inputanglefile, Inputbestmap; + Model.readPDB = false; + param.param_device.writeAngles = 0; + param.dumpMap = false; + param.loadMap = false; + param.printModel = false; + RefMap.readMRC = false; + RefMap.readMultMRC = false; + param.notuniformangles = false; + OutfileName = "Output_Probabilities"; + + cout << " ++++++++++++ FROM COMMAND LINE +++++++++++\n\n"; + + // Write your options 
here + myoption_t myoptions[] = { + {"Modelfile", required_argument, "(Mandatory) Name of model file", false}, + {"Particlesfile", required_argument, + "(Mandatory) Name of particle-image file", false}, + {"Inputfile", required_argument, + "(Mandatory) Name of input parameter file", false}, + {"PrintBestCalMap", required_argument, + "(Optional) Only print best calculated map. NO BioEM!", true}, + {"ReadOrientation", required_argument, + "(Optional) Read file name containing orientations", false}, + {"ReadPDB", no_argument, "(Optional) If reading model file in PDB format", + false}, + {"ReadMRC", no_argument, + "(Optional) If reading particle file in MRC format", false}, + {"ReadMultipleMRC", no_argument, "(Optional) If reading Multiple MRCs", + false}, + {"DumpMaps", no_argument, + "(Optional) Dump maps after they were read from particle-image file", + false}, + {"LoadMapDump", no_argument, "(Optional) Read Maps from dump option", + false}, + {"OutputFile", required_argument, + "(Optional) For changing the outputfile name", false}, + {"help", no_argument, "(Optional) Produce help message", false}}; + int myoptions_length = sizeof(myoptions) / sizeof(myoption_t); + + // If not all Mandatory parameters are defined + if ((ac < 2)) + { + printf("Error: Need to specify all mandatory options\n"); + printOptions(myoptions, myoptions_length); + return 1; + } + // Creating options structure for getopt_long() + struct option *long_options = + (option *) calloc((myoptions_length + 1), sizeof(option)); + for (int i = 0; i < myoptions_length; i++) + { + long_options[i].name = myoptions[i].name; + long_options[i].has_arg = myoptions[i].arg; + } - std::string infile, modelfile, mapfile,Inputanglefile,Inputbestmap; - if (mpi_rank == 0) - { - // *** Inizialzing default variables *** - std::string infile, modelfile, mapfile,Inputanglefile,Inputbestmap; - Model.readPDB = false; - param.param_device.writeAngles = false; - param.dumpMap = false; - param.loadMap = false; - 
RefMap.readMRC = false; - RefMap.readMultMRC = false; - param.notuniformangles=false; - yesoutfilename=false; - - // ************************************************************************************* - cout << " ++++++++++++ FROM COMMAND LINE +++++++++++\n\n"; - // ************************************************************************************* - - // ********************* Command line reading input with BOOST ************************ - - try { - po::options_description desc("Command line inputs"); - desc.add_options() - ("Modelfile", po::value< std::string>() , "(Mandatory) Name of model file") - ("Particlesfile", po::value< std::string>(), "if BioEM (Mandatory) Name of particle-image file") - ("Inputfile", po::value<std::string>(), "if BioEM (Mandatory) Name of input parameter file") - ("PrintBestCalMap", po::value< std::string>(), "(Optional) Only print best calculated map. NO BioEM (!)") - ("ReadOrientation", po::value< std::string>(), "(Optional) Read file name containing orientations") - ("ReadPDB", "(Optional) If reading model file in PDB format") - ("ReadMRC", "(Optional) If reading particle file in MRC format") - ("ReadMultipleMRC", "(Optional) If reading Multiple MRCs") - ("DumpMaps", "(Optional) Dump maps after they were read from particle-image file") - ("LoadMapDump", "(Optional) Read Maps from dump option") - ("OutputFile", po::value< std::string>(), "(Optional) For changing the outputfile name") - ("help", "(Optional) Produce help message") - ; - - - po::positional_options_description p; - p.add("Inputfile", -1); - p.add("Modelfile", -1); - p.add("Particlesfile", -1); - p.add("ReadPDB", -1); - p.add("ReadMRC", -1); - p.add("ReadMultipleMRC", -1); - p.add("ReadOrientation",-1); - p.add("PrintBestCalMap",-1); - p.add("DumpMaps", -1); - p.add("LoadMapDump", -1); - p.add("OutputFile",-1); - - po::variables_map vm; - po::store(po::command_line_parser(ac, av). 
- options(desc).positional(p).run(), vm); - po::notify(vm); - - if((ac < 4)) { - std::cout << desc << std::endl; - return 1; - } - if (vm.count("help")) { - cout << "Usage: options_description [options]\n"; - cout << desc; - return 1; - } - - if (vm.count("Inputfile")) - { - cout << "Input file is: "; - cout << vm["Inputfile"].as< std::string >() << "\n"; - infile = vm["Inputfile"].as< std::string >(); - } - if (vm.count("Modelfile")) - { - cout << "Model file is: " - << vm["Modelfile"].as< std::string >() << "\n"; - modelfile = vm["Modelfile"].as< std::string >(); - } - if (vm.count("ReadPDB")) - { - cout << "Reading model file in PDB format.\n"; - Model.readPDB = true; - } - if (vm.count("ReadOrientation")) - { - cout << "Reading Orientation from file: " - << vm["ReadOrientation"].as< std::string >() << "\n"; - cout << "Important! if using Quaternions, include \n"; - cout << "QUATERNIONS keyword in INPUT PARAMETER FILE\n"; - cout << "First row in file should be the total number of orientations (int)\n"; - cout << "Euler angle format should be alpha (12.6f) beta (12.6f) gamma (12.6f)\n"; - cout << "Quaternion format q1 (12.6f) q2 (12.6f) q3 (12.6f) q4 (12.6f)\n"; - Inputanglefile = vm["ReadOrientation"].as< std::string >(); - param.notuniformangles=true; - } - if (vm.count("OutputFile")) - { - OutfileName = vm["OutputFile"].as< std::string >(); - cout << "Writing OUTPUT to: " << vm["OutputFile"].as< std::string >() << "\n"; - yesoutfilename=true; - } - if (vm.count("PrintBestCalMap")) - { - cout << "Reading Euler Angles from file: " - << vm["PrintBestCalMap"].as< std::string >() << "\n"; - Inputbestmap = vm["PrintBestCalMap"].as< std::string >(); - param.printModel=true; - } - - if (vm.count("ReadMRC")) - { - cout << "Reading particle file in MRC format.\n"; - RefMap.readMRC=true; - } - - if (vm.count("ReadMultipleMRC")) - { - cout << "Reading Multiple MRCs.\n"; - RefMap.readMultMRC=true; - } - - if (vm.count("DumpMaps")) - { - cout << "Dumping Maps after reading 
from file.\n"; - param.dumpMap = true; - } - - if (vm.count("LoadMapDump")) - { - cout << "Loading Map dump.\n"; - param.loadMap = true; - } - - if (vm.count("Particlesfile")) - { - cout << "Paricle file is: " - << vm["Particlesfile"].as< std::string >() << "\n"; - mapfile = vm["Particlesfile"].as< std::string >(); - } - } - catch(std::exception& e) - { - cout << e.what() << "\n"; - return 1; - } - - //check for consitency in multiple MRCs - if(RefMap.readMultMRC && not(RefMap.readMRC)) - { - cout << "For Multiple MRCs command --ReadMRC is necesary too"; - exit(1); - } - - if(!Model.readPDB){ - cout << "Note: Reading model in simple text format (not PDB)\n"; - cout << "---- x y z radius density ------- \n"; - } - - if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart(); - // ********************* Reading Parameter Input *************************** - if(!param.printModel){ - // Standard definition for BioEM - param.readParameters(infile.c_str()); - - // ********************* Reading Particle Maps Input ********************** - RefMap.readRefMaps(param, mapfile.c_str()); - - - } else{ - // Reading parameters for only writting down Best projection - - param.forprintBest(Inputbestmap.c_str()); - } - - // ********************* Reading Model Input ****************************** - Model.readModel(param, modelfile.c_str()); - - cout << "**NOTE:: look at file COORDREAD to confirm that the Model coordinates are correct\n"; - - if (DebugOutput >= 2 && mpi_rank == 0) printf("Reading Input Data Time: %f\n", timer.GetCurrentElapsedTime()); - - if(param.param_device.writeCC && mpi_size>1){ - cout << "Exiting::: WRITE CROSS-CORRELATION ONLY VAILD FOR 1 MPI PROCESS\n"; - exit(1); - } + int myopt; + while (1) + { + /* getopt_long stores the option index here. */ + int option_index = 0; + myopt = getopt_long(ac, av, "", long_options, &option_index); + + /* Detect the end of the options. 
*/ + if (myopt == -1) + break; - // Generating Grids of orientations - if(!param.printModel)param.CalculateGridsParam(Inputanglefile.c_str()); + switch (myopt) + { + case 0: +#ifdef DEBUG + printf("option %s", long_options[option_index].name); + if (optarg) + printf(" with arg %s", optarg); + printf("\n"); +#endif + // Here write actions for each option + if (!strcmp(long_options[option_index].name, "help")) + { + cout << "Usage: options_description [options]\n"; + printOptions(myoptions, myoptions_length); + return 1; + } + if (!strcmp(long_options[option_index].name, "Inputfile")) + { + cout << "Input file is: " << optarg << "\n"; + infile = optarg; + } + if (!strcmp(long_options[option_index].name, "Modelfile")) + { + cout << "Model file is: " << optarg << "\n"; + modelfile = optarg; + } + if (!strcmp(long_options[option_index].name, "ReadPDB")) + { + cout << "Reading model file in PDB format.\n"; + Model.readPDB = true; + } + if (!strcmp(long_options[option_index].name, "ReadOrientation")) + { + cout << "Reading Orientation from file: " << optarg << "\n"; + cout << "Important! 
if using Quaternions, include \n"; + cout << "QUATERNIONS keyword in INPUT PARAMETER FILE\n"; + cout << "First row in file should be the total number of " + "orientations " + "(int)\n"; + cout << "Euler angle format should be alpha (12.6f) beta (12.6f) " + "gamma (12.6f)\n"; + cout << "Quaternion format q1 (12.6f) q2 (12.6f) q3 (12.6f) q4 " + "(12.6f)\n"; + Inputanglefile = optarg; + param.notuniformangles = true; + } + if (!strcmp(long_options[option_index].name, "OutputFile")) + { + cout << "Writing OUTPUT to: " << optarg << "\n"; + OutfileName = optarg; + } + if (!strcmp(long_options[option_index].name, "PrintBestCalMap")) + { + cout << "Reading Best Parameters from file: " << optarg << "\n"; + Inputbestmap = optarg; + param.printModel = true; + } + if (!strcmp(long_options[option_index].name, "ReadMRC")) + { + cout << "Reading particle file in MRC format.\n"; + RefMap.readMRC = true; + } + if (!strcmp(long_options[option_index].name, "ReadMultipleMRC")) + { + cout << "Reading Multiple MRCs.\n"; + RefMap.readMultMRC = true; + } + if (!strcmp(long_options[option_index].name, "DumpMaps")) + { + cout << "Dumping Maps after reading from file.\n"; + param.dumpMap = true; + } + if (!strcmp(long_options[option_index].name, "LoadMapDump")) + { + cout << "Loading Map dump.\n"; + param.loadMap = true; + } + if (!strcmp(long_options[option_index].name, "Particlesfile")) + { + cout << "Particle file is: " << optarg << "\n"; + mapfile = optarg; + } + break; + case '?': + /* getopt_long already printed an error message. 
*/ + printOptions(myoptions, myoptions_length); + return 1; + default: + abort(); } + } + /* Print any remaining command line arguments (not options) and exit */ + if (optind < ac) + { + printf("Error: non-option ARGV-elements: "); + while (optind < ac) + printf("%s ", av[optind++]); + putchar('\n'); + printOptions(myoptions, myoptions_length); + return 1; + } -#ifdef WITH_MPI + // check for consitency in multiple MRCs + if (RefMap.readMultMRC && not(RefMap.readMRC)) + { + cout << "For Multiple MRCs command --ReadMRC is necesary too"; + exit(1); + } + if (!Model.readPDB) + { + cout << "Note: Reading model in simple text format (not PDB)\n"; + cout << "---- x y z radius density ------- \n"; + } + if (DebugOutput >= 2 && mpi_rank == 0) + timer.ResetStart(); - // ********************* MPI inizialization/ Transfer of parameters****************** - if (mpi_size > 1) - { - if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart(); - MPI_Bcast(&param, sizeof(param), MPI_BYTE, 0, MPI_COMM_WORLD); - //We have to reinitialize all pointers !!!!!!!!!!!! 
- if (mpi_rank != 0) param.angprior = NULL; + // *** Reading Parameter Input *** + if (!param.printModel) + { + // Standard definition for BioEM + param.readParameters(infile.c_str()); + // *** Reading Particle Maps Input *** + RefMap.readRefMaps(param, mapfile.c_str()); + } + else + { + // Reading parameters for only writting down Best projection + param.forprintBest(Inputbestmap.c_str()); + } - if (mpi_rank != 0)param.angles = (myfloat3_t*) mallocchk(param.nTotGridAngles * sizeof (myfloat3_t)); - MPI_Bcast(param.angles, param.nTotGridAngles * sizeof (myfloat3_t),MPI_BYTE, 0, MPI_COMM_WORLD); + // *** Reading Model Input *** + Model.readModel(param, modelfile.c_str()); -#ifdef DEBUG - for(int n=0;n<param.nTotGridAngles;n++){ - cout << "CHECK: Angle orient " << mpi_rank << " "<< n << " " << param.angles[n].pos[0] << " " << param.angles[n].pos[1] << " " << param.angles[n].pos[2] << " " << param.angles[n].quat4 << " " << "\n";} + cout << "**NOTE:: look at file COORDREAD to confirm that the Model " + "coordinates are correct\n"; -#endif - //****refCtf, CtfParam, angles automatically filled by precalculate function bellow + if (DebugOutput >= 2 && mpi_rank == 0) + printf("Reading Input Data Time: %f\n", timer.GetCurrentElapsedTime()); + + // Generating Grids of orientations + if (!param.printModel) + param.CalculateGridsParam(Inputanglefile.c_str()); + + return (0); +} + +int bioem::configure(int ac, char *av[]) +{ + // ************************************************************************************** + // **** Configuration Routine using getopts for extracting parameters, models + // and maps **** + // ************************************************************************************** + // ****** And Precalculating necessary grids, map crosscorrelations and + // kernels ******** + // ************************************************************************************* + + HighResTimer timer; + + if (mpi_rank == 0 && readOptions(ac, av)) + return 1; + +#ifdef 
WITH_MPI - MPI_Bcast(&Model, sizeof(Model), MPI_BYTE, 0, MPI_COMM_WORLD); - if (mpi_rank != 0) Model.points = (bioem_model::bioem_model_point*) mallocchk(sizeof(bioem_model::bioem_model_point) * Model.nPointsModel); - MPI_Bcast(Model.points, sizeof(bioem_model::bioem_model_point) * Model.nPointsModel, MPI_BYTE, 0, MPI_COMM_WORLD); + // ********************* MPI inizialization/ Transfer of + // parameters****************** + if (mpi_size > 1) + { + if (DebugOutput >= 2 && mpi_rank == 0) + timer.ResetStart(); + MPI_Bcast(&param, sizeof(param), MPI_BYTE, 0, MPI_COMM_WORLD); + // We have to reinitialize all pointers !!!!!!!!!!!! + if (mpi_rank != 0) + param.angprior = NULL; - MPI_Bcast(&RefMap, sizeof(RefMap), MPI_BYTE, 0, MPI_COMM_WORLD); - if (mpi_rank != 0) RefMap.maps = (myfloat_t*) mallocchk(RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap); - MPI_Bcast(RefMap.maps, RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap, MPI_BYTE, 0, MPI_COMM_WORLD); - if (DebugOutput >= 2 && mpi_rank == 0) printf("MPI Broadcast of Input Data %f\n", timer.GetCurrentElapsedTime()); + if (mpi_rank != 0) + param.angles = + (myfloat3_t *) mallocchk(param.nTotGridAngles * sizeof(myfloat3_t)); + MPI_Bcast(param.angles, param.nTotGridAngles * sizeof(myfloat3_t), MPI_BYTE, + 0, MPI_COMM_WORLD); +#ifdef DEBUG + for (int n = 0; n < param.nTotGridAngles; n++) + { + cout << "CHECK: Angle orient " << mpi_rank << " " << n << " " + << param.angles[n].pos[0] << " " << param.angles[n].pos[1] << " " + << param.angles[n].pos[2] << " " << param.angles[n].quat4 << " " + << "\n"; } + +#endif + //****refCtf, CtfParam, angles automatically filled by precalculate function + // bellow + + MPI_Bcast(&Model, sizeof(Model), MPI_BYTE, 0, MPI_COMM_WORLD); + if (mpi_rank != 0) + Model.points = (bioem_model::bioem_model_point *) mallocchk( + sizeof(bioem_model::bioem_model_point) * Model.nPointsModel); + MPI_Bcast(Model.points, + sizeof(bioem_model::bioem_model_point) * Model.nPointsModel, + MPI_BYTE, 0, 
MPI_COMM_WORLD); + + MPI_Bcast(&RefMap, sizeof(RefMap), MPI_BYTE, 0, MPI_COMM_WORLD); + if (mpi_rank != 0) + RefMap.maps = (myfloat_t *) mallocchk( + RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap); + MPI_Bcast(RefMap.maps, + RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap, + MPI_BYTE, 0, MPI_COMM_WORLD); + if (DebugOutput >= 2 && mpi_rank == 0) + printf("MPI Broadcast of Input Data %f\n", timer.GetCurrentElapsedTime()); + } #endif // ****************** Precalculating Necessary Stuff ********************* - if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart(); + if (DebugOutput >= 2 && mpi_rank == 0) + timer.ResetStart(); param.PrepareFFTs(); if (DebugOutput >= 2 && mpi_rank == 0) - { - printf("Time Prepare FFTs %f\n", timer.GetCurrentElapsedTime()); - timer.ResetStart(); - } + { + printf("Time Prepare FFTs %f\n", timer.GetCurrentElapsedTime()); + timer.ResetStart(); + } precalculate(); // ****************** For debugging ********************* if (getenv("BIOEM_DEBUG_BREAK")) - { - const int cut = atoi(getenv("BIOEM_DEBUG_BREAK")); - if (param.nTotGridAngles > cut) param.nTotGridAngles = cut; - if (param.nTotCTFs > cut) param.nTotCTFs = cut; - } + { + const int cut = atoi(getenv("BIOEM_DEBUG_BREAK")); + if (param.nTotGridAngles > cut) + param.nTotGridAngles = cut; + if (param.nTotCTFs > cut) + param.nTotCTFs = cut; + } if (DebugOutput >= 2 && mpi_rank == 0) - { - printf("Time Precalculate %f\n", timer.GetCurrentElapsedTime()); - timer.ResetStart(); - } + { + printf("Time Precalculate %f\n", timer.GetCurrentElapsedTime()); + timer.ResetStart(); + } + + // Number of parallel Convolutions and Comparisons + param.nTotParallelConv = min(param.nTotCTFs, nProjectionsAtOnce); // ****************** For autotuning ********************** - if ((getenv("GPU") && atoi(getenv("GPU"))) && ((!getenv("GPUWORKLOAD") || (atoi(getenv("GPUWORKLOAD")) == -1))) && (!getenv("BIOEM_DEBUG_BREAK") || (atoi(getenv("BIOEM_DEBUG_BREAK")) > FIRST_STABLE))) - { - 
Autotuning = true; - if (mpi_rank == 0) printf("Autotuning of GPUWorkload enabled:\n\tAlgorithm %d\n\tRecalibration at every %d projections\n\tComparisons are considered stable after first %d comparisons\n", AUTOTUNING_ALGORITHM, RECALIB_FACTOR, FIRST_STABLE); - } + if ((getenv("GPU") && atoi(getenv("GPU"))) && (BioEMAlgo == 1) && + ((!getenv("GPUWORKLOAD") || (atoi(getenv("GPUWORKLOAD")) == -1))) && + (!getenv("BIOEM_DEBUG_BREAK") || + (atoi(getenv("BIOEM_DEBUG_BREAK")) > FIRST_STABLE))) + { + Autotuning = true; + if (mpi_rank == 0) + printf("Autotuning of GPUWorkload enabled:\n\tAlgorithm " + "%d\n\tRecalibration at every %d projections\n\tComparisons are " + "considered stable after first %d comparisons\n", + AUTOTUNING_ALGORITHM, RECALIB_FACTOR, FIRST_STABLE); + } else + { + Autotuning = false; + if (mpi_rank == 0) { - Autotuning = false; - if (mpi_rank == 0) printf("Autotuning of GPUWorkload disabled\n"); + printf("Autotuning of GPUWorkload disabled"); + if (getenv("GPU") && atoi(getenv("GPU"))) + printf(", using GPUWorkload: %d%%\n", + (getenv("GPUWORKLOAD") && (atoi(getenv("GPUWORKLOAD")) != -1)) ? 
+ atoi(getenv("GPUWORKLOAD")) : + 100); + else + printf(", please enable GPUs\n"); } + } // ****************** Initializing pointers ********************* deviceInit(); if (DebugOutput >= 2 && mpi_rank == 0) - { - printf("Time Device Init %f\n", timer.GetCurrentElapsedTime()); - timer.ResetStart(); - } + { + printf("Time Device Init %f\n", timer.GetCurrentElapsedTime()); + timer.ResetStart(); + } - if(!param.printModel)pProb.init(RefMap.ntotRefMap, param.nTotGridAngles, param.nTotCC, *this); + if (!param.printModel) + pProb.init(RefMap.ntotRefMap, param.nTotGridAngles, *this); if (DebugOutput >= 2 && mpi_rank == 0) - { - printf("Time Init Probabilities %f\n", timer.GetCurrentElapsedTime()); - timer.ResetStart(); - } + { + printf("Time Init Probabilities %f\n", timer.GetCurrentElapsedTime()); + timer.ResetStart(); + } - return(0); + return (0); } void bioem::cleanup() { - //Deleting allocated pointers + // Deleting allocated pointers free_device_host(pProb.ptr); RefMap.freePointers(); } @@ -416,723 +540,1019 @@ void bioem::cleanup() int bioem::precalculate() { // ************************************************************************************** - // **Precalculating Routine of Orientation grids, Map crosscorrelations and CTF Kernels** + // **Precalculating Routine of Orientation grids, Map crosscorrelations and + // CTF Kernels** // ************************************************************************************** HighResTimer timer; - if (DebugOutput >= 3) - { - printf("\tTime Precalculate Grids Param: %f\n", timer.GetCurrentElapsedTime()); - timer.ResetStart(); - } + if (DebugOutput >= 2) + { + printf("\tTime Precalculate Grids Param: %f\n", + timer.GetCurrentElapsedTime()); + timer.ResetStart(); + } // Precalculating CTF Kernels stored in class Param param.CalculateRefCTF(); - if (DebugOutput >= 3) - { - printf("\tTime Precalculate CTFs: %f\n", timer.GetCurrentElapsedTime()); - timer.ResetStart(); - } - //Precalculate Maps - if(!param.printModel) 
RefMap.precalculate(param, *this); - if (DebugOutput >= 3) printf("\tTime Precalculate Maps: %f\n", timer.GetCurrentElapsedTime()); + if (DebugOutput >= 2) + { + printf("\tTime Precalculate CTFs: %f\n", timer.GetCurrentElapsedTime()); + timer.ResetStart(); + } + // Precalculate Maps + if (!param.printModel) + RefMap.precalculate(param, *this); + if (DebugOutput >= 2) + printf("\tTime Precalculate Maps: %f\n", timer.GetCurrentElapsedTime()); - return(0); + return (0); } -int bioem::run() +int bioem::printModel() { - // ************************************************************************************** - // ********** Secondary routine for printing out the only best projection *************** + // ********** Secondary routine for printing out the only best projection + // *************** // ************************************************************************************** - if(mpi_rank == 0 && param.printModel){ //Only works for 1 MPI process (not parallelized) - - cout << "\nAnalysis for printing best projection::: \n \n" ; - mycomplex_t* proj_mapsFFT; - myfloat_t* conv_map = NULL; - mycomplex_t* conv_mapFFT; - myfloat_t sumCONV, sumsquareCONV; + cout << "\nAnalysis for printing best projection::: \n \n"; + mycomplex_t *proj_mapsFFT; + myfloat_t *conv_map = NULL; + mycomplex_t *conv_mapFFT; + myfloat_t sumCONV, sumsquareCONV; - proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); - conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); - conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels); + proj_mapsFFT = (mycomplex_t *) myfftw_malloc( + sizeof(mycomplex_t) * param.param_device.NumberPixels * + param.param_device.NumberFFTPixels1D); + conv_mapFFT = (mycomplex_t *) myfftw_malloc( + sizeof(mycomplex_t) * 
param.param_device.NumberPixels * + param.param_device.NumberFFTPixels1D); + conv_map = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * + param.param_device.NumberPixels * + param.param_device.NumberPixels); - cout << "...... Calculating Projection .......................\n " ; + cout << "...... Calculating Projection .......................\n "; - createProjection(0, proj_mapsFFT); + createProjection(0, proj_mapsFFT); - cout << "...... Calculating Convolution .......................\n " ; + cout << "...... Calculating Convolution .......................\n "; - createConvolutedProjectionMap(0, 0, proj_mapsFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); + createConvolutedProjectionMap_noFFT(proj_mapsFFT, conv_map, conv_mapFFT, + sumCONV, sumsquareCONV); - } + return (0); +} +int bioem::run() +{ // ************************************************************************************** - // **** Main BioEM routine, projects, convolutes and compares with Map using OpenMP **** + // **** Main BioEM routine, projects, convolutes and compares with Map using + // OpenMP **** // ************************************************************************************** - // **** If we want to control the number of threads -> omp_set_num_threads(XX); ****** - // ****************** Declarying class of Probability Pointer ************************* + // **** If we want to control the number of threads -> + // omp_set_num_threads(XX); ****** + // ****************** Declarying class of Probability Pointer + // ************************* cuda_custom_timeslot("Initialization", -1, -1, COLOR_INIT); - if (mpi_rank == 0) printf("\tInitializing Probabilities\n"); + if (mpi_rank == 0) + printf("\tInitializing Probabilities\n"); // Contros for MPI - if(mpi_size > param.nTotGridAngles){ - cout << "EXIT: Wrong MPI setup More MPI processes than orientations\n"; exit(1); + if (mpi_size > param.nTotGridAngles) + { + cout << "EXIT: Wrong MPI setup More MPI processes than orientations\n"; + 
exit(1); } // Inizialzing Probabilites to zero and constant to -Infinity - for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++) - { - bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); + for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap++) + { + bioem_Probability_map &pProbMap = pProb.getProbMap(iRefMap); - pProbMap.Total = 0.0; - pProbMap.Constoadd = -FLT_MAX; //Problem if using double presicion + pProbMap.Total = 0.0; + pProbMap.Constoadd = MIN_PROB; - if (param.param_device.writeAngles) - { - for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient ++) - { - bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); - - pProbAngle.forAngles = 0.0; - pProbAngle.ConstAngle = -FLT_MAX; - } - } - - if (param.param_device.writeCC) - { int cc=0; - for (int cent_x = 0; cent_x < param.param_device.NumberPixels; cent_x = cent_x + param.param_device.CCdisplace) - { - for (int cent_y = 0; cent_y < param.param_device.NumberPixels; cent_y = cent_y + param.param_device.CCdisplace) - { - bioem_Probability_cc& pProbCC = pProb.getProbCC(iRefMap, cc); - //Debuggin:: cout << iRefMap << " " << cc << " " << cent_x << " " << cent_y << "\n"; - - if(!param.param_device.CCwithBayes) { - pProbCC.forCC=-FLT_MAX; - }else { - pProbCC.forCC = 0.0; - pProbCC.ConstCC=-FLT_MAX; - } - cc++; - } - } - if(!FFTAlgo){cout << "Cross correlation calculation must be with enviormental variable FFTALGO=1\n"; exit(1);} - } - } + if (param.param_device.writeAngles) + { + for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient++) + { + bioem_Probability_angle &pProbAngle = + pProb.getProbAngle(iRefMap, iOrient); - if(!FFTAlgo){cout << "Remark: Not using FFT algorithm. 
Not using Prior in B-Env.";} + pProbAngle.forAngles = 0.0; + pProbAngle.ConstAngle = MIN_PROB; + } + } + } // ************************************************************************************** deviceStartRun(); - // ******************************** MAIN CYCLE ****************************************** - - mycomplex_t* proj_mapsFFT; - myfloat_t* conv_map = NULL; - mycomplex_t* conv_mapFFT; - myfloat_t sumCONV, sumsquareCONV; - - //allocating fftw_complex vector - const int ProjMapSize = (param.FFTMapSize + 64) & ~63; //Make sure this is properly aligned for fftw..., Actually this should be ensureb by using FFTMapSize, but it is not due to a bug in CUFFT which cannot handle padding properly - //******** Alocating Vectors ************* - proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * ProjMapSize * nProjectionsAtOnce); - conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); - if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels); - - cuda_custom_timeslot_end; //Ending initialization + // ******************************** MAIN CYCLE + // ****************************************** + + mycomplex_t *proj_mapsFFT; + mycomplex_t *conv_mapsFFT; + myparam5_t *comp_params = + new myparam5_t[param.nTotParallelConv * PIPELINE_LVL]; + int iPipeline = 0; + + // allocating fftw_complex vector + const int ProjMapSize = + (param.FFTMapSize + 64) & ~63; // Make sure this is properly aligned for + // fftw..., Actually this should be ensureb by + // using FFTMapSize, but it is not due to a bug + // in CUFFT which cannot handle padding properly + //******** Allocating Vectors ************* + proj_mapsFFT = (mycomplex_t *) myfftw_malloc( + sizeof(mycomplex_t) * ProjMapSize * nProjectionsAtOnce); + conv_mapsFFT = + (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.FFTMapSize * + 
param.nTotParallelConv * PIPELINE_LVL); + + cuda_custom_timeslot_end; // Ending initialization HighResTimer timer, timer2; /* Autotuning */ Autotuner aut; if (Autotuning) - { - aut.Initialize(AUTOTUNING_ALGORITHM, FIRST_STABLE); - rebalanceWrapper(aut.Workload()); - } - - if (DebugOutput >= 1 && mpi_rank == 0) printf("\tMain Loop GridAngles %d, CTFs %d, RefMaps %d, Shifts (%d/%d)², Pixels %d², OMP Threads %d, MPI Ranks %d\n", param.nTotGridAngles, param.nTotCTFs, RefMap.ntotRefMap, 2 * param.param_device.maxDisplaceCenter + param.param_device.GridSpaceCenter, param.param_device.GridSpaceCenter, param.param_device.NumberPixels, omp_get_max_threads(), mpi_size); - - + { + aut.Initialize(AUTOTUNING_ALGORITHM, FIRST_STABLE); + rebalanceWrapper(aut.Workload()); + } - const int iOrientStart = (int) ((long long int) mpi_rank * param.nTotGridAngles / mpi_size); - int iOrientEnd = (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size); - if (iOrientEnd > param.nTotGridAngles) iOrientEnd = param.nTotGridAngles; + if (DebugOutput >= 1 && mpi_rank == 0) + printf("\tMain Loop GridAngles %d, CTFs %d, RefMaps %d, Shifts (%d/%d)², " + "Pixels %d², OMP Threads %d, MPI Ranks %d\n", + param.nTotGridAngles, param.nTotCTFs, RefMap.ntotRefMap, + 2 * param.param_device.maxDisplaceCenter + + param.param_device.GridSpaceCenter, + param.param_device.GridSpaceCenter, param.param_device.NumberPixels, + omp_get_max_threads(), mpi_size); + + const int iOrientStart = + (int) ((long long int) mpi_rank * param.nTotGridAngles / mpi_size); + int iOrientEnd = + (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size); + if (iOrientEnd > param.nTotGridAngles) + iOrientEnd = param.nTotGridAngles; /* Vectors for computing statistic on different parts of the code */ TimeStat ts((iOrientEnd - iOrientStart), param.nTotCTFs); - if (DebugOutput >= 1) ts.InitTimeStat(4); + if (DebugOutput >= 1) + ts.InitTimeStat(4); - // **************************Loop Over 
orientations*************************************** + // **************************Loop Over + // orientations*************************************** - for (int iOrientAtOnce = iOrientStart; iOrientAtOnce < iOrientEnd; iOrientAtOnce += nProjectionsAtOnce) + for (int iOrientAtOnce = iOrientStart; iOrientAtOnce < iOrientEnd; + iOrientAtOnce += nProjectionsAtOnce) + { + // *************************************************************************************** + // ***** Creating Projection for given orientation and transforming to + // Fourier space ***** + if (DebugOutput >= 1) { - // *************************************************************************************** - // ***** Creating Projection for given orientation and transforming to Fourier space ***** - if (DebugOutput >= 1) - { - timer2.ResetStart(); - timer.ResetStart(); - } - int iTmpEnd = std::min(iOrientEnd, iOrientAtOnce + nProjectionsAtOnce); + timer2.ResetStart(); + timer.ResetStart(); + } + int iOrientEndAtOnce = + std::min(iOrientEnd, iOrientAtOnce + nProjectionsAtOnce); - // **************************Parallel orientations for projections at once*************** +// **************************Parallel orientations for projections at +// once*************** #pragma omp parallel for - for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++) - { - createProjection(iOrient, &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]); - } + for (int iOrient = iOrientAtOnce; iOrient < iOrientEndAtOnce; iOrient++) + { + createProjection(iOrient, + &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]); + } + if (DebugOutput >= 1) + { + ts.time = timer.GetCurrentElapsedTime(); + ts.Add(TS_PROJECTION); + if (DebugOutput >= 2) + printf("\tTime Projection %d-%d: %f (rank %d)\n", iOrientAtOnce, + iOrientEndAtOnce - 1, ts.time, mpi_rank); + } + /* Recalibrate if needed */ + if (Autotuning && ((iOrientAtOnce - iOrientStart) % RECALIB_FACTOR == 0) && + ((iOrientEnd - iOrientAtOnce) > RECALIB_FACTOR) && + 
(iOrientAtOnce != iOrientStart)) + { + aut.Reset(); + rebalanceWrapper(aut.Workload()); + } + + for (int iOrient = iOrientAtOnce; iOrient < iOrientEndAtOnce; iOrient++) + { + mycomplex_t *proj_mapFFT = + &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]; + + // *************************************************************************************** + // ***** **** Internal Loop over PSF/CTF convolutions **** ***** + for (int iConvAtOnce = 0; iConvAtOnce < param.nTotCTFs; + iConvAtOnce += param.nTotParallelConv) + { + if (DebugOutput >= 1) + timer.ResetStart(); + int iConvEndAtOnce = + std::min(param.nTotCTFs, iConvAtOnce + param.nTotParallelConv); + // Total number of convolutions that can be treated in this iteration in + // parallel + int maxParallelConv = iConvEndAtOnce - iConvAtOnce; +#pragma omp parallel for + for (int iConv = iConvAtOnce; iConv < iConvEndAtOnce; iConv++) + { + // *** Calculating convolutions of projection map and + // crosscorrelations *** + int i = + (iPipeline & 1) * param.nTotParallelConv + (iConv - iConvAtOnce); + mycomplex_t *localmultFFT = &conv_mapsFFT[i * param.FFTMapSize]; + + createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, + localmultFFT, comp_params[i].sumC, + comp_params[i].sumsquareC); + + comp_params[i].amp = param.CtfParam[iConv].pos[0]; + comp_params[i].pha = param.CtfParam[iConv].pos[1]; + comp_params[i].env = param.CtfParam[iConv].pos[2]; + } + if (DebugOutput >= 1) + { + ts.time = timer.GetCurrentElapsedTime(); + ts.Add(TS_CONVOLUTION); + if (DebugOutput >= 2) + printf("\t\tTime Convolution %d %d-%d: %f (rank %d)\n", iOrient, + iConvAtOnce, iConvEndAtOnce - 1, ts.time, mpi_rank); + } + + // ******************Internal loop over Reference images CUDA or + // OpenMP****************** + // *** Comparing each calculated convoluted map with all experimental + // maps *** + ts.time = 0.; + if ((DebugOutput >= 1) || (Autotuning && aut.Needed(iConvAtOnce))) + timer.ResetStart(); + compareRefMaps(iPipeline++, iOrient, 
iConvAtOnce, maxParallelConv, + conv_mapsFFT, comp_params); + if (DebugOutput >= 1) + { + ts.time = timer.GetCurrentElapsedTime(); + ts.Add(TS_COMPARISON); + } + if (DebugOutput >= 2) + { + if (Autotuning) + printf("\t\tTime Comparison %d %d-%d: %f sec with GPU workload " + "%d%% (rank %d)\n", + iOrient, iConvAtOnce, iConvEndAtOnce - 1, ts.time, + aut.Workload(), mpi_rank); + else + printf("\t\tTime Comparison %d %d-%d: %f sec (rank %d)\n", iOrient, + iConvAtOnce, iConvEndAtOnce - 1, ts.time, mpi_rank); + } + if (Autotuning && aut.Needed(iConvAtOnce)) + { + if (ts.time == 0.) + ts.time = timer.GetCurrentElapsedTime(); + aut.Tune(ts.time); + if (aut.Finished() && DebugOutput >= 1) + printf("\tOptimal GPU workload %d%% (rank %d)\n", aut.Workload(), + mpi_rank); + rebalanceWrapper(aut.Workload()); + } + } if (DebugOutput >= 1) - { - ts.time = timer.GetCurrentElapsedTime(); - ts.Add(TS_PROJECTION); - if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrientAtOnce, ts.time, mpi_rank); - } - /* Recalibrate if needed */ - if (Autotuning && ((iOrientAtOnce - iOrientStart) % RECALIB_FACTOR == 0) && ((iOrientEnd - iOrientAtOnce) > RECALIB_FACTOR) && (iOrientAtOnce != iOrientStart)) - { - aut.Reset(); - rebalanceWrapper(aut.Workload()); - } - - for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++) - { - mycomplex_t* proj_mapFFT = &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]; - - // *************************************************************************************** - // ***** **** Internal Loop over PSF/CTF convolutions **** ***** - - for (int iConv = 0; iConv < param.nTotCTFs; iConv++) - { - // *** Calculating convolutions of projection map and crosscorrelations *** - if (DebugOutput >= 1) timer.ResetStart(); - createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); - if (DebugOutput >= 1) - { - ts.time = timer.GetCurrentElapsedTime(); - ts.Add(TS_CONVOLUTION); - if (DebugOutput >= 2) 
printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, ts.time, mpi_rank); - } - - if ((DebugOutput >= 1) || (Autotuning && aut.Needed(iConv))) timer.ResetStart(); - myfloat_t amp,pha,env; - - amp=param.CtfParam[iConv].pos[0]; - pha=param.CtfParam[iConv].pos[1]; - env=param.CtfParam[iConv].pos[2]; - - // ******************Internal loop over Reference images CUDA or OpenMP****************** - // *** Comparing each calculated convoluted map with all experimental maps *** - - compareRefMaps(iOrient, iConv, amp, pha, env, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); - - ts.time = 0.; - if (DebugOutput >= 1) - { - ts.time = timer.GetCurrentElapsedTime(); - ts.Add(TS_COMPARISON); - } - if (DebugOutput >= 2) - { - const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1; - const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts * - (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / ts.time; - const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts * - (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / ts.time; - const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) 
* (double) sizeof(myfloat_t) / ts.time; - - if (Autotuning) printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s, with GPU workload %d%%) (rank %d)\n", iOrient, iConv, ts.time, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., aut.Workload(), mpi_rank); - else printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, ts.time, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank); - } - if (Autotuning && aut.Needed(iConv)) - { - if (ts.time == 0.) ts.time = timer.GetCurrentElapsedTime(); - aut.Tune(ts.time); - if (aut.Finished() && DebugOutput >= 1) printf("\tOptimal GPU workload %d%% (rank %d)\n", aut.Workload(), mpi_rank); - rebalanceWrapper(aut.Workload()); - } - } - if (DebugOutput >= 1) - { - ts.time = timer2.GetCurrentElapsedTime(); - ts.Add(TS_TPROJECTION); - printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, ts.time, mpi_rank); - timer2.ResetStart(); - } - } + { + ts.time = timer2.GetCurrentElapsedTime(); + ts.Add(TS_TPROJECTION); + printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, + ts.time, mpi_rank); + timer2.ResetStart(); + } } + } /* Statistical summary on different parts of the code */ if (DebugOutput >= 1) - { - ts.PrintTimeStat(mpi_rank); - ts.EmptyTimeStat(); - } + { + ts.PrintTimeStat(mpi_rank); + ts.EmptyTimeStat(); + } - //deallocating fftw_complex vector + // deallocating fftw_complex vector myfftw_free(proj_mapsFFT); - myfftw_free(conv_mapFFT); - if (!FFTAlgo) myfftw_free(conv_map); + myfftw_free(conv_mapsFFT); deviceFinishRun(); - - - // ************* Collecing all the probabilities from MPI replicas *************** +// ******************************************************************************* +// ************* Collecing all the probabilities from MPI replicas +// *************** #ifdef WITH_MPI if (mpi_size > 1) + { + if (DebugOutput >= 1 && mpi_rank == 0) + timer.ResetStart(); + // Reduce 
Constant and summarize probabilities { - if (DebugOutput >= 1 && mpi_rank == 0) timer.ResetStart(); - //Reduce Constant and summarize probabilities + myprob_t *tmp1 = new myprob_t[RefMap.ntotRefMap]; + myprob_t *tmp2 = new myprob_t[RefMap.ntotRefMap]; + myprob_t *tmp3 = new myprob_t[RefMap.ntotRefMap]; + for (int i = 0; i < RefMap.ntotRefMap; i++) + { + tmp1[i] = pProb.getProbMap(i).Constoadd; + } + MPI_Allreduce(tmp1, tmp2, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_MAX, + MPI_COMM_WORLD); + + for (int i = 0; i < RefMap.ntotRefMap; i++) { - myfloat_t* tmp1 = new myfloat_t[RefMap.ntotRefMap]; - myfloat_t* tmp2 = new myfloat_t[RefMap.ntotRefMap]; - myfloat_t* tmp3 = new myfloat_t[RefMap.ntotRefMap]; - for (int i = 0;i < RefMap.ntotRefMap;i++) - { - tmp1[i] = pProb.getProbMap(i).Constoadd; - } - MPI_Allreduce(tmp1, tmp2, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD); - - for (int i = 0;i < RefMap.ntotRefMap;i++) - { - bioem_Probability_map& pProbMap = pProb.getProbMap(i); + bioem_Probability_map &pProbMap = pProb.getProbMap(i); #ifdef DEBUG - cout << "Reduction " << mpi_rank << " Map " << i << " Prob " << pProbMap.Total << " Const " << pProbMap.Constoadd << "\n"; + cout << "Reduction " << mpi_rank << " Map " << i << " Prob " + << pProbMap.Total << " Const " << pProbMap.Constoadd << "\n"; #endif - tmp1[i] = pProbMap.Total * exp(pProbMap.Constoadd - tmp2[i]); - - } - MPI_Reduce(tmp1, tmp3, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); - - //Find MaxProb - MPI_Status mpistatus; - { - int* tmpi1 = new int[RefMap.ntotRefMap]; - int* tmpi2 = new int[RefMap.ntotRefMap]; - for (int i = 0;i < RefMap.ntotRefMap;i++) - { - bioem_Probability_map& pProbMap = pProb.getProbMap(i); - tmpi1[i] = tmp2[i] <= pProbMap.Constoadd ? 
mpi_rank : -1; - //temporary array that has the mpirank for the highest pProb.constant - } - MPI_Allreduce(tmpi1, tmpi2, RefMap.ntotRefMap, MPI_INT, MPI_MAX, MPI_COMM_WORLD); - for (int i = 0;i < RefMap.ntotRefMap;i++) - { - if (tmpi2[i] == -1) - { - if (mpi_rank == 0) printf("Error: Could not find highest probability\n"); - } - else if (tmpi2[i] != 0) //Skip if rank 0 already has highest probability - { - if (mpi_rank == 0) - { - MPI_Recv(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, tmpi2[i], i, MPI_COMM_WORLD, &mpistatus); - } - else if (mpi_rank == tmpi2[i]) - { - MPI_Send(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, 0, i, MPI_COMM_WORLD); - } - } - } - delete[] tmpi1; - delete[] tmpi2; - } - - if (mpi_rank == 0) - { - for (int i = 0;i < RefMap.ntotRefMap;i++) - { - bioem_Probability_map& pProbMap = pProb.getProbMap(i); - pProbMap.Total = tmp3[i]; - pProbMap.Constoadd = tmp2[i]; - } - } - - delete[] tmp1; - delete[] tmp2; - delete[] tmp3; - if (DebugOutput >= 1 && mpi_rank == 0 && mpi_size > 1) printf("Time MPI Reduction: %f\n", timer.GetCurrentElapsedTime()); + tmp1[i] = pProbMap.Total * exp(pProbMap.Constoadd - tmp2[i]); } + MPI_Reduce(tmp1, tmp3, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_SUM, 0, + MPI_COMM_WORLD); - //Angle Reduction and Probability summation for individual angles - if (param.param_device.writeAngles) - { - const int count = RefMap.ntotRefMap * param.nTotGridAngles; - myfloat_t* tmp1 = new myfloat_t[count]; - myfloat_t* tmp2 = new myfloat_t[count]; - myfloat_t* tmp3 = new myfloat_t[count]; - for (int i = 0;i < RefMap.ntotRefMap;i++) - { - for (int j = 0;j < param.nTotGridAngles;j++) - { - // tmp1[i] = pProb.getProbMap(i).Constoadd; - // bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j); - tmp1[i * param.nTotGridAngles + j]= pProb.getProbAngle(i, j).ConstAngle; - } - } - - MPI_Allreduce(tmp1, tmp2, count, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD); - for (int i = 0;i < RefMap.ntotRefMap;i++) 
- { - for (int j = 0;j < param.nTotGridAngles;j++) - { - bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j); - tmp1[i * param.nTotGridAngles + j] = pProbAngle.forAngles * exp(pProbAngle.ConstAngle - tmp2[i * param.nTotGridAngles + j]); - } - } - MPI_Reduce(tmp1, tmp3, count, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); - if (mpi_rank == 0) - { - for (int i = 0;i < RefMap.ntotRefMap;i++) - { - for (int j = 0;j < param.nTotGridAngles;j++) - { - bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j); - pProbAngle.forAngles = tmp3[i * param.nTotGridAngles + j]; - pProbAngle.ConstAngle = tmp2[i * param.nTotGridAngles + j]; - } - } - } - delete[] tmp1; - delete[] tmp2; - delete[] tmp3; - } + // Find MaxProb + MPI_Status mpistatus; + { + int *tmpi1 = new int[RefMap.ntotRefMap]; + int *tmpi2 = new int[RefMap.ntotRefMap]; + for (int i = 0; i < RefMap.ntotRefMap; i++) + { + bioem_Probability_map &pProbMap = pProb.getProbMap(i); + tmpi1[i] = tmp2[i] <= pProbMap.Constoadd ? mpi_rank : -1; + // temporary array that has the mpirank for the highest pProb.constant + } + MPI_Allreduce(tmpi1, tmpi2, RefMap.ntotRefMap, MPI_INT, MPI_MAX, + MPI_COMM_WORLD); + for (int i = 0; i < RefMap.ntotRefMap; i++) + { + if (tmpi2[i] == -1) + { + if (mpi_rank == 0) + printf("Error: Could not find highest probability\n"); + } + else if (tmpi2[i] != + 0) // Skip if rank 0 already has highest probability + { + if (mpi_rank == 0) + { + MPI_Recv(&pProb.getProbMap(i).max, + sizeof(pProb.getProbMap(i).max), MPI_BYTE, tmpi2[i], i, + MPI_COMM_WORLD, &mpistatus); + } + else if (mpi_rank == tmpi2[i]) + { + MPI_Send(&pProb.getProbMap(i).max, + sizeof(pProb.getProbMap(i).max), MPI_BYTE, 0, i, + MPI_COMM_WORLD); + } + } + } + delete[] tmpi1; + delete[] tmpi2; + } + + if (mpi_rank == 0) + { + for (int i = 0; i < RefMap.ntotRefMap; i++) + { + bioem_Probability_map &pProbMap = pProb.getProbMap(i); + pProbMap.Total = tmp3[i]; + pProbMap.Constoadd = tmp2[i]; + } + } + + delete[] tmp1; + delete[] 
tmp2; + delete[] tmp3; + if (DebugOutput >= 1 && mpi_rank == 0 && mpi_size > 1) + printf("Time MPI Reduction: %f\n", timer.GetCurrentElapsedTime()); } -#endif + // Angle Reduction and Probability summation for individual angles + if (param.param_device.writeAngles) + { + const int count = RefMap.ntotRefMap * param.nTotGridAngles; + myprob_t *tmp1 = new myprob_t[count]; + myprob_t *tmp2 = new myprob_t[count]; + myprob_t *tmp3 = new myprob_t[count]; + for (int i = 0; i < RefMap.ntotRefMap; i++) + { + for (int j = 0; j < param.nTotGridAngles; j++) + { + // tmp1[i] = pProb.getProbMap(i).Constoadd; + // bioem_Probability_angle& pProbAngle = + // pProb.getProbAngle(i, j); + tmp1[i * param.nTotGridAngles + j] = + pProb.getProbAngle(i, j).ConstAngle; + } + } + + MPI_Allreduce(tmp1, tmp2, count, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD); + for (int i = 0; i < RefMap.ntotRefMap; i++) + { + for (int j = 0; j < param.nTotGridAngles; j++) + { + bioem_Probability_angle &pProbAngle = pProb.getProbAngle(i, j); + tmp1[i * param.nTotGridAngles + j] = + pProbAngle.forAngles * + exp(pProbAngle.ConstAngle - tmp2[i * param.nTotGridAngles + j]); + } + } + MPI_Reduce(tmp1, tmp3, count, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); + if (mpi_rank == 0) + { + for (int i = 0; i < RefMap.ntotRefMap; i++) + { + for (int j = 0; j < param.nTotGridAngles; j++) + { + bioem_Probability_angle &pProbAngle = pProb.getProbAngle(i, j); + pProbAngle.forAngles = tmp3[i * param.nTotGridAngles + j]; + pProbAngle.ConstAngle = tmp2[i * param.nTotGridAngles + j]; + } + } + } + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + } + } +#endif // ************* Writing Out Probabilities *************** if (mpi_rank == 0) + { + + // Output for Angle Probability File + ofstream angProbfile; + angProbfile.precision(OUTPUT_PRECISION); + angProbfile.setf(ios::fixed); + if (param.param_device.writeAngles) + { + angProbfile.open("ANG_PROB"); + angProbfile << "************************* HEADER:: NOTATION " + 
"*******************************************\n"; + if (!param.doquater) + { + angProbfile << " RefMap: MapNumber ; alpha[rad] - beta[rad] - " + "gamma[rad] - logP - cal log Probability + Constant: " + "Numerical Const.+ log (volume) + prior ang\n"; + } + else + { + angProbfile << " RefMap: MapNumber ; q1 - q2 -q3 - logP- cal log " + "Probability + Constant: Numerical Const. + log " + "(volume) + prior ang\n"; + }; + angProbfile << "************************* HEADER:: NOTATION " + "*******************************************\n"; + // angProbfile <<"Model Used: " << modelfile.c_str() << "\n"; + // angProbfile <<"Input Used: " << infile.c_str() << "\n"; + } + + // Output for Standard Probability + ofstream outputProbFile; + outputProbFile.precision(OUTPUT_PRECISION); + outputProbFile.setf(ios::fixed); + outputProbFile.open(OutfileName.c_str()); + outputProbFile << "************************* HEADER:: NOTATION " + "*******************************************\n"; + outputProbFile << "Notation= RefMap: MapNumber ; LogProb natural " + "logarithm of posterior Probability ; Constant: " + "Numerical Const. 
for adding Probabilities \n"; + if (!param.doquater) + { + if (param.usepsf) + { + outputProbFile << "Notation= RefMap: MapNumber ; Maximizing Param: " + "MaxLogProb - alpha[rad] - beta[rad] - gamma[rad] - " + "PSF amp - PSF phase - PSF envelope - center x - " + "center y - normalization - offsett \n"; + } + else + { + outputProbFile << "Notation= RefMap: MapNumber ; Maximizing Param: " + "MaxLogProb - alpha[rad] - beta[rad] - gamma[rad] - " + "CTF amp - CTF defocus - CTF B-Env - center x - " + "center y - normalization - offsett \n"; + } + } + else { - - // Output for Angle Probability File - ofstream angProbfile; - if(param.param_device.writeAngles) - { - angProbfile.open ("ANG_PROB"); - angProbfile <<"************************* HEADER:: NOTATION *******************************************\n"; - if(!param.doquater){ angProbfile <<" RefMap: MapNumber ; alpha[rad] - beta[rad] - gamma[rad] - logP - cal log Probability + Constant: Numerical Const.+ log (volume) + prior ang\n" ;} - else { angProbfile <<" RefMap: MapNumber ; q1 - q2 -q3 - logP- cal log Probability + Constant: Numerical Const. 
+ log (volume) + prior ang\n" ;}; - angProbfile <<"************************* HEADER:: NOTATION *******************************************\n"; - // angProbfile <<"Model Used: " << modelfile.c_str() << "\n"; - // angProbfile <<"Input Used: " << infile.c_str() << "\n"; - } - // Output for Cross Correlation File - ofstream ccProbfile; - if(param.param_device.writeCC) - { - ccProbfile.open ("CROSS_CORRELATION"); - ccProbfile <<"************************* HEADER:: NOTATION *******************************************\n"; - ccProbfile <<" RefMap: MapNumber ; Pixel x - Pixel y - Cross-Correlation \n"; - ccProbfile <<"Note that the highest Cross-correlation is the best.\n"; - ccProbfile <<"If the particles are flipped, include the keyward FLIPPED in the Param file.\n"; - ccProbfile <<"************************* HEADER:: NOTATION *******************************************\n"; - } - - // Output for Standard Probability - ofstream outputProbFile; - if(!yesoutfilename)OutfileName="Output_Probabilities"; - outputProbFile.open (OutfileName.c_str()); - outputProbFile <<"************************* HEADER:: NOTATION *******************************************\n"; - outputProbFile << "Notation= RefMap: MapNumber ; LogProb natural logarithm of posterior Probability ; Constant: Numerical Const. 
for adding Probabilities \n"; - if(!param.doquater){ - if(param.usepsf){ - outputProbFile << "Notation= RefMap: MapNumber ; Maximizing Param: MaxLogProb - alpha[rad] - beta[rad] - gamma[rad] - PSF amp - PSF phase - PSF envelope - center x - center y - normalization - offsett \n";}else{ - outputProbFile << "Notation= RefMap: MapNumber ; Maximizing Param: MaxLogProb - alpha[rad] - beta[rad] - gamma[rad] - CTF amp - CTF defocus - CTF B-Env - center x - center y - normalization - offsett \n";} - }else { - if(param.usepsf){ - // if( localcc[rx * param.param_device.NumberPixels + ry] < - outputProbFile << "Notation= RefMap: MapNumber ; Maximizing Param: MaxLogProb - q1 - q2 - q3 - q4 -PSF amp - PSF phase - PSF envelope - center x - center y - normalization - offsett \n"; - }else{ - outputProbFile << "Notation= RefMap: MapNumber ; Maximizing Param: MaxLogProb - q1 - q2 - q3 - q4 - CTF amp - CTF defocus - CTF B-Env - center x - center y - normalization - offsett \n"; - }} - if(param.writeCTF) outputProbFile << " RefMap: MapNumber ; CTFMaxParm: defocus - b-Env (B ref. 
Penzeck 2010)\n"; - if(param.yespriorAngles) outputProbFile << "**** Remark: Using Prior Proability in Angles ****\n"; - outputProbFile <<"************************* HEADER:: NOTATION *******************************************\n\n"; - - - // Loop over reference maps - // ************* Over all maps *************** - - for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++) - { - // **** Total Probability *** - bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); - - //Controll for Value of Total Probability - // cout << pProbMap.Total << " " << pProbMap.Constoadd << " " << FLT_MAX <<" " << log(FLT_MAX) << "\n"; - if(pProbMap.Total>1.e-38){ - - outputProbFile << "RefMap: " << iRefMap << " LogProb: " << log(pProbMap.Total) + pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " Constant: " << pProbMap.Constoadd << "\n"; - outputProbFile << "RefMap: " << iRefMap << " Maximizing Param: "; - // *** Param that maximize probability**** - outputProbFile << (log(pProbMap.Total) + pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + log(param.param_device.volu)) << " "; - - - }else{ - outputProbFile << "Warining! with Map " << iRefMap << "Numerical Integrated Probability without constant = 0.0;\n"; - outputProbFile << "Warining RefMap: " << iRefMap << "Check that constant is finite: " << pProbMap.Constoadd << "\n"; - outputProbFile << "Warining RefMap: i) check model, ii) check refmap , iii) check GPU on/off command inconsitency\n"; - // outputProbFile << "Warning! 
" << iRefMap << " LogProb: " << pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " Constant: " << pProbMap.Constoadd << "\n"; - } - // outputProbFile << "RefMap: " << iRefMap << " Maximizing Param: "; - - // *** Param that maximize probability**** - // outputProbFile << (pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + log(param.param_device.volu)) << " "; - - outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[0] << " [] "; - outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[1] << " [] "; - outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[2] << " [] "; - if(param.doquater)outputProbFile << param.angles[pProbMap.max.max_prob_orient].quat4 << " [] "; - outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[0] << " [] "; - if(!param.usepsf){outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[1]/ 2.f /M_PI / param.elecwavel * 0.0001 << " [micro-m] "; - }else{outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[1] << " [1/A²] ";} - if(!param.usepsf){outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[2] << " [A²] ";} - else{outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[2] << " [1/A²] ";} - outputProbFile << pProbMap.max.max_prob_cent_x << " [pix] "; - outputProbFile << pProbMap.max.max_prob_cent_y << " [pix] " ; - if(FFTAlgo){outputProbFile << pProbMap.max.max_prob_norm << " [] " ;}else{outputProbFile << "N.A." << " [] ";} - if(FFTAlgo){outputProbFile << pProbMap.max.max_prob_mu << " [] ";}else{outputProbFile << "N.A." 
<< " [] ";} - outputProbFile << "\n"; - - // Writing out CTF parameters if requiered - if(param.writeCTF && param.usepsf){ - - myfloat_t denomi; - denomi = param.CtfParam[pProbMap.max.max_prob_conv].pos[1] * param.CtfParam[pProbMap.max.max_prob_conv].pos[1] + - param.CtfParam[pProbMap.max.max_prob_conv].pos[2] * param.CtfParam[pProbMap.max.max_prob_conv].pos[2]; - outputProbFile << "RefMap: " << iRefMap << " CTFMaxParam: "; - outputProbFile << 2*M_PI*param.CtfParam[pProbMap.max.max_prob_conv].pos[1]/denomi/param.elecwavel*0.0001 << " [micro-m] "; - outputProbFile << 4*M_PI*M_PI*param.CtfParam[pProbMap.max.max_prob_conv].pos[2]/denomi << " [A²] \n"; - } - - //*************** Writing Individual Angle probabilities - if(param.param_device.writeAngles) - { - for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient++) - { - bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); - - myfloat_t logp=log(pProbAngle.forAngles)+ pProbAngle.ConstAngle+0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu); - if(!param.doquater){ - // For Euler Angles - if(param.yespriorAngles){ - logp+=param.angprior[iOrient]; - angProbfile << " " << iRefMap << " " << param.angles[iOrient].pos[0] << " " << param.angles[iOrient].pos[1] << " " << param.angles[iOrient].pos[2] << " " << logp << " Separated: " - << log(pProbAngle.forAngles) << " " << pProbAngle.ConstAngle << " " << 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " " << param.angprior[iOrient] << "\n"; - } else - { - angProbfile << " " << iRefMap << " " << param.angles[iOrient].pos[0] << " " << param.angles[iOrient].pos[1] << " " << param.angles[iOrient].pos[2] << " " << logp << " Separated: "<< - log(pProbAngle.forAngles) << " " << pProbAngle.ConstAngle << " " << 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << "\n"; - } - }else { - 
// Samething but for Quaternions - if(param.yespriorAngles){ - logp+=param.angprior[iOrient]; - angProbfile << " " << iRefMap << " " << param.angles[iOrient].pos[0] << " " << param.angles[iOrient].pos[1] << " " << param.angles[iOrient].pos[2] << " " << param.angles[iOrient].quat4 << " " << logp << " Separated: " << log(pProbAngle.forAngles) << " " << pProbAngle.ConstAngle << " " << 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " " << param.angprior[iOrient] << "\n"; - } else - { - angProbfile << " " << iRefMap << " " << param.angles[iOrient].pos[0] << " " << param.angles[iOrient].pos[1] << " " << param.angles[iOrient].pos[2] << " " << param.angles[iOrient].quat4 << " " << logp << " Separated: "<< - log(pProbAngle.forAngles) << " " << pProbAngle.ConstAngle << " " << 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << "\n"; - } - } - } - } - - //************* Writing Cross-Correlations if requiered - //************* This is currently not in the manual ***** - if(param.param_device.writeCC){ - - int cc=0; - int halfPix; - int rx=0; - int ry=0; - myfloat_t localcc[ (param.param_device.NumberPixels+1) * (param.param_device.NumberPixels+1) ]; - int used[(param.param_device.NumberPixels+1) * (param.param_device.NumberPixels+1)]; - - halfPix = param.param_device.NumberPixels / 2 ; - // Ordering the centers of the Cross Correlation - - for (int rx = 0; rx < param.param_device.NumberPixels ; rx++) - { - for (int ry = 0; ry < param.param_device.NumberPixels ; ry++) - { - localcc[ rx * param.param_device.NumberPixels + ry ] = 0.0; - used[ rx * param.param_device.NumberPixels + ry ]= 0; - } - } - - for (int cent_x = 0; cent_x < param.param_device.NumberPixels ; cent_x = cent_x + param.param_device.CCdisplace) - { - for (int cent_y = 0; cent_y < param.param_device.NumberPixels ; cent_y = cent_y + param.param_device.CCdisplace) - { - //localcc[ rx * 
param.param_device.NumberPixels + ry ] = 0.0; - bioem_Probability_cc& pProbCC = pProb.getProbCC(iRefMap, cc); - - // Applying Periodic boundary conditions to the CC - if(cent_x < halfPix && cent_y < halfPix){ - // ccProbfile << " " << iRefMap << " " << (myfloat_t) halfPix - cent_x << " " << halfPix - cent_y << " " << pProbCC.forCC <<"\n"; - rx = halfPix - cent_x; - ry = halfPix - cent_y;} - if(cent_x >= halfPix && cent_y < halfPix){ - // ccProbfile << " " << iRefMap << " " << (myfloat_t) 3 * halfPix - cent_x << " " << halfPix - cent_y << " " << pProbCC.forCC <<"\n"; - rx = 3 * halfPix - cent_x; - ry = halfPix - cent_y;} - if(cent_x < halfPix && cent_y >= halfPix){ - // ccProbfile << " " << iRefMap << " " << (myfloat_t) halfPix - cent_x << " " << 3 * halfPix - cent_y << " " << pProbCC.forCC <<"\n"; - rx = halfPix - cent_x; - ry = 3 * halfPix - cent_y;} - if(cent_x >= halfPix && cent_y >= halfPix){ - // ccProbfile << " " << iRefMap << " " << 3* halfPix - cent_x << " " << 3 * halfPix - cent_y << " " << pProbCC.forCC <<"\n"; - rx = 3 * halfPix - cent_x; - ry = 3 * halfPix - cent_y;} - // cout << " TT " << cent_x << " " << rx << " " << cent_y << " " << ry << " " << pProbCC.forCC << "\n"; - if(!param.param_device.CCwithBayes){ - localcc[ rx * param.param_device.NumberPixels + ry ] = pProbCC.forCC; - }else{ - localcc[ rx * param.param_device.NumberPixels + ry ] = log(pProbCC.forCC)+pProbCC.ConstCC; - } - used[ rx * param.param_device.NumberPixels + ry] = 1; - cc++; - } - // ccProbfile << "\n"; - } - if(!param.ignoreCCoff){ -/* for (int rx = param.param_device.CCdisplace; rx < param.param_device.NumberPixels ; rx = rx + param.param_device.CCdisplace) - { - for (int ry = param.param_device.CCdisplace; ry < param.param_device.NumberPixels ; ry = ry + param.param_device.CCdisplace) - {*/ - for (int rx = param.param_device.CCdisplace; rx < param.param_device.NumberPixels ; rx++) - { - for (int ry = param.param_device.CCdisplace; ry < param.param_device.NumberPixels ; ry++) - { 
- - if(used[ rx * param.param_device.NumberPixels + ry ] == 1){ - ccProbfile << "RefMap: "<< iRefMap << " " << rx << " " << ry << " " << localcc[ rx * param.param_device.NumberPixels + ry ] << "\n" ; - }else{ - if(localcc[ rx * param.param_device.NumberPixels + ry ] <= -FLT_MAX)ccProbfile << "RefMap: "<< iRefMap << " " << rx << " " << ry << " " << -FLT_MAX << "\n" ; - } - // cout << " cc " << rx << " " << ry << " " << localcc[ rx * param.param_device.NumberPixels + ry ] <<"\n" ; - } - // ccProbfile << "\n"; - } - }else{ - for (int rx = param.param_device.CCdisplace; rx < param.param_device.NumberPixels ; rx++) - { - for (int ry = param.param_device.CCdisplace; ry < param.param_device.NumberPixels ; ry++) - { - if(used[ rx * param.param_device.NumberPixels + ry ] == 1){ - ccProbfile << "RefMap: "<< iRefMap << " " << rx << " " << ry << " " << localcc[ rx * param.param_device.NumberPixels + ry ] << "\n" ; - }else{ - if(localcc[ rx * param.param_device.NumberPixels + ry ] <= -FLT_MAX)ccProbfile << "RefMap: "<< iRefMap << " " << rx << " " << ry << " " << -FLT_MAX << "\n" ; - } - } - // ccProbfile << "\n"; - } - - } - } - } - - if(param.param_device.writeAngles) - { - angProbfile.close(); - } - - if(param.param_device.writeCC) - { - ccProbfile.close(); - } - - outputProbFile.close(); + if (param.usepsf) + { + // if( localcc[rx * param.param_device.NumberPixels + ry] < + outputProbFile << "Notation= RefMap: MapNumber ; Maximizing Param: " + "MaxLogProb - q1 - q2 - q3 - q4 -PSF amp - PSF phase " + "- PSF envelope - center x - center y - " + "normalization - offsett \n"; + } + else + { + outputProbFile << "Notation= RefMap: MapNumber ; Maximizing Param: " + "MaxLogProb - q1 - q2 - q3 - q4 - CTF amp - CTF " + "defocus - CTF B-Env - center x - center y - " + "normalization - offsett \n"; + } } + if (param.writeCTF) + outputProbFile << " RefMap: MapNumber ; CTFMaxParm: defocus - b-Env (B " + "ref. 
Penzeck 2010)\n"; + if (param.yespriorAngles) + outputProbFile << "**** Remark: Using Prior Proability in Angles ****\n"; + outputProbFile << "************************* HEADER:: NOTATION " + "*******************************************\n\n"; + + // Loop over reference maps + // ************* Over all maps *************** + + for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap++) + { + // **** Total Probability *** + bioem_Probability_map &pProbMap = pProb.getProbMap(iRefMap); - return(0); + // Controll for Value of Total Probability + // cout << pProbMap.Total << " " << pProbMap.Constoadd << " " << FLT_MAX + // <<" " << log(FLT_MAX) << "\n"; + if (pProbMap.Total > 1.e-38) + { + + outputProbFile << "RefMap: " << iRefMap << " LogProb: " + << log(pProbMap.Total) + pProbMap.Constoadd + + 0.5 * log(M_PI) + + (1 - param.param_device.Ntotpi * 0.5) * + (log(2 * M_PI) + 1) + + log(param.param_device.volu) + << " Constant: " << pProbMap.Constoadd << "\n"; + outputProbFile << "RefMap: " << iRefMap << " Maximizing Param: "; + // *** Param that maximize probability**** + outputProbFile << (log(pProbMap.Total) + pProbMap.Constoadd + + 0.5 * log(M_PI) + + (1 - param.param_device.Ntotpi * 0.5) * + (log(2 * M_PI) + 1) + + log(param.param_device.volu)) + << " "; + } + else + { + outputProbFile + << "Warining! with Map " << iRefMap + << "Numerical Integrated Probability without constant = 0.0;\n"; + outputProbFile << "Warining RefMap: " << iRefMap + << "Check that constant is finite: " + << pProbMap.Constoadd << "\n"; + outputProbFile << "Warining RefMap: i) check model, ii) check refmap , " + "iii) check GPU on/off command inconsitency\n"; + // outputProbFile << "Warning! 
" << iRefMap << " LogProb: " + //<< pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - + // param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + + // log(param.param_device.volu) << " Constant: " << pProbMap.Constoadd + //<< "\n"; + } + // outputProbFile << "RefMap: " << iRefMap << " Maximizing + // Param: "; + + // *** Param that maximize probability**** + // outputProbFile << (pProbMap.Constoadd + 0.5 * log(M_PI) + (1 + //- param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + + // log(param.param_device.volu)) << " "; + + outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[0] + << " [] "; + outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[1] + << " [] "; + outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[2] + << " [] "; + if (param.doquater) + outputProbFile << param.angles[pProbMap.max.max_prob_orient].quat4 + << " [] "; + outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[0] + << " [] "; + if (!param.usepsf) + { + outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[1] / + 2.f / M_PI / param.elecwavel * 0.0001 + << " [micro-m] "; + } + else + { + outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[1] + << " [1/A²] "; + } + if (!param.usepsf) + { + outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[2] + << " [A²] "; + } + else + { + outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[2] + << " [1/A²] "; + } + outputProbFile << pProbMap.max.max_prob_cent_x << " [pix] "; + outputProbFile << pProbMap.max.max_prob_cent_y << " [pix] "; + outputProbFile << pProbMap.max.max_prob_norm << " [] "; + outputProbFile << pProbMap.max.max_prob_mu << " [] "; + outputProbFile << "\n"; + + // Writing out CTF parameters if requiered + if (param.writeCTF && param.usepsf) + { + + myfloat_t denomi; + denomi = param.CtfParam[pProbMap.max.max_prob_conv].pos[1] * + param.CtfParam[pProbMap.max.max_prob_conv].pos[1] + + param.CtfParam[pProbMap.max.max_prob_conv].pos[2] * + 
param.CtfParam[pProbMap.max.max_prob_conv].pos[2]; + outputProbFile << "RefMap: " << iRefMap << " CTFMaxParam: "; + outputProbFile + << 2 * M_PI * param.CtfParam[pProbMap.max.max_prob_conv].pos[1] / + denomi / param.elecwavel * 0.0001 + << " [micro-m] "; + outputProbFile + << 4 * M_PI * M_PI * + param.CtfParam[pProbMap.max.max_prob_conv].pos[2] / denomi + << " [A²] \n"; + } + + //*************** Writing Individual Angle probabilities + if (param.param_device.writeAngles) + { + // Finding the best param.param_device.writeAngles probabilities + // This implementation is clean, but not the most optimal one + // and it supposes param.param_device.writeAngles << + // param.nTotGridAngles + unsigned K = + param.param_device.writeAngles; // number of best probabilities + // clang-format off + std::priority_queue<std::pair<double, int>, + std::vector<std::pair<double, int> >, + std::greater<std::pair<double, int> > > + q; + // clang-format on + for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient++) + { + bioem_Probability_angle &pProbAngle = + pProb.getProbAngle(iRefMap, iOrient); + + myprob_t logp = + log(pProbAngle.forAngles) + pProbAngle.ConstAngle + + 0.5 * log(M_PI) + + (1 - param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + + log(param.param_device.volu); + + if (q.size() < K) + q.push(std::pair<double, int>(logp, iOrient)); + else if (q.top().first < logp) + { + q.pop(); + q.push(std::pair<double, int>(logp, iOrient)); + } + } + K = q.size(); + int *rev_iOrient = (int *) malloc(K * sizeof(int)); + myprob_t *rev_logp = (myprob_t *) malloc(K * sizeof(myprob_t)); + for (int i = K - 1; i >= 0; i--) + { + rev_iOrient[i] = q.top().second; + rev_logp[i] = q.top().first; + q.pop(); + } + for (unsigned i = 0; i < K; i++) + { + int iOrient = rev_iOrient[i]; + bioem_Probability_angle &pProbAngle = + pProb.getProbAngle(iRefMap, iOrient); + myprob_t logp = rev_logp[i]; + + if (!param.doquater) + { + // For Euler Angles + if (param.yespriorAngles) + { + logp += 
param.angprior[iOrient]; + angProbfile << " " << iRefMap << " " + << param.angles[iOrient].pos[0] << " " + << param.angles[iOrient].pos[1] << " " + << param.angles[iOrient].pos[2] << " " << logp + << " Separated: " << log(pProbAngle.forAngles) << " " + << pProbAngle.ConstAngle << " " + << 0.5 * log(M_PI) + + (1 - param.param_device.Ntotpi * 0.5) * + (log(2 * M_PI) + 1) + + log(param.param_device.volu) + << " " << param.angprior[iOrient] << "\n"; + } + else + { + angProbfile << " " << iRefMap << " " + << param.angles[iOrient].pos[0] << " " + << param.angles[iOrient].pos[1] << " " + << param.angles[iOrient].pos[2] << " " << logp + << " Separated: " << log(pProbAngle.forAngles) << " " + << pProbAngle.ConstAngle << " " + << 0.5 * log(M_PI) + + (1 - param.param_device.Ntotpi * 0.5) * + (log(2 * M_PI) + 1) + + log(param.param_device.volu) + << "\n"; + } + } + else + { + // Samething but for Quaternions + if (param.yespriorAngles) + { + logp += param.angprior[iOrient]; + angProbfile << " " << iRefMap << " " + << param.angles[iOrient].pos[0] << " " + << param.angles[iOrient].pos[1] << " " + << param.angles[iOrient].pos[2] << " " + << param.angles[iOrient].quat4 << " " << logp + << " Separated: " << log(pProbAngle.forAngles) << " " + << pProbAngle.ConstAngle << " " + << 0.5 * log(M_PI) + + (1 - param.param_device.Ntotpi * 0.5) * + (log(2 * M_PI) + 1) + + log(param.param_device.volu) + << " " << param.angprior[iOrient] << "\n"; + } + else + { + angProbfile << " " << iRefMap << " " + << param.angles[iOrient].pos[0] << " " + << param.angles[iOrient].pos[1] << " " + << param.angles[iOrient].pos[2] << " " + << param.angles[iOrient].quat4 << " " << logp + << " Separated: " << log(pProbAngle.forAngles) << " " + << pProbAngle.ConstAngle << " " + << 0.5 * log(M_PI) + + (1 - param.param_device.Ntotpi * 0.5) * + (log(2 * M_PI) + 1) + + log(param.param_device.volu) + << "\n"; + } + } + } + free(rev_iOrient); + free(rev_logp); + } + } + + if (param.param_device.writeAngles) + { + 
angProbfile.close(); + } + + outputProbFile.close(); + } + + return (0); } -int bioem::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap) +int bioem::compareRefMaps(int iPipeline, int iOrient, int iConvStart, + int maxParallelConv, mycomplex_t *localmultFFT, + myparam5_t *comp_params, const int startMap) { - //*************************************************************************************** //***** BioEM routine for comparing reference maps to convoluted maps ***** //*************************************************************************************** - cuda_custom_timeslot("Comparison", iOrient, iConv, COLOR_COMPARISON); - if (FFTAlgo) - { - //With FFT Algorithm + cuda_custom_timeslot("Comparison", iOrient, iConvStart, COLOR_COMPARISON); + + int k = (iPipeline & 1) * param.nTotParallelConv; + + if (BioEMAlgo == 1) + { #pragma omp parallel for schedule(dynamic, 1) - for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) - { - const int num = omp_get_thread_num(); - calculateCCFFT(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, localmultFFT, param.fft_scratch_complex[num], param.fft_scratch_real[num]); - } + for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap++) + { + const int num = omp_get_thread_num(); + for (int iConv = 0; iConv < maxParallelConv; iConv++) + { + calculateCCFFT(iRefMap, &localmultFFT[(k + iConv) * param.FFTMapSize], + param.fft_scratch_complex[num], + param.fft_scratch_real[num]); + doRefMapFFT( + iRefMap, iOrient, iConvStart + iConv, comp_params[k + iConv].amp, + comp_params[k + iConv].pha, comp_params[k + iConv].env, + comp_params[k + iConv].sumC, comp_params[k + iConv].sumsquareC, + param.fft_scratch_real[num], pProb, param.param_device, RefMap); + } } + } else + { + myblockCPU_t *comp_blocks = new myblockCPU_t[maxParallelConv]; + for (int iRefMap = 
startMap; iRefMap < RefMap.ntotRefMap; iRefMap++) { - //Without FFT Algorithm #pragma omp parallel for schedule(dynamic, 1) - for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) - { - compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, conv_map, pProb, param.param_device, RefMap); - } + for (int iConv = 0; iConv < maxParallelConv; iConv++) + { + const int num = omp_get_thread_num(); + calculateCCFFT(iRefMap, &localmultFFT[(k + iConv) * param.FFTMapSize], + param.fft_scratch_complex[num], + param.fft_scratch_real[num]); + doRefMap_CPU_Parallel(iRefMap, iOrient, iConv, + param.fft_scratch_real[num], &comp_params[k], + comp_blocks); + } + doRefMap_CPU_Reduce(iRefMap, iOrient, iConvStart, maxParallelConv, + &comp_params[k], comp_blocks); } + delete[] comp_blocks; + } cuda_custom_timeslot_end; - return(0); + return (0); } -inline void bioem::calculateCCFFT(int iRefMap, int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT, mycomplex_t* localCCT, myfloat_t* lCC) +inline void bioem::calculateCCFFT(int iRefMap, mycomplex_t *localConvFFT, + mycomplex_t *localCCT, myfloat_t *lCC) { //*************************************************************************************** - //***** Calculating cross correlation in FFTALGOrithm ***** + //***** Calculating cross correlation with FFT algorithm ***** - for(int i = 0; i < param.param_device.NumberPixels; i++) - { - for(int j = 0; j < param.param_device.NumberPixels; j++) lCC[i * param.param_device.NumberPixels + j] = 0.f; - } + for (int i = 0; i < param.param_device.NumberPixels; i++) + { + for (int j = 0; j < param.param_device.NumberPixels; j++) + lCC[i * param.param_device.NumberPixels + j] = 0.f; + } + + const mycomplex_t *RefMapFFT = &RefMap.RefMapsFFT[iRefMap * param.FFTMapSize]; + for (int i = 0; i < param.param_device.NumberPixels * + param.param_device.NumberFFTPixels1D; + i++) + { + 
localCCT[i][0] = localConvFFT[i][0] * RefMapFFT[i][0] + + localConvFFT[i][1] * RefMapFFT[i][1]; + localCCT[i][1] = localConvFFT[i][1] * RefMapFFT[i][0] - + localConvFFT[i][0] * RefMapFFT[i][1]; + } + + myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, localCCT, lCC); +} +inline void bioem::doRefMap_CPU_Parallel(int iRefMap, int iOrient, int iConv, + myfloat_t *lCC, + myparam5_t *comp_params, + myblockCPU_t *comp_block) +{ + //*************************************************************************************** + //***** Computation of log probabilities, done in parallel by OMP - const mycomplex_t* RefMapFFT = &RefMap.RefMapsFFT[iRefMap * param.FFTMapSize]; - for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D; i++) + int myGlobalId = iConv * param.param_device.NtotDisp; + myfloat_t bestLogpro = MIN_PROB; + int dispC = + param.param_device.NumberPixels - param.param_device.maxDisplaceCenter; + int cent_x, cent_y, address, bestId = 0; + myfloat_t value, bestValue = 0.; + myprob_t logpro = 0., sumExp = 0.; + + for (int myX = 0; myX < param.param_device.NxDisp; myX++) + { + for (int myY = 0; myY < param.param_device.NxDisp; myY++, myGlobalId++) { - localCCT[i][0] = localConvFFT[i][0] * RefMapFFT[i][0] + localConvFFT[i][1] * RefMapFFT[i][1]; - localCCT[i][1] = localConvFFT[i][1] * RefMapFFT[i][0] - localConvFFT[i][0] * RefMapFFT[i][1]; + cent_x = (myX * param.param_device.GridSpaceCenter + dispC) % + param.param_device.NumberPixels; + cent_y = (myY * param.param_device.GridSpaceCenter + dispC) % + param.param_device.NumberPixels; + address = cent_x * param.param_device.NumberPixels + cent_y; + value = (myfloat_t) lCC[address] / + (myfloat_t)(param.param_device.NumberPixels * + param.param_device.NumberPixels); + + logpro = calc_logpro( + param.param_device, comp_params[iConv].amp, comp_params[iConv].pha, + comp_params[iConv].env, comp_params[iConv].sumC, + comp_params[iConv].sumsquareC, value, RefMap.sum_RefMap[iRefMap], + 
RefMap.sumsquare_RefMap[iRefMap]); +#ifdef DEBUG_PROB + printf("\t\t\tProb: iRefMap %d, iOrient %d, iConv %d, " + "cent_x %d, cent_y %d, address %d, value %f, logpro %f\n", + iRefMap, iOrient, iConv, cent_x, cent_y, address, value, logpro); +#endif + if (bestLogpro < logpro) + { + sumExp *= exp(-logpro + bestLogpro); + bestLogpro = logpro; + bestId = myGlobalId; + bestValue = value; + } + sumExp += exp(logpro - bestLogpro); } + } - myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, localCCT, lCC); - - // printf("HereCC %p %f %d %d %d %d \n", &lCC[139 * param.param_device.NumberPixels + 139],lCC[139 * param.param_device.NumberPixels + 139],mpi_rank,iConv,iOrient,iRefMap); + comp_block[iConv].logpro = bestLogpro; + comp_block[iConv].sumExp = sumExp; + comp_block[iConv].id = bestId; + comp_block[iConv].value = bestValue; +} - doRefMapFFT(iRefMap, iOrient, iConv, amp, pha, env, lCC, sumC, sumsquareC, pProb, param.param_device, RefMap); +inline void bioem::doRefMap_CPU_Reduce(int iRefMap, int iOrient, int iConvStart, + int maxParallelConv, + myparam5_t *comp_params, + myblockCPU_t *comp_block) +{ + //*************************************************************************************** + //***** Reduction of previously compututed log probabilities + bioem_Probability_map &pProbMap = pProb.getProbMap(iRefMap); -#ifdef DEBUG - if (param.param_device.writeCC) - { int cc=0; - for (int cent_x = 0; cent_x < param.param_device.NumberPixels ; cent_x = cent_x + param.param_device.CCdisplace) - { - for (int cent_y = 0; cent_y < param.param_device.NumberPixels ; cent_y = cent_y + param.param_device.CCdisplace) - { - cout << "CHECKCC " << " " << cent_x << " " << cent_y <<" " << lCC[cent_x * param.param_device.NumberPixels + cent_y] / (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels ) << "\n"; - cc++; - } - } + for (int i = 0; i < maxParallelConv; i++) + { + if (pProbMap.Constoadd < comp_block[i].logpro) + { + pProbMap.Total *= 
exp(-comp_block[i].logpro + pProbMap.Constoadd); + pProbMap.Constoadd = comp_block[i].logpro; + + // ********** Getting parameters that maximize the probability *********** + int myGlobalId = comp_block[i].id; + int myConv = myGlobalId / param.param_device.NtotDisp; + myGlobalId -= myConv * param.param_device.NtotDisp; + int myX = myGlobalId / param.param_device.NxDisp; + myGlobalId -= myX * param.param_device.NxDisp; + int myY = myGlobalId; + + int dispC = param.param_device.NumberPixels - + param.param_device.maxDisplaceCenter; + myfloat_t value = comp_block[i].value; + + pProbMap.max.max_prob_cent_x = + -((myX * param.param_device.GridSpaceCenter + dispC) - + param.param_device.NumberPixels); + pProbMap.max.max_prob_cent_y = + -((myY * param.param_device.GridSpaceCenter + dispC) - + param.param_device.NumberPixels); + pProbMap.max.max_prob_orient = iOrient; + pProbMap.max.max_prob_conv = iConvStart + myConv; + + pProbMap.max.max_prob_norm = + -(-comp_params[myConv].sumC * RefMap.sum_RefMap[iRefMap] + + param.param_device.Ntotpi * value) / + (comp_params[myConv].sumC * comp_params[myConv].sumC - + comp_params[myConv].sumsquareC * param.param_device.Ntotpi); + pProbMap.max.max_prob_mu = + -(-comp_params[myConv].sumC * value + + comp_params[myConv].sumsquareC * RefMap.sum_RefMap[iRefMap]) / + (comp_params[myConv].sumC * comp_params[myConv].sumC - + comp_params[myConv].sumsquareC * param.param_device.Ntotpi); + +#ifdef DEBUG_PROB + printf("\tProbabilities change: iRefMap %d, iOrient %d, iConv %d, " + "Total %f, Const %f, bestlogpro %f, sumExp %f, bestId %d\n", + iRefMap, iOrient, iConvStart + myConv, pProbMap.Total, + pProbMap.Constoadd, comp_block[i].logpro, comp_block[i].sumExp, + comp_block[i].id); + printf("\tParameters: myConv %d, myX %d, myY %d, cent_x -, cent_y -, " + "probX %d, probY %d\n", + myConv, myX, myY, pProbMap.max.max_prob_cent_x, + pProbMap.max.max_prob_cent_y); +#endif } + pProbMap.Total += + comp_block[i].sumExp * exp(comp_block[i].logpro - 
pProbMap.Constoadd); +#ifdef DEBUG_PROB + printf("\t\tProbabilities after Reduce: iRefMap %d, iOrient %d, iConv " + "%d, Total %f, Const %f, bestlogpro %f, sumExp %f, bestId %d\n", + iRefMap, iOrient, iConvStart, pProbMap.Total, pProbMap.Constoadd, + comp_block[i].logpro, comp_block[i].sumExp, comp_block[i].id); #endif + // Code for writing angles, not used by default + if (param.param_device.writeAngles) + { + bioem_Probability_angle &pProbAngle = + pProb.getProbAngle(iRefMap, iOrient); + if (pProbAngle.ConstAngle < comp_block[i].logpro) + { + pProbAngle.forAngles *= + exp(-comp_block[i].logpro + pProbAngle.ConstAngle); + pProbAngle.ConstAngle = comp_block[i].logpro; + } + pProbAngle.forAngles += comp_block[i].sumExp * + exp(comp_block[i].logpro - pProbAngle.ConstAngle); + } + } } -int bioem::createProjection(int iMap, mycomplex_t* mapFFT) +int bioem::createProjection(int iMap, mycomplex_t *mapFFT) { // ************************************************************************************** - // **** BioEM Create Projection routine in Euler angles / Quaternions ****************** - // ********************* and turns projection into Fourier space ************************ + // **** BioEM Create Projection routine in Euler angles / Quaternions + // ****************** + // ********************* and turns projection into Fourier space + // ************************ // ************************************************************************************** cuda_custom_timeslot("Projection", iMap, 0, COLOR_PROJECTION); @@ -1140,35 +1560,38 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT) myfloat3_t RotatedPointsModel[Model.nPointsModel]; myfloat_t rotmat[3][3]; myfloat_t alpha, gam, beta; - myfloat_t* localproj; + myfloat_t *localproj; localproj = param.fft_scratch_real[omp_get_thread_num()]; - memset(localproj, 0, param.param_device.NumberPixels * param.param_device.NumberPixels * sizeof(*localproj)); + memset(localproj, 0, param.param_device.NumberPixels * + 
param.param_device.NumberPixels * + sizeof(*localproj)); //*************** Rotating the model **************************** //*************** Quaternions **************************** - if(param.doquater){ + if (param.doquater) + { myfloat_t quater[4]; - //quaternion - quater[0]=param.angles[iMap].pos[0]; - quater[1]=param.angles[iMap].pos[1]; - quater[2]=param.angles[iMap].pos[2]; - quater[3]=param.angles[iMap].quat4; - - //Rotation Matrix for Quaterions (wikipeda) - rotmat[0][0] = 1- 2 * quater[1] * quater[1] - 2 * quater[2] * quater[2]; - rotmat[1][0] = 2 * ( quater[0] * quater[1] - quater[2] * quater[3]); - rotmat[2][0] = 2 * ( quater[0] * quater[2] + quater[1] * quater[3]); - rotmat[0][1] = 2 * ( quater[0] * quater[1] + quater[2] * quater[3]); - rotmat[1][1] = 1- 2 * quater[0] * quater[0] - 2 * quater[2] * quater[2]; - rotmat[2][1] = 2 * ( quater[1] * quater[2] - quater[0] * quater[3]); - rotmat[0][2] = 2 * ( quater[0] * quater[2] - quater[1] * quater[3]); - rotmat[1][2] = 2 * ( quater[1] * quater[2] + quater[0] * quater[3]); - rotmat[2][2] = 1- 2 * quater[0] * quater[0] - 2 * quater[1] * quater[1]; - - - } else{ + // quaternion + quater[0] = param.angles[iMap].pos[0]; + quater[1] = param.angles[iMap].pos[1]; + quater[2] = param.angles[iMap].pos[2]; + quater[3] = param.angles[iMap].quat4; + + // Rotation Matrix for Quaterions (wikipeda) + rotmat[0][0] = 1 - 2 * quater[1] * quater[1] - 2 * quater[2] * quater[2]; + rotmat[1][0] = 2 * (quater[0] * quater[1] - quater[2] * quater[3]); + rotmat[2][0] = 2 * (quater[0] * quater[2] + quater[1] * quater[3]); + rotmat[0][1] = 2 * (quater[0] * quater[1] + quater[2] * quater[3]); + rotmat[1][1] = 1 - 2 * quater[0] * quater[0] - 2 * quater[2] * quater[2]; + rotmat[2][1] = 2 * (quater[1] * quater[2] - quater[0] * quater[3]); + rotmat[0][2] = 2 * (quater[0] * quater[2] - quater[1] * quater[3]); + rotmat[1][2] = 2 * (quater[1] * quater[2] + quater[0] * quater[3]); + rotmat[2][2] = 1 - 2 * quater[0] * quater[0] - 2 * quater[1] * 
quater[1]; + } + else + { //*************** Euler Angles**************************** // Doing Euler angles instead of Quaternions @@ -1176,9 +1599,10 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT) beta = param.angles[iMap].pos[1]; gam = param.angles[iMap].pos[2]; - //*** To see how things are going: -#ifdef DEBUG - cout << "Id " << omp_get_thread_num() << " Angs: " << alpha << " " << beta << " " << gam << "\n"; +//*** To see how things are going: +#ifdef DEBUG + cout << "Id " << omp_get_thread_num() << " Angs: " << alpha << " " << beta + << " " << gam << "\n"; #endif // ********** Creat Rotation with pre-defiend grid of orientations********** // Same notation as in Goldstein and Mathematica @@ -1191,308 +1615,446 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT) rotmat[2][0] = sin(beta) * sin(alpha); rotmat[2][1] = -sin(beta) * cos(alpha); rotmat[2][2] = cos(beta); + } -} - -// The rotation matrix is calculated either for the quaternions or for the euler angles - for(int n = 0; n < Model.nPointsModel; n++) - { - RotatedPointsModel[n].pos[0] = 0.0; - RotatedPointsModel[n].pos[1] = 0.0; - RotatedPointsModel[n].pos[2] = 0.0; - } - for(int n = 0; n < Model.nPointsModel; n++) + // The rotation matrix is calculated either for the quaternions or for the + // euler angles + for (int n = 0; n < Model.nPointsModel; n++) + { + RotatedPointsModel[n].pos[0] = 0.0; + RotatedPointsModel[n].pos[1] = 0.0; + RotatedPointsModel[n].pos[2] = 0.0; + } + for (int n = 0; n < Model.nPointsModel; n++) + { + for (int k = 0; k < 3; k++) + { + for (int j = 0; j < 3; j++) { - for(int k = 0; k < 3; k++) - { - for(int j = 0; j < 3; j++) - { - RotatedPointsModel[n].pos[k] += rotmat[k][j] * Model.points[n].point.pos[j]; - } - } + RotatedPointsModel[n].pos[k] += + rotmat[k][j] * Model.points[n].point.pos[j]; } + } + } - - - if(param.printrotmod) { - for(int n = 0; n < Model.nPointsModel; n++) cout << "ROTATED " << iMap << " " << n <<" "<< RotatedPointsModel[n].pos[0] << " 
" << RotatedPointsModel[n].pos[1] << " " << RotatedPointsModel[n].pos[2] << "\n"; - + if (param.printrotmod) + { + for (int n = 0; n < Model.nPointsModel; n++) + cout << "ROTATED " << iMap << " " << n << " " + << RotatedPointsModel[n].pos[0] << " " + << RotatedPointsModel[n].pos[1] << " " + << RotatedPointsModel[n].pos[2] << "\n"; } int i, j; - //*************** Creating projection **************************** + //*************** Creating projection **************************** //********** Projection with radius *************** int irad; myfloat_t dist, rad2; - myfloat_t tempden=0.0; + myfloat_t tempden = 0.0; - for(int n = 0; n < Model.nPointsModel; n++) + for (int n = 0; n < Model.nPointsModel; n++) + { + if (Model.points[n].radius <= param.pixelSize) { - if(Model.points[n].radius <= param.pixelSize){ - // cout << "Radius less than Pixel size: use keyword NO_PROJECT_RADIUS in inputfile\n"; - i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); - j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); - - if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels) - { - if (DebugOutput >= 0) cout << "WARNING:::: Model Point out of Projection map: " << i << ", " << j << "\n"; - // continue; - if(not param.ignorepointsout)exit(1); - } - - localproj[i * param.param_device.NumberPixels + j] += Model.points[n].density; - tempden += Model.points[n].density; - - // exit(1); - }else{ - - //Getting Centers of Sphere - i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f) -param.shiftX; - j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f) -param.shiftY; - //Getting the radius - irad=int( Model.points[n].radius / param.pixelSize ) + 1; - rad2= Model.points[n].radius * 
Model.points[n].radius; - - if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels) - { - if (DebugOutput >= 0) cout << "WARNING::: Model Point out of Projection map: " << i << ", " << j << "\n"; - cout << "Model point " << n << "Rotation: " << iMap <<" "<< RotatedPointsModel[n].pos[0] << " " << RotatedPointsModel[n].pos[1] << " " << RotatedPointsModel[n].pos[2] << "\n"; - cout << "Original coor " << n <<" " << Model.points[n].point.pos[0] << " " << Model.points[n].point.pos[1] << " " <<Model.points[n].point.pos[2] << "\n"; - cout << "WARNING: Angle orient " << n << " " << param.angles[iMap].pos[0] << " " << param.angles[iMap].pos[1] << " " << param.angles[iMap].pos[2] << " out " << i << " " << j << "\n"; - cout << "WARNING: MPI rank " << mpi_rank <<"\n"; - // continue; - if(not param.ignorepointsout)exit(1); - } - - - //Projecting over the radius - for(int ii= i - irad; ii < i + irad + 1 ; ii++) - { - for(int jj = j - irad; jj < j + irad + 1 ; jj++) - { - dist= ( (myfloat_t) (ii-i)*(ii-i)+(jj-j)*(jj-j) ) * param.pixelSize * param.pixelSize ; //at pixel center - if( dist < rad2 ) - { - localproj[ii * param.param_device.NumberPixels + jj] += param.pixelSize * param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density - * 3 / (4 * M_PI * Model.points[n].radius * rad2 ); - tempden += param.pixelSize * param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density - * 3 / (4 * M_PI * Model.points[n].radius * rad2 ); - } - } - } + // cout << "Radius less than Pixel size: use keyword NO_PROJECT_RADIUS + // in inputfile\n"; + i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); + j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); + + if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || + j >= param.param_device.NumberPixels) + { + if (DebugOutput >= 0) + cout << 
"WARNING:::: Model Point out of Projection map: " << i << ", " + << j << "\n"; + // continue; + if (not param.ignorepointsout) + exit(1); } + localproj[i * param.param_device.NumberPixels + j] += + Model.points[n].density; + tempden += Model.points[n].density; + + // exit(1); } + else + { - // To avoid numerical mismatch in projection errors we normalize by the initial density + // Getting Centers of Sphere + i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f) - + param.shiftX; + j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f) - + param.shiftY; + // Getting the radius + irad = int(Model.points[n].radius / param.pixelSize) + 1; + rad2 = Model.points[n].radius * Model.points[n].radius; + + if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || + j >= param.param_device.NumberPixels) + { + if (DebugOutput >= 0) + cout << "WARNING::: Model Point out of Projection map: " << i << ", " + << j << "\n"; + cout << "Model point " << n << "Rotation: " << iMap << " " + << RotatedPointsModel[n].pos[0] << " " + << RotatedPointsModel[n].pos[1] << " " + << RotatedPointsModel[n].pos[2] << "\n"; + cout << "Original coor " << n << " " << Model.points[n].point.pos[0] + << " " << Model.points[n].point.pos[1] << " " + << Model.points[n].point.pos[2] << "\n"; + cout << "WARNING: Angle orient " << n << " " + << param.angles[iMap].pos[0] << " " << param.angles[iMap].pos[1] + << " " << param.angles[iMap].pos[2] << " out " << i << " " << j + << "\n"; + cout << "WARNING: MPI rank " << mpi_rank << "\n"; + // continue; + if (not param.ignorepointsout) + exit(1); + } - myfloat_t ratioDen; - - ratioDen = Model.NormDen / tempden ; + // Projecting over the radius + for (int ii = i - irad; ii < i + irad + 1; ii++) + { + for (int jj = j - irad; jj < j + irad + 1; jj++) + { + dist = ((myfloat_t)(ii - i) * (ii - i) + (jj - j) * (jj - j)) * + param.pixelSize * 
param.pixelSize; // at pixel center + if (dist < rad2) + { + localproj[ii * param.param_device.NumberPixels + jj] += + param.pixelSize * param.pixelSize * 2 * sqrt(rad2 - dist) * + Model.points[n].density * 3 / + (4 * M_PI * Model.points[n].radius * rad2); + tempden += param.pixelSize * param.pixelSize * 2 * + sqrt(rad2 - dist) * Model.points[n].density * 3 / + (4 * M_PI * Model.points[n].radius * rad2); + } + } + } + } + } + + // To avoid numerical mismatch in projection errors we normalize by the + // initial density - for(int i = 0; i < param.param_device.NumberPixels ; i++){ - for(int j = 0; j < param.param_device.NumberPixels ; j++){ - localproj[ i * param.param_device.NumberPixels + j] *= ratioDen; - } - } + myfloat_t ratioDen; + + ratioDen = Model.NormDen / tempden; + + for (int i = 0; i < param.param_device.NumberPixels; i++) + { + for (int j = 0; j < param.param_device.NumberPixels; j++) + { + localproj[i * param.param_device.NumberPixels + j] *= ratioDen; + } + } - // **** Output Just to check**** +// **** Output Just to check**** #ifdef DEBUG // if(iMap == 0) { ofstream myexamplemap; ofstream myexampleRot; - myexamplemap.open ("MAP_i10"); - myexampleRot.open ("Rot_i10"); + myexamplemap.open("MAP_i10"); + myexampleRot.open("Rot_i10"); myexamplemap << "ANGLES " << alpha << " " << beta << " " << gam << "\n"; - for(int k = 0; k < param.param_device.NumberPixels; k++) - { - for(int j = 0; j < param.param_device.NumberPixels; j++) myexamplemap << "\nMAP " << k << " " << j << " " << localproj[k * param.param_device.NumberPixels + j]; - } + for (int k = 0; k < param.param_device.NumberPixels; k++) + { + for (int j = 0; j < param.param_device.NumberPixels; j++) + myexamplemap << "\nMAP " << k << " " << j << " " + << localproj[k * param.param_device.NumberPixels + j]; + } myexamplemap << " \n"; - for(int n = 0; n < Model.nPointsModel; n++)myexampleRot << "\nCOOR " << RotatedPointsModel[n].pos[0] << " " << RotatedPointsModel[n].pos[1] << " " << 
RotatedPointsModel[n].pos[2]; + for (int n = 0; n < Model.nPointsModel; n++) + myexampleRot << "\nCOOR " << RotatedPointsModel[n].pos[0] << " " + << RotatedPointsModel[n].pos[1] << " " + << RotatedPointsModel[n].pos[2]; myexamplemap.close(); myexampleRot.close(); } #endif - // ***** Converting projection to Fourier Space for Convolution later with kernel**** + // ***** Converting projection to Fourier Space for Convolution later with + // kernel**** // ********** Omp Critical is necessary with FFTW******* myfftw_execute_dft_r2c(param.fft_plan_r2c_forward, localproj, mapFFT); cuda_custom_timeslot_end; - return(0); + return (0); } -int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj, myfloat_t* Mapconv, mycomplex_t* localmultFFT, myfloat_t& sumC, myfloat_t& sumsquareC) +int bioem::createConvolutedProjectionMap(int iMap, int iConv, + mycomplex_t *lproj, + mycomplex_t *localmultFFT, + myfloat_t &sumC, myfloat_t &sumsquareC) { // ************************************************************************************** - // **** BioEM Create Convoluted Projection Map routine, multiplies in Fourier ********** - // **************** calculated Projection with convoluted precalculated Kernel*********** - // *************** and Backtransforming it to real Space ******************************** + // **** BioEM Create Convoluted Projection Map routine, multiplies in Fourier + // ********** + // **************** calculated Projection with convoluted precalculated + // Kernel*********** + // *************** and Backtransforming it to real Space + // ******************************** // ************************************************************************************** - cuda_custom_timeslot("Convolution", iMap, iConv, COLOR_CONVOLUTION); - mycomplex_t* tmp = param.fft_scratch_complex[omp_get_thread_num()]; - // **** Multiplying FFTmap of model with corresponding kernel ******* - const mycomplex_t* refCTF = &param.refCTF[iConv * param.FFTMapSize]; + const 
mycomplex_t *refCTF = &param.refCTF[iConv * param.FFTMapSize]; - for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D; i++) - { - localmultFFT[i][0] = ( lproj[i][0] * refCTF[i][0] + lproj[i][1] * refCTF[i][1] ) ; - localmultFFT[i][1] = ( lproj[i][1] * refCTF[i][0] - lproj[i][0] * refCTF[i][1] ) ; - } + for (int i = 0; i < param.param_device.NumberPixels * + param.param_device.NumberFFTPixels1D; + i++) + { + localmultFFT[i][0] = + (lproj[i][0] * refCTF[i][0] + lproj[i][1] * refCTF[i][1]); + localmultFFT[i][1] = + (lproj[i][1] * refCTF[i][0] - lproj[i][0] * refCTF[i][1]); + } - // *** Calculating Cross-correlations of cal-convoluted map with its self ***** (for BioEM formula) + // *** Calculating Cross-correlations of cal-convoluted map with its self + // ***** (for BioEM formula) sumC = localmultFFT[0][0]; //**** Calculating the second norm and storing it (for BioEM formula) sumsquareC = 0; - if (FFTAlgo) + + //*** With FFT algorithm + int jloopend = param.param_device.NumberFFTPixels1D; + if ((param.param_device.NumberPixels & 1) == 0) + jloopend--; + for (int i = 0; i < param.param_device.NumberPixels; i++) + { + for (int j = 1; j < jloopend; j++) { - //*** With FFT algorithm - int jloopend = param.param_device.NumberFFTPixels1D; - if ((param.param_device.NumberPixels & 1) == 0) jloopend--; - for(int i = 0; i < param.param_device.NumberPixels; i++) - { - for (int j = 1;j < jloopend;j++) - { - int k = i * param.param_device.NumberFFTPixels1D + j; - sumsquareC += (localmultFFT[k][0] * localmultFFT[k][0] + localmultFFT[k][1] * localmultFFT[k][1]) * 2; - } - int k = i * param.param_device.NumberFFTPixels1D; - sumsquareC += localmultFFT[k][0] * localmultFFT[k][0] + localmultFFT[k][1] * localmultFFT[k][1]; - if ((param.param_device.NumberPixels & 1) == 0) - { - k += param.param_device.NumberFFTPixels1D - 1; - sumsquareC += localmultFFT[k][0] * localmultFFT[k][0] + localmultFFT[k][1] * localmultFFT[k][1]; - } - } - - myfloat_t norm2 = 
(myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels); - sumsquareC = sumsquareC / norm2; + int k = i * param.param_device.NumberFFTPixels1D + j; + sumsquareC += (localmultFFT[k][0] * localmultFFT[k][0] + + localmultFFT[k][1] * localmultFFT[k][1]) * + 2; } - else + int k = i * param.param_device.NumberFFTPixels1D; + sumsquareC += localmultFFT[k][0] * localmultFFT[k][0] + + localmultFFT[k][1] * localmultFFT[k][1]; + if ((param.param_device.NumberPixels & 1) == 0) { - //***** Slow No FFT *** + k += param.param_device.NumberFFTPixels1D - 1; + sumsquareC += localmultFFT[k][0] * localmultFFT[k][0] + + localmultFFT[k][1] * localmultFFT[k][1]; + } + } - //**** Backtransforming the convoluted map it into real space - //FFTW_C2R will destroy the input array, so we have to work on a copy here - memcpy(tmp, localmultFFT, sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); + myfloat_t norm2 = (myfloat_t)(param.param_device.NumberPixels * + param.param_device.NumberPixels); + sumsquareC = sumsquareC / norm2; - // **** Bringing convoluted Map to real Space **** - myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, tmp, Mapconv); + cuda_custom_timeslot_end; - for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberPixels; i++) - { - sumsquareC += Mapconv[i] * Mapconv[i]; - // cout << "CONV " << i << " " << Mapconv[i] << "\n"; - } + return (0); +} - myfloat_t norm2 = (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels); - myfloat_t norm4 = norm2 * norm2; - sumsquareC = sumsquareC / norm4; - } +int bioem::createConvolutedProjectionMap_noFFT(mycomplex_t *lproj, + myfloat_t *Mapconv, + mycomplex_t *localmultFFT, + myfloat_t &sumC, + myfloat_t &sumsquareC) +{ + // ************************************************************************************** + // **** BioEM Create Convoluted Projection Map routine, multiplies in Fourier + // ********** + // **************** 
calculated Projection with convoluted precalculated + // Kernel*********** + // *************** and Backtransforming it to real Space + // ******************************** + // ************************************************************************************** + // *************** This routine is only for printing Model + // ****************************** + // ************************************************************************************** + + mycomplex_t *tmp = param.fft_scratch_complex[omp_get_thread_num()]; + + // **** Multiplying FFTmap of model with corresponding kernel ******* + const mycomplex_t *refCTF = param.refCTF; + + for (int i = 0; i < param.param_device.NumberPixels * + param.param_device.NumberFFTPixels1D; + i++) + { + localmultFFT[i][0] = + (lproj[i][0] * refCTF[i][0] + lproj[i][1] * refCTF[i][1]); + localmultFFT[i][1] = + (lproj[i][1] * refCTF[i][0] - lproj[i][0] * refCTF[i][1]); + } + // *** Calculating Cross-correlations of cal-convoluted map with its self + // ***** (for BioEM formula) + sumC = localmultFFT[0][0]; + + //**** Calculating the second norm and storing it (for BioEM formula) + sumsquareC = 0; + + //***** Slow No FFT *** + //**** Backtransforming the convoluted map it into real space + // FFTW_C2R will destroy the input array, so we have to work on a copy here + memcpy(tmp, localmultFFT, sizeof(mycomplex_t) * + param.param_device.NumberPixels * + param.param_device.NumberFFTPixels1D); + + // **** Bringing convoluted Map to real Space **** + myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, tmp, Mapconv); + + for (int i = 0; + i < param.param_device.NumberPixels * param.param_device.NumberPixels; + i++) + { + sumsquareC += Mapconv[i] * Mapconv[i]; + // cout << "CONV " << i << " " << Mapconv[i] << "\n"; + } + + myfloat_t norm2 = (myfloat_t)(param.param_device.NumberPixels * + param.param_device.NumberPixels); + myfloat_t norm4 = norm2 * norm2; + sumsquareC = sumsquareC / norm4; // 
************************************************************************************** - // *********** Routine for printing out the best projetion ****************************** + // *********** Routine for printing out the best projetion + // ****************************** // ************************************************************************************** - if (mpi_rank == 0 && param.printModel) - { -// MTRand mtr; - bran::mt19937 gen; - //Generating random seed so the maps do not have correlated Noise - gen.seed(static_cast<unsigned int>(std::time(0))); - //Uniform Noise: bran::uniform_int_distribution<> dist(1, 6); - - //Gaussian noise - bran::normal_distribution <> distn(0.0,param.stnoise); - - memcpy(tmp, localmultFFT, sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); - - // **** Bringing convoluted Map to real Space **** - myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, tmp, Mapconv); - - myfloat_t norm2 = (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels); - - ofstream myexamplemap; - myexamplemap.open ("BESTMAP"); - for(int k = 0; k < param.param_device.NumberPixels; k++) - { - for(int j = 0; j < param.param_device.NumberPixels; j++) { - if(!param.withnoise){ - myexamplemap << "\nMAP " << k+param.ddx << " " << j+param.ddy << " " << Mapconv[k * param.param_device.NumberPixels + j] / norm2 *param.bestnorm +param.bestoff ; - } else { - myexamplemap << "\nMAP " << k+param.ddx << " " << j+param.ddy << " " << Mapconv[k * param.param_device.NumberPixels + j] / norm2 *param.bestnorm +param.bestoff+distn(gen); -// cout << distn(gen) << "CHECK\n"; - } - } - myexamplemap << " \n"; - } - myexamplemap.close(); - - cout << "\n\nBest map printed in file: BESTMAP with gnuplot format in columns 2, 3 and 4. 
\n\n\n"; - exit(1); + // Calling random number routine from MersenneTwister.h + MTRand mtr; - } + // Generating random seed so the maps do not have correlated Noise + mtr.seed(static_cast<unsigned int>(std::time(0))); - cuda_custom_timeslot_end; + memcpy(tmp, localmultFFT, sizeof(mycomplex_t) * + param.param_device.NumberPixels * + param.param_device.NumberFFTPixels1D); + + // **** Bringing convoluted Map to real Space **** + myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, tmp, Mapconv); + + // Calculating the cross-correlation to the ref maps + // PILAR WORK RefMap.maps + if (param.BestmapCalcCC) + { + myfloat_t ccbm = 0.; + int kk, jj; + + for (int k = 0; k < param.param_device.NumberPixels; k++) + { + for (int j = 0; j < param.param_device.NumberPixels; j++) + { + // Missing periodicity and centers; + kk = k; + jj = j; + if (k - param.ddx < 0) + kk = param.param_device.NumberPixels - (k - param.ddx); + if (j - param.ddy < 0) + jj = param.param_device.NumberPixels - (j - param.ddy); + if (k - param.ddx >= param.param_device.NumberPixels) + kk = k - param.ddx - param.param_device.NumberPixels; + if (j - param.ddy >= param.param_device.NumberPixels) + jj = j - param.ddy - param.param_device.NumberPixels; + + ccbm += (Mapconv[kk * param.param_device.NumberPixels + jj] / norm2 * + param.bestnorm - + RefMap.maps[k * param.param_device.NumberPixels + j]) * + (Mapconv[kk * param.param_device.NumberPixels + jj] / norm2 * + param.bestnorm - + RefMap.maps[k * param.param_device.NumberPixels + j]); + } + } + cout << "CROSS CORELATION " << ccbm << "\n"; + } + else + { + ofstream myexamplemap; + myexamplemap.open("BESTMAP"); + for (int k = 0; k < param.param_device.NumberPixels; k++) + { + for (int j = 0; j < param.param_device.NumberPixels; j++) + { + if (!param.withnoise) + { + myexamplemap << "\nMAP " << k + param.ddx << " " << j + param.ddy + << " " + << Mapconv[k * param.param_device.NumberPixels + j] / + norm2 * param.bestnorm + + param.bestoff; + if (k + param.ddx 
< param.param_device.NumberPixels && + j + param.ddy < param.param_device.NumberPixels) + { + myexamplemap + << "\nMAPddx " << k << " " << j << " " + << Mapconv[(k - param.ddx) * param.param_device.NumberPixels + + j - param.ddy] / + norm2 * param.bestnorm + + param.bestoff; + } + } + else + { + myexamplemap << "\nMAP " << k + param.ddx << " " << j + param.ddy + << " " + << Mapconv[k * param.param_device.NumberPixels + j] / + norm2 * param.bestnorm + + param.bestoff + + mtr.randNorm(0.0, + param.stnoise); //\\+ distn(gen); + // cout << distn(gen) << "CHECK\n"; + } + } + myexamplemap << " \n"; + } + myexamplemap.close(); - return(0); + cout << "\n\nBest map printed in file: BESTMAP with gnuplot format in " + "columns 2, 3 and 4. \n\n\n"; + } + return (0); } -int bioem::calcross_cor(myfloat_t* localmap, myfloat_t& sum, myfloat_t& sumsquare) +int bioem::calcross_cor(myfloat_t *localmap, myfloat_t &sum, + myfloat_t &sumsquare) { - // *********************** Routine to calculate Cross correlations*********************** + // *********************** Routine to calculate Cross + // correlations*********************** sum = 0.0; sumsquare = 0.0; for (int i = 0; i < param.param_device.NumberPixels; i++) + { + for (int j = 0; j < param.param_device.NumberPixels; j++) { - for (int j = 0; j < param.param_device.NumberPixels; j++) - { - // Calculate Sum of pixels - sum += localmap[i * param.param_device.NumberPixels + j]; - // Calculate Sum of pixels squared - sumsquare += localmap[i * param.param_device.NumberPixels + j] * localmap[i * param.param_device.NumberPixels + j]; - } + // Calculate Sum of pixels + sum += localmap[i * param.param_device.NumberPixels + j]; + // Calculate Sum of pixels squared + sumsquare += localmap[i * param.param_device.NumberPixels + j] * + localmap[i * param.param_device.NumberPixels + j]; } - return(0); + } + return (0); } -int bioem::deviceInit() -{ - return(0); -} +int bioem::deviceInit() { return (0); } -int bioem::deviceStartRun() -{ - 
return(0); -} +int bioem::deviceStartRun() { return (0); } -int bioem::deviceFinishRun() -{ - return(0); -} +int bioem::deviceFinishRun() { return (0); } -void* bioem::malloc_device_host(size_t size) -{ - return(mallocchk(size)); -} +void *bioem::malloc_device_host(size_t size) { return (mallocchk(size)); } -void bioem::free_device_host(void* ptr) -{ - free(ptr); -} +void bioem::free_device_host(void *ptr) { free(ptr); } void bioem::rebalanceWrapper(int workload) { diff --git a/bioem_algorithm.h b/bioem_algorithm.h index c1936efe85e6e38998f7c1f5b4caf566fec361df..494ba6599d51fd63d27b895070beca3ae5ffe1fb 100644 --- a/bioem_algorithm.h +++ b/bioem_algorithm.h @@ -1,505 +1,200 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ #ifndef BIOEM_ALGORITHM_H #define BIOEM_ALGORITHM_H -//#include <boost/iterator/iterator_concepts.hpp> -#ifndef BIOEM_GPUCODE -//#define SSECODE //Explicit SSE code, not correct yet since loop counter is assumed multiple of 4, anyway not faster than autovectorized code, only implemented for float, not for double. 
-#endif - -#ifdef SSECODE -#include <emmintrin.h> -#include <smmintrin.h> -#endif - -template <int GPUAlgo> -__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability& pProb, bool doAngle, myfloat_t* buf3 = NULL, int* bufint = NULL) -{ - - // *********** Not using FFT ALGORITHM ****************** - //*********** Routine to perform the numerical BioEM intergal *********** - - // ******* Summing total Probabilities ************* - - bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); - - // ******* Need a constant because of numerical divergence***** - if(pProbMap.Constoadd < logpro) - { - pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd); - pProbMap.Constoadd = logpro; - - // ********** Getting parameters that maximize the probability *********** - if (GPUAlgo == 2) - { - bufint[0] = 1; - buf3[1] = logpro; - } - else - { - pProbMap.max.max_prob_cent_x = - cent_x; - pProbMap.max.max_prob_cent_y = - cent_y; - } - pProbMap.max.max_prob_orient = iOrient; - pProbMap.max.max_prob_conv = iConv; - // pProbMap.max.max_prob_norm = - ( -sumC * RefMap.sum_RefMap[iRefMap] + param.Ntotpi * value ) / ( sumC * sumC - sumsquareC * param.Ntotpi); - // pProbMap.max.max_prob_mu = - ( -sumC * value + sumsquareC * RefMap.sum_RefMap[iRefMap] ) / ( sumC * sumC - sumsquareC * param.Ntotpi); - } - if (GPUAlgo != 2) pProbMap.Total += exp(logpro - pProbMap.Constoadd); - - if (doAngle) - { - bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); - - //Summing probabilities for each orientation - if(pProbAngle.ConstAngle < logpro) - { - pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle); - pProbAngle.ConstAngle = logpro; - } - - if (GPUAlgo != 2) pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle); - } -} - -__device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, const 
myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t sum, const myfloat_t sumsquare, const myfloat_t crossproMapConv, const myfloat_t sumref, const myfloat_t sumsquareref) +__device__ static inline myprob_t +calc_logpro(const bioem_param_device &param, const myfloat_t amp, + const myfloat_t pha, const myfloat_t env, const myfloat_t sum, + const myfloat_t sumsquare, const myfloat_t crossproMapConv, + const myfloat_t sumref, const myfloat_t sumsquareref) { //*** MAIN ROUTINE TO CALCULATE THE LOGPRO FOR ALL KERNELS*************// // **** calculate the log posterior of Eq. of Pmw in SI of JSB paper ***// // Related to Reference calculated Projection - const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum); + const myprob_t ForLogProb = sumsquare * param.Ntotpi - sum * sum; // Products of different cross-correlations (first element in formula) - const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare - crossproMapConv * crossproMapConv) + - 2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare; + const myprob_t firstele = + param.Ntotpi * + (sumsquareref * sumsquare - crossproMapConv * crossproMapConv) + + 2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - + sumref * sumref * sumsquare; /// ******* Calculating log of Prob********* - // As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1); - - myfloat_t logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb); - - //*************Adding Gaussian Prior to envelope & Defocus parameter****************** - - if(not param.tousepsf){ - logpro = logpro - env * env / 2. / param.sigmaPriorbctf / param.sigmaPriorbctf - - (pha - param.Priordefcent ) * (pha - param.Priordefcent ) / 2. 
/ param.sigmaPriordefo / param.sigmaPriordefo ; - } else { - myfloat_t envF,phaF; - envF = 4.* M_PI * M_PI * env / ( env * env + pha * pha) ; - phaF = 4.* M_PI * M_PI * pha / ( env * env + pha * pha); - logpro = logpro - envF * envF / 2. / param.sigmaPriorbctf / param.sigmaPriorbctf - (phaF - param.Priordefcent ) * (phaF - param.Priordefcent ) / 2. / param.sigmaPriordefo / param.sigmaPriordefo ; + // As in fortran code: + // logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1); + + myprob_t logpro = + (3 - param.Ntotpi) * 0.5 * log(firstele) + + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb); + + //*************Adding Gaussian Prior to envelope & Defocus + // & amplitude parameters; every prior term is subtracted from logpro *** + + if (not param.tousepsf) + { + logpro -= env * env / 2. / param.sigmaPriorbctf / param.sigmaPriorbctf + + (pha - param.Priordefcent) * (pha - param.Priordefcent) / 2. / + param.sigmaPriordefo / param.sigmaPriordefo + + (amp - param.Priorampcent) * (amp - param.Priorampcent) / 2. / + param.sigmaPrioramp / param.sigmaPrioramp; + } + else + { + myprob_t envF, phaF; + envF = 4. * M_PI * M_PI * env / (env * env + pha * pha); + phaF = 4. * M_PI * M_PI * pha / (env * env + pha * pha); + logpro -= envF * envF / 2. / param.sigmaPriorbctf / param.sigmaPriorbctf + + (phaF - param.Priordefcent) * (phaF - param.Priordefcent) / 2. / + param.sigmaPriordefo / param.sigmaPriordefo + + (amp - param.Priorampcent) * (amp - param.Priorampcent) / 2. 
/ + param.sigmaPrioramp / param.sigmaPrioramp; } - return(logpro); + return (logpro); } -__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap) +__device__ static inline void +calProb(int iRefMap, int iOrient, int iConv, const myfloat_t amp, + const myfloat_t pha, const myfloat_t env, const myfloat_t sumC, + const myfloat_t sumsquareC, myfloat_t value, int disx, int disy, + bioem_Probability &pProb, const bioem_param_device &param, + const bioem_RefMap &RefMap) { // IMPORTANT ROUTINE Summation of LogProb using FFTALGO // ******************************************************** // *********** Calculates the BioEM probability *********** // ******************************************************** - myfloat_t logpro = calc_logpro(param, amp, pha, env, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]); + myfloat_t logpro = + calc_logpro(param, amp, pha, env, sumC, sumsquareC, value, + RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]); - //GCC is too stupid to inline properly, so the code is copied here - //update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb, param.writeAngles); - - bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); - - // printf("Separate PtotBef: %f Const: %f logProb %f %d %d %d \n",pProbMap.Total,pProbMap.Constoadd,logpro,iRefMap,iOrient,iConv); - if(pProbMap.Constoadd < logpro) - { - pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd); - pProbMap.Constoadd = logpro; +#ifdef DEBUG_PROB + printf("\t\t\tProb: iRefMap %d, iOrient %d, iConv %d, " + "disx %d, disy %d, address -, value %f, logpro %f\n", + iRefMap, iOrient, iConv, disx, disy, value, logpro); +#endif - // ********** Getting parameters that maximize the probability *********** - 
pProbMap.max.max_prob_cent_x = - disx; - pProbMap.max.max_prob_cent_y = - disy; - pProbMap.max.max_prob_orient = iOrient; - pProbMap.max.max_prob_conv = iConv; - pProbMap.max.max_prob_norm = - ( -sumC * RefMap.sum_RefMap[iRefMap] + param.Ntotpi * value ) / ( sumC * sumC - sumsquareC * param.Ntotpi); - pProbMap.max.max_prob_mu = - ( -sumC * value + sumsquareC * RefMap.sum_RefMap[iRefMap] ) / ( sumC * sumC - sumsquareC * param.Ntotpi); - } + bioem_Probability_map &pProbMap = pProb.getProbMap(iRefMap); + + if (pProbMap.Constoadd < logpro) + { + pProbMap.Total *= exp(-logpro + pProbMap.Constoadd); + pProbMap.Constoadd = logpro; + + // ********** Getting parameters that maximize the probability *********** + pProbMap.max.max_prob_cent_x = -disx; + pProbMap.max.max_prob_cent_y = -disy; + pProbMap.max.max_prob_orient = iOrient; + pProbMap.max.max_prob_conv = iConv; + pProbMap.max.max_prob_norm = + -(-sumC * RefMap.sum_RefMap[iRefMap] + param.Ntotpi * value) / + (sumC * sumC - sumsquareC * param.Ntotpi); + pProbMap.max.max_prob_mu = + -(-sumC * value + sumsquareC * RefMap.sum_RefMap[iRefMap]) / + (sumC * sumC - sumsquareC * param.Ntotpi); + +#ifdef DEBUG_PROB + printf("\tProbabilities change: iRefMap %d, iOrient %d, iConv %d, Total " + "%f, Const %f, bestlogpro %f, sumExp -, bestId -\n", + iRefMap, iOrient, iConv, pProbMap.Total, pProbMap.Constoadd, logpro); + printf("\tParameters: iConv %d, myX -, myY -, disx %d, disy %d, probX " + "%d, probY %d\n", + iConv, disx, disy, pProbMap.max.max_prob_cent_x, + pProbMap.max.max_prob_cent_y); +#endif + } pProbMap.Total += exp(logpro - pProbMap.Constoadd); - +#ifdef DEBUG_PROB + printf("\t\tProbabilities after Sum: iRefMap %d, iOrient %d, iConv %d, " + "Total %f, Const %f, bestlogpro %f, sumExp -, bestId -\n", + iRefMap, iOrient, iConv, pProbMap.Total, pProbMap.Constoadd, logpro); +#endif if (param.writeAngles) - { - bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); - // if(iOrient==1)printf("Separate Ptot: 
%f Const: %f logProb %f param: %d %d %d \n",logpro,pProbAngle.ConstAngle,pProbAngle.forAngles,disx,disx,iOrient); - - if(pProbAngle.ConstAngle < logpro) - { - pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle); - pProbAngle.ConstAngle = logpro; - } - pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle); - // if(iOrient==5)printf("After separate Ptot: %f Const: %f logProb %f \n",logpro,pProbAngle.ConstAngle,pProbAngle.forAngles); - } + { + bioem_Probability_angle &pProbAngle = pProb.getProbAngle(iRefMap, iOrient); -} - -__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap) -{ - //******************** Using FFT algorithm ************************** - //******************* Get cross-crollation of Ical to Iobs ******************* - //*********** Routine to get the Cross-Corellation from lCC for the interested center displacement ************* - - for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter) + if (pProbAngle.ConstAngle < logpro) { - for (int cent_y = 0; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter) - { - calProb(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y, pProb, param, RefMap); - } - for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter) - { - calProb(iRefMap, iOrient, iConv,amp, pha, env, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y - param.NumberPixels, pProb, param, RefMap); - 
} + pProbAngle.forAngles *= exp(-logpro + pProbAngle.ConstAngle); + pProbAngle.ConstAngle = logpro; } - for (int cent_x = param.NumberPixels - param.maxDisplaceCenter; cent_x < param.NumberPixels; cent_x = cent_x + param.GridSpaceCenter) - { - for (int cent_y = 0; cent_y < param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter) - { - calProb(iRefMap, iOrient, iConv,amp, pha, env, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y, pProb, param, RefMap); - } - for (int cent_y = param.NumberPixels - param.maxDisplaceCenter ; cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter) - { - calProb(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y - param.NumberPixels, pProb, param, RefMap); - } - } - - //************ The following if is not in the manual*********** - if (param.writeCC) - { - // If the Cross-correlation is to be written out and stored using Bayesian analysis - int cc=0; - for (int cent_x = 0; cent_x < param.NumberPixels ; cent_x = cent_x + param.CCdisplace) - { - for (int cent_y = 0; cent_y < param.NumberPixels ; cent_y = cent_y + param.CCdisplace) - { - - - bioem_Probability_cc& pProbCC = pProb.getProbCC(iRefMap, cc); - - myfloat_t ttmp,ttmp2; - ttmp2 = (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels); - - if(not param.flipped){ - //Here we are inverting the sign of the cross-correlation for the images that are not flipped - ttmp=-ttmp2; } - else{ ttmp=ttmp2; } - - if(!param.CCwithBayes){ - // Storing Only the Maximum both for flipped and not flipped - - if(pProbCC.forCC < ttmp) pProbCC.forCC = ttmp; - - }else { - // Storing the Cross-correlation with Bayesian formalism - if(pProbCC.ConstCC < ttmp) - { - 
pProbCC.forCC = pProbCC.forCC * exp(-ttmp + pProbCC.ConstCC); - pProbCC.ConstCC = ttmp; - } - pProbCC.forCC += exp(ttmp - pProbCC.ConstCC); - - } - // printf("Separate %d %d Ptot: %f Const: %f logProb %f \n",cent_x,cent_y,pProbCC.forCC,pProbCC.ConstCC,ttmp); - cc++; - - } - } - } - - + pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle); + } } -template <int GPUAlgo, class RefT> - __device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t sumC, - const myfloat_t sumsquareC, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap, - const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true) +__device__ static inline void +doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, + const myfloat_t amp, const myfloat_t pha, const myfloat_t env, + const myfloat_t sumC, const myfloat_t sumsquareC, + const myfloat_t *lCC, bioem_Probability &pProb, + const bioem_param_device ¶m, const bioem_RefMap &RefMap) { - - //********************* Non FOURIER ALGORITHMS (refer to David) *********** - // ********************** Calculating BioEM Probability ******************************** - // ************************* Loop of center displacement here *************************** - - // Taking into account the center displacement - - // Inizialzing crosscorrelations of calculated projected convolutions -#ifdef SSECODE - myfloat_t sum, sumsquare, crossproMapConv; - __m128 sum_v = _mm_setzero_ps(), sumsquare_v = _mm_setzero_ps(), cross_v = _mm_setzero_ps(), d1, d2; -#else - myfloat_t sum = 0.0; - myfloat_t sumsquare = 0.0; - myfloat_t crossproMapConv = 0.0; -#endif - // Loop over Pixels to calculate dot product and cross-correlations of displaced Ref Conv. 
Map - myfloat_t logpro; - if (GPUAlgo != 2 || threadActive) + //******************** Using FFT algorithm ************************** + //******************* Get cross-crollation of Ical to Iobs ******************* + //*********** Routine to get the Cross-Corellation from lCC for the interested + // center displacement ************* + + for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; + cent_x = cent_x + param.GridSpaceCenter) + { + for (int cent_y = 0; cent_y <= param.maxDisplaceCenter; + cent_y = cent_y + param.GridSpaceCenter) { - int iStart, jStart, iEnd, jEnd; - if (cent_x < 0) - { - iStart = -cent_x; - iEnd = param.NumberPixels; - } - else - { - iStart = 0; - iEnd = param.NumberPixels - cent_x; - } - if (cent_y < 0) - { - jStart = -cent_y; - jEnd = param.NumberPixels; - } - else - { - jStart = 0; - jEnd = param.NumberPixels - cent_y; - } - - for (int i = iStart; i < iEnd; i += 1) - { -#ifdef SSECODE - const float* ptr1 = &Mapconv.points[i + cent_x][jStart + cent_y]; - const float* ptr2 = RefMap.getp(iRefMap, i, jStart); - int j; - const int count = jEnd - jStart; - for (j = 0; j <= count - 4; j += 4) - { - d1 = _mm_loadu_ps(ptr1); - d2 = _mm_loadu_ps(ptr2); - sum_v = _mm_add_ps(sum_v, d1); - sumsquare_v = _mm_add_ps(sumsquare_v, _mm_mul_ps(d1, d1)); - cross_v = _mm_add_ps(cross_v, _mm_mul_ps(d1, d2)); - ptr1 += 4; - ptr2 += 4; - } -#else - for (int j = jStart; j < jEnd; j += 1) - { - const myfloat_t pointMap = Mapconv[(i + cent_x) * param.NumberPixels + j + cent_y]; - const myfloat_t pointRefMap = RefMap.get(iRefMap, i, j); - crossproMapConv += pointMap * pointRefMap; - // Crosscorrelation of calculated displaced map - sum += pointMap; - // Calculate Sum of pixels squared - sumsquare += pointMap * pointMap; - } -#endif - } -#ifdef SSECODE - sum_v = _mm_hadd_ps(sum_v, sum_v); - sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v); - cross_v = _mm_hadd_ps(cross_v, cross_v); - sum_v = _mm_hadd_ps(sum_v, sum_v); - sumsquare_v = _mm_hadd_ps(sumsquare_v, 
sumsquare_v); - cross_v = _mm_hadd_ps(cross_v, cross_v); - sum = _mm_cvtss_f32(sum_v); - sumsquare = _mm_cvtss_f32(sumsquare_v); - crossproMapConv = _mm_cvtss_f32(cross_v); -#endif - - // Calculating elements in BioEM Probability formula - logpro = calc_logpro(param, amp, pha, env, sum, sumsquare, crossproMapConv, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]); + calProb(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, + (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / + (myfloat_t)(param.NumberPixels * param.NumberPixels), + cent_x, cent_y, pProb, param, RefMap); } - else + for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; + cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter) { - logpro = 0; + calProb(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, + (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / + (myfloat_t)(param.NumberPixels * param.NumberPixels), + cent_x, cent_y - param.NumberPixels, pProb, param, RefMap); } + } -#ifdef BIOEM_GPUCODE - if (GPUAlgo == 2) - { - extern __shared__ myfloat_t buf[]; - myfloat_t* buf2 = &buf[myBlockDimX]; - myfloat_t* buf3 = &buf2[myBlockDimX + 4 * myRef]; - int* bufint = (int*) buf3; - - buf[myThreadIdxX] = logpro; - if (myShift == 0) - { - bufint[0] = 0; - } - __syncthreads(); - - if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024 - { - if (myShift < 512) if (buf[myThreadIdxX + 512] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 512]; - __syncthreads(); - } - - if (nShifts2 >= 512) - { - if (myShift < 256) if (buf[myThreadIdxX + 256] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 256]; - __syncthreads(); - } - - if (nShifts2 >= 256) - { - if (myShift < 128) if (buf[myThreadIdxX + 128] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 128]; - __syncthreads(); - } - - if (nShifts2 >= 128) - { - if (myShift < 64) if (buf[myThreadIdxX + 64] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 64]; - 
__syncthreads(); - } - - if (myShift < 32) //Warp Size is 32, threads are synched automatically - { - volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered - if (nShifts2 >= 64 && vbuf[myThreadIdxX + 32] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 32]; - if (nShifts2 >= 32 && vbuf[myThreadIdxX + 16] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 16]; - if (nShifts2 >= 16 && vbuf[myThreadIdxX + 8] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 8]; - if (nShifts2 >= 8 && vbuf[myThreadIdxX + 4] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 4]; - if (nShifts2 >= 4 && vbuf[myThreadIdxX + 2] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 2]; - if (nShifts2 >= 2 && vbuf[myThreadIdxX + 1] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 1]; - if (myShift == 0 && iRefMap < RefMap.ntotRefMap) - { - const myfloat_t logpro_max = vbuf[myThreadIdxX]; - update_prob<GPUAlgo>(logpro_max, iRefMap, iOrient, iConv, -1, -1, pProb, param.writeAngles, buf3, bufint); - } - } - - __syncthreads(); - - bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); - bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); - - if (bufint[0] == 1 && buf3[1] == logpro && iRefMap < RefMap.ntotRefMap && atomicAdd(&bufint[0], 1) == 1) - { - pProbMap.max.max_prob_cent_x = - cent_x; - pProbMap.max.max_prob_cent_y = - cent_y; - } - - __syncthreads(); - - if (iRefMap < RefMap.ntotRefMap) - { - buf[myThreadIdxX] = exp(logpro - pProbMap.Constoadd); - buf2[myThreadIdxX] = exp(logpro - pProbAngle.ConstAngle); - } - __syncthreads(); - - if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024 - { - if (myShift < 512) - { - buf[myThreadIdxX] += buf[myThreadIdxX + 512]; - buf2[myThreadIdxX] += buf2[myThreadIdxX + 512]; - } - __syncthreads(); - } - - if (nShifts2 >= 512) - { - if (myShift < 256) - { - buf[myThreadIdxX] += buf[myThreadIdxX + 256]; - 
buf2[myThreadIdxX] += buf2[myThreadIdxX + 256]; - } - __syncthreads(); - } - - if (nShifts2 >= 256) - { - if (myShift < 128) - { - buf[myThreadIdxX] += buf[myThreadIdxX + 128]; - buf2[myThreadIdxX] += buf2[myThreadIdxX + 128]; - } - __syncthreads(); - } - - if (nShifts2 >= 128) - { - if (myShift < 64) - { - buf[myThreadIdxX] += buf[myThreadIdxX + 64]; - buf2[myThreadIdxX] += buf2[myThreadIdxX + 64]; - } - __syncthreads(); - } - - if (myShift < 32) //Warp Size is 32, threads are synched automatically - { - volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered - volatile myfloat_t* vbuf2 = buf2; - if (nShifts2 >= 64) - { - vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 32]; - vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 32]; - } - if (nShifts2 >= 32) - { - vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 16]; - vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 16]; - } - if (nShifts2 >= 16) - { - vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 8]; - vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 8]; - } - if (nShifts2 >= 8) - { - vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 4]; - vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 4]; - } - if (nShifts2 >= 4) - { - vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 2]; - vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 2]; - } - if (nShifts2 >= 2) - { - vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 1]; - vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 1]; - } - if (myShift == 0 && iRefMap < RefMap.ntotRefMap) - { - pProbMap.Total += vbuf[myThreadIdxX]; - pProbAngle.forAngles += vbuf2[myThreadIdxX]; - } - } - } - else -#endif + for (int cent_x = param.NumberPixels - param.maxDisplaceCenter; + cent_x < param.NumberPixels; cent_x = cent_x + param.GridSpaceCenter) + { + for (int cent_y = 0; cent_y <= param.maxDisplaceCenter; + cent_y = cent_y + param.GridSpaceCenter) { - update_prob < -1 > (logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb, param.writeAngles); + calProb(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, + 
(myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / + (myfloat_t)(param.NumberPixels * param.NumberPixels), + cent_x - param.NumberPixels, cent_y, pProb, param, RefMap); } -} - -template <int GPUAlgo, class RefT> - __device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t sumC, const myfloat_t sumsquareC, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap) -{ - for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter) + for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; + cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter) { - for (int cent_y = -param.maxDisplaceCenter; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter) - { - compareRefMap<GPUAlgo>(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, Mapconv, pProb, param, RefMap, cent_x, cent_y); - } + calProb(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, + (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / + (myfloat_t)(param.NumberPixels * param.NumberPixels), + cent_x - param.NumberPixels, cent_y - param.NumberPixels, pProb, + param, RefMap); } + } } #endif diff --git a/bioem_cuda.cu b/bioem_cuda.cu index 71a5c97ee2f30633ad69d64f139fc5cac4064798..75b6ee72e4c6bc6285280e956b908ee2ac39869b 100644 --- a/bioem_cuda.cu +++ b/bioem_cuda.cu @@ -1,9 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. 
Max Planck Institute of Biophysics, Frankfurt, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. + Released under the GNU Public License, v3. See license statement for terms of distribution. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -18,715 +22,1076 @@ using namespace std; #include "bioem_cuda_internal.h" -#include "bioem_algorithm.h" //#include "helper_cuda.h" -#define checkCudaErrors(error) \ -{ \ - if ((error) != cudaSuccess) \ - { \ - printf("CUDA Error %d / %s (%s: %d)\n", error, cudaGetErrorString(error), __FILE__, __LINE__); \ - exit(1); \ - } \ -} +#include "bioem_algorithm.h" + +#define checkCudaErrors(error) \ + { \ + if ((error) != cudaSuccess) \ + { \ + printf("CUDA Error %d / %s (%s: %d)\n", error, \ + cudaGetErrorString(error), __FILE__, __LINE__); \ + exit(1); \ + } \ + } + +#ifdef DEBUG_GPU +#define printCudaDebugStart() \ + float time; \ + time = 0.; \ + cudaEvent_t start, stop; \ + checkCudaErrors(cudaEventCreate(&start)); \ + checkCudaErrors(cudaEventCreate(&stop)); \ + checkCudaErrors(cudaEventRecord(start, 0)); +#define printCudaDebug(msg) \ + checkCudaErrors(cudaEventRecord(stop, 0)); \ + checkCudaErrors(cudaEventSynchronize(stop)); \ + checkCudaErrors(cudaEventElapsedTime(&time, start, stop)); \ + printf("\t\t\tGPU: %s %1.6f sec\n", msg, time / 1000); \ + checkCudaErrors(cudaEventRecord(start, 0)); + +#else +#define printCudaDebugStart() +#define printCudaDebug(msg) +#endif static const char *cufftGetErrorStrung(cufftResult error) { - switch (error) - { - case CUFFT_SUCCESS: - return "CUFFT_SUCCESS"; + switch (error) + { + case CUFFT_SUCCESS: + return "CUFFT_SUCCESS"; - case CUFFT_INVALID_PLAN: - return "CUFFT_INVALID_PLAN"; + case CUFFT_INVALID_PLAN: + return "CUFFT_INVALID_PLAN"; - case CUFFT_ALLOC_FAILED: - return "CUFFT_ALLOC_FAILED"; + case CUFFT_ALLOC_FAILED: + return "CUFFT_ALLOC_FAILED"; 
- case CUFFT_INVALID_TYPE: - return "CUFFT_INVALID_TYPE"; + case CUFFT_INVALID_TYPE: + return "CUFFT_INVALID_TYPE"; - case CUFFT_INVALID_VALUE: - return "CUFFT_INVALID_VALUE"; + case CUFFT_INVALID_VALUE: + return "CUFFT_INVALID_VALUE"; - case CUFFT_INTERNAL_ERROR: - return "CUFFT_INTERNAL_ERROR"; + case CUFFT_INTERNAL_ERROR: + return "CUFFT_INTERNAL_ERROR"; - case CUFFT_EXEC_FAILED: - return "CUFFT_EXEC_FAILED"; + case CUFFT_EXEC_FAILED: + return "CUFFT_EXEC_FAILED"; - case CUFFT_SETUP_FAILED: - return "CUFFT_SETUP_FAILED"; + case CUFFT_SETUP_FAILED: + return "CUFFT_SETUP_FAILED"; - case CUFFT_INVALID_SIZE: - return "CUFFT_INVALID_SIZE"; + case CUFFT_INVALID_SIZE: + return "CUFFT_INVALID_SIZE"; - case CUFFT_UNALIGNED_DATA: - return "CUFFT_UNALIGNED_DATA"; - } - return "UNKNOWN"; + case CUFFT_UNALIGNED_DATA: + return "CUFFT_UNALIGNED_DATA"; + } + return "UNKNOWN"; } /* Handing CUDA Driver errors */ -#define cuErrorCheck(call) \ - do { \ - CUresult __error__; \ - if ((__error__ = (call)) != CUDA_SUCCESS) { \ - printf("CUDA Driver Error %d / %s (%s %d)\n", __error__, cuGetError(__error__),__FILE__, __LINE__); \ - return __error__; \ - } \ +#define cuErrorCheck(call) \ + do \ + { \ + CUresult __error__; \ + if ((__error__ = (call)) != CUDA_SUCCESS) \ + { \ + printf("CUDA Driver Error %d / %s (%s %d)\n", __error__, \ + cuGetError(__error__), __FILE__, __LINE__); \ + return __error__; \ + } \ } while (false) -static const char * cuGetError(CUresult result) { - switch (result) { - case CUDA_SUCCESS: return "No errors"; - case CUDA_ERROR_INVALID_VALUE: return "Invalid value"; - case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory"; - case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized"; - case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized"; - case CUDA_ERROR_PROFILER_DISABLED: return "Profiler disabled"; - case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "Profiler not initialized"; - case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "Profiler already 
started"; - case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "Profiler already stopped"; - case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available"; - case CUDA_ERROR_INVALID_DEVICE: return "Invalid device"; - case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image"; - case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context"; - case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "Context already current"; - case CUDA_ERROR_MAP_FAILED: return "Map failed"; - case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed"; - case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped"; - case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped"; - case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU"; - case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired"; - case CUDA_ERROR_NOT_MAPPED: return "Not mapped"; - case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Not mapped as array"; - case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Not mapped as pointer"; - case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error"; - case CUDA_ERROR_UNSUPPORTED_LIMIT: return "Unsupported CUlimit"; - case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use"; - case CUDA_ERROR_INVALID_SOURCE: return "Invalid source"; - case CUDA_ERROR_FILE_NOT_FOUND: return "File not found"; - case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Shared object symbol not found"; - case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed"; - case CUDA_ERROR_OPERATING_SYSTEM: return "Operating System call failed"; - case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle"; - case CUDA_ERROR_NOT_FOUND: return "Not found"; - case CUDA_ERROR_NOT_READY: return "CUDA not ready"; - case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed"; - case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources"; - case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout"; - case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with 
incompatible texturing"; - case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "Peer access already enabled"; - case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "Peer access not enabled"; - case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "Primary context active"; - case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "Context is destroyed"; - case CUDA_ERROR_ASSERT: return "Device assert failed"; - case CUDA_ERROR_TOO_MANY_PEERS: return "Too many peers"; - case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "Host memory already registered"; - case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "Host memory not registered"; - case CUDA_ERROR_UNKNOWN: return "Unknown error"; - default: return "Unknown error code"; +static const char *cuGetError(CUresult result) +{ + switch (result) + { + case CUDA_SUCCESS: + return "No errors"; + case CUDA_ERROR_INVALID_VALUE: + return "Invalid value"; + case CUDA_ERROR_OUT_OF_MEMORY: + return "Out of memory"; + case CUDA_ERROR_NOT_INITIALIZED: + return "Driver not initialized"; + case CUDA_ERROR_DEINITIALIZED: + return "Driver deinitialized"; + case CUDA_ERROR_PROFILER_DISABLED: + return "Profiler disabled"; + case CUDA_ERROR_PROFILER_NOT_INITIALIZED: + return "Profiler not initialized"; + case CUDA_ERROR_PROFILER_ALREADY_STARTED: + return "Profiler already started"; + case CUDA_ERROR_PROFILER_ALREADY_STOPPED: + return "Profiler already stopped"; + case CUDA_ERROR_NO_DEVICE: + return "No CUDA-capable device available"; + case CUDA_ERROR_INVALID_DEVICE: + return "Invalid device"; + case CUDA_ERROR_INVALID_IMAGE: + return "Invalid kernel image"; + case CUDA_ERROR_INVALID_CONTEXT: + return "Invalid context"; + case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: + return "Context already current"; + case CUDA_ERROR_MAP_FAILED: + return "Map failed"; + case CUDA_ERROR_UNMAP_FAILED: + return "Unmap failed"; + case CUDA_ERROR_ARRAY_IS_MAPPED: + return "Array is mapped"; + case CUDA_ERROR_ALREADY_MAPPED: + return "Already mapped"; + case 
CUDA_ERROR_NO_BINARY_FOR_GPU: + return "No binary for GPU"; + case CUDA_ERROR_ALREADY_ACQUIRED: + return "Already acquired"; + case CUDA_ERROR_NOT_MAPPED: + return "Not mapped"; + case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: + return "Not mapped as array"; + case CUDA_ERROR_NOT_MAPPED_AS_POINTER: + return "Not mapped as pointer"; + case CUDA_ERROR_ECC_UNCORRECTABLE: + return "Uncorrectable ECC error"; + case CUDA_ERROR_UNSUPPORTED_LIMIT: + return "Unsupported CUlimit"; + case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: + return "Context already in use"; + case CUDA_ERROR_INVALID_SOURCE: + return "Invalid source"; + case CUDA_ERROR_FILE_NOT_FOUND: + return "File not found"; + case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: + return "Shared object symbol not found"; + case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: + return "Shared object initialization failed"; + case CUDA_ERROR_OPERATING_SYSTEM: + return "Operating System call failed"; + case CUDA_ERROR_INVALID_HANDLE: + return "Invalid handle"; + case CUDA_ERROR_NOT_FOUND: + return "Not found"; + case CUDA_ERROR_NOT_READY: + return "CUDA not ready"; + case CUDA_ERROR_LAUNCH_FAILED: + return "Launch failed"; + case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: + return "Launch exceeded resources"; + case CUDA_ERROR_LAUNCH_TIMEOUT: + return "Launch exceeded timeout"; + case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: + return "Launch with incompatible texturing"; + case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: + return "Peer access already enabled"; + case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: + return "Peer access not enabled"; + case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: + return "Primary context active"; + case CUDA_ERROR_CONTEXT_IS_DESTROYED: + return "Context is destroyed"; + case CUDA_ERROR_ASSERT: + return "Device assert failed"; + case CUDA_ERROR_TOO_MANY_PEERS: + return "Too many peers"; + case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: + return "Host memory already registered"; + case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: + return "Host memory 
not registered"; + case CUDA_ERROR_UNKNOWN: + return "Unknown error"; + default: + return "Unknown error code"; } } bioem_cuda::bioem_cuda() { - deviceInitialized = 0; - GPUAlgo = getenv("GPUALGO") == NULL ? 2 : atoi(getenv("GPUALGO")); - GPUAsync = getenv("GPUASYNC") == NULL ? 1 : atoi(getenv("GPUASYNC")); - GPUWorkload = getenv("GPUWORKLOAD") == NULL ? 100 : atoi(getenv("GPUWORKLOAD")); - if (GPUWorkload == -1) GPUWorkload = 100; - GPUDualStream = getenv("GPUDUALSTREAM") == NULL ? 1 : atoi(getenv("GPUDUALSTREAM")); + deviceInitialized = 0; + GPUAsync = getenv("GPUASYNC") == NULL ? 1 : atoi(getenv("GPUASYNC")); + GPUWorkload = + getenv("GPUWORKLOAD") == NULL ? 100 : atoi(getenv("GPUWORKLOAD")); + if (GPUWorkload == -1) + GPUWorkload = 100; + GPUDualStream = + getenv("GPUDUALSTREAM") == NULL ? 1 : atoi(getenv("GPUDUALSTREAM")); } -bioem_cuda::~bioem_cuda() -{ - deviceExit(); -} +bioem_cuda::~bioem_cuda() { deviceExit(); } -__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t sumC, - const myfloat_t sumsquareC, const myfloat_t* pMap, bioem_Probability pProb, - const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int cent_x, const int cent_y, const int maxRef) +__global__ void multComplexMap(const mycomplex_t *convmap, + const mycomplex_t *refmap, mycuComplex_t *out, + const int MapSize, const int maxParallelConv, + const int NumberRefMaps, const int Offset) { - const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; - if (iRefMap < maxRef) - { - compareRefMap<0>(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, pMap, pProb, param, RefMap, cent_x, cent_y); - } + int myConv = myBlockIdxX / NumberRefMaps; + int myRef = myBlockIdxX - myConv * NumberRefMaps + Offset; + const mycuComplex_t *myin = (mycuComplex_t *) &refmap[myRef * MapSize]; + const mycuComplex_t *myconv = (mycuComplex_t *) &convmap[myConv * MapSize]; + mycuComplex_t *myout = 
&out[myBlockIdxX * MapSize]; + for (int i = myThreadIdxX; i < MapSize; i += myBlockDimX) + { + mycuComplex_t val; + const mycuComplex_t conv = myconv[i]; + const mycuComplex_t in = myin[i]; + + val.x = conv.x * in.x + conv.y * in.y; + val.y = conv.y * in.x - conv.x * in.y; + myout[i] = val; + } } -__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t sumC, const myfloat_t sumsquareC, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef) +__global__ void +cuDoRefMapsFFT(const int iOrient, const int iConv, const myfloat_t *lCC, + const myparam5_t *comp_params, bioem_Probability pProb, + const bioem_param_device param, const bioem_RefMap RefMap, + const int maxRef, const int Offset) { - const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; - if (iRefMap < maxRef) - { - compareRefMapShifted<1>(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, pMap, pProb, param, RefMap); - } + if (myBlockIdxX * myBlockDimX + myThreadIdxX >= maxRef) + return; + const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX + Offset; + const myfloat_t *mylCC = &lCC[(myBlockIdxX * myBlockDimX + myThreadIdxX) * + param.NumberPixels * param.NumberPixels]; + doRefMapFFT(iRefMap, iOrient, iConv, comp_params->amp, comp_params->pha, + comp_params->env, comp_params->sumC, comp_params->sumsquareC, + mylCC, pProb, param, RefMap); } -__global__ void cudaZeroMem(void* ptr, size_t size) +__global__ void +doRefMap_GPU_Parallel(const int iRefMap, const int iOrient, const int iConv, + const int maxParallelConv, const myfloat_t *lCC, + const myparam5_t *comp_params, myblockGPU_t *comp_block, + bioem_Probability pProb, const bioem_param_device param, + const bioem_RefMap RefMap, const int maxRef, + const int dispC) { - int* myptr = (int*) ptr; - int mysize = size / sizeof(int); - int myid = myBlockDimX * 
myBlockIdxX + myThreadIdxX; - int mygrid = myBlockDimX * myGridDimX; - for (int i = myid; i < mysize; i += mygrid) myptr[i] = 0; -} + int myGlobalId = myBlockIdxX * myBlockDimX + myThreadIdxX; + if (myGlobalId >= maxParallelConv * param.NtotDisp) + return; + int myConv = myGlobalId / param.NtotDisp; + myGlobalId -= myConv * param.NtotDisp; + int myX = myGlobalId / param.NxDisp; + myGlobalId -= myX * param.NxDisp; + int myY = myGlobalId; + myGlobalId = myBlockIdxX * myBlockDimX + myThreadIdxX; + + int cent_x = (myX * param.GridSpaceCenter + dispC) % param.NumberPixels; + int cent_y = (myY * param.GridSpaceCenter + dispC) % param.NumberPixels; + int address = (myConv * maxRef * param.NumberPixels * param.NumberPixels) + + (cent_x * param.NumberPixels + cent_y); + myfloat_t value = (myfloat_t) lCC[address] / + (myfloat_t)(param.NumberPixels * param.NumberPixels); + + __shared__ myprob_t bestLogpro[CUDA_THREAD_MAX]; + __shared__ int bestId[CUDA_THREAD_MAX]; + __shared__ myprob_t sumExp[CUDA_THREAD_MAX]; + __shared__ myprob_t sumAngles[CUDA_THREAD_MAX]; + + int nTotalThreads = + ((maxParallelConv * param.NtotDisp) < ((myBlockIdxX + 1) * myBlockDimX)) ? 
+ ((maxParallelConv * param.NtotDisp) - (myBlockIdxX * myBlockDimX)) : + myBlockDimX; + int halfPoint = (nTotalThreads + 1) >> 1; // divide by two + + bioem_Probability_map &pProbMap = pProb.getProbMap(iRefMap); + + bestLogpro[myThreadIdxX] = + calc_logpro(param, comp_params[myConv].amp, comp_params[myConv].pha, + comp_params[myConv].env, comp_params[myConv].sumC, + comp_params[myConv].sumsquareC, value, + RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]); +#ifdef DEBUG_PROB + printf("\t\t\tProb: iRefMap %d, iOrient %d, iConv %d, " + "cent_x %d, cent_y %d, address %d, value %f, logpro %f\n", + iRefMap, iOrient, iConv, cent_x, cent_y, address, value, + bestLogpro[myThreadIdxX]); +#endif + bestId[myThreadIdxX] = myGlobalId; + sumExp[myThreadIdxX] = exp(bestLogpro[myThreadIdxX] - pProbMap.Constoadd); + if (param.writeAngles) + { + bioem_Probability_angle &pProbAngle = pProb.getProbAngle(iRefMap, iOrient); + sumAngles[myThreadIdxX] = + exp(bestLogpro[myThreadIdxX] - pProbAngle.ConstAngle); + } + __syncthreads(); -__global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t sumC, const myfloat_t sumsquareC, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int blockoffset, const int nShifts, const int nShiftBits, const int maxRef) -{ - const size_t myid = (myBlockIdxX + blockoffset) * myBlockDimX + myThreadIdxX; - const int iRefMap = myid >> (nShiftBits << 1); - const int myRef = myThreadIdxX >> (nShiftBits << 1); - const int myShiftIdx = (myid >> nShiftBits) & (nShifts - 1); - const int myShiftIdy = myid & (nShifts - 1); - const int myShift = myid & (nShifts * nShifts - 1); - const int cent_x = myShiftIdx * param.GridSpaceCenter - param.maxDisplaceCenter; - const int cent_y = myShiftIdy * param.GridSpaceCenter - param.maxDisplaceCenter; - - const bool threadActive = myShiftIdx < nShifts && 
myShiftIdy < nShifts && iRefMap < maxRef; - - compareRefMap<2>(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, pMap, pProb, param, RefMap, cent_x, cent_y, myShift, nShifts * nShifts, myRef, threadActive); + // Total number of active threads + while (nTotalThreads > 1) + { + if (myThreadIdxX < (nTotalThreads >> 1)) + { + // Get the shared value stored by another thread + myprob_t temp = bestLogpro[myThreadIdxX + halfPoint]; + if (temp > bestLogpro[myThreadIdxX]) + { + bestLogpro[myThreadIdxX] = temp; + bestId[myThreadIdxX] = bestId[myThreadIdxX + halfPoint]; + } + sumExp[myThreadIdxX] += sumExp[myThreadIdxX + halfPoint]; + if (param.writeAngles) + { + sumAngles[myThreadIdxX] += sumAngles[myThreadIdxX + halfPoint]; + } + } + __syncthreads(); + nTotalThreads = halfPoint; // divide by two. + halfPoint = (nTotalThreads + 1) >> 1; // divide by two + // only the first half of the threads will be active. + } + if (myThreadIdxX == 0) + { + comp_block[myBlockIdxX].logpro = bestLogpro[0]; + comp_block[myBlockIdxX].id = bestId[0]; + comp_block[myBlockIdxX].sumExp = sumExp[0]; + if (param.writeAngles) + { + comp_block[myBlockIdxX].sumAngles = sumAngles[0]; + } +#ifdef DEBUG_PROB + printf("\t\t\tProb block: iRefMap %d, iOrient %d, iConv %d, " + "bestlogpro %f, bestId %d, sumExp %f\n", + iRefMap, iOrient, iConv, bestLogpro[0], bestId[0], sumExp[0]); +#endif + } } -__global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* refmap, mycuComplex_t* out, const int NumberPixelsTotal, const int MapSize, const int NumberMaps, const int Offset) +__global__ void +doRefMap_GPU_Reduce(const int iRefMap, const int iOrient, const int iConv, + const int maxParallelConv, const myfloat_t *lCC, + const myparam5_t *comp_params, + const myblockGPU_t *comp_block, bioem_Probability pProb, + const bioem_param_device param, const bioem_RefMap RefMap, + const int maxRef, const int dispC) { - if (myBlockIdxX >= NumberMaps) return; - const mycuComplex_t* myin = (mycuComplex_t*) 
&refmap[(myBlockIdxX + Offset) * MapSize]; - const mycuComplex_t* myconv = (mycuComplex_t*) convmap; - mycuComplex_t* myout = &out[myBlockIdxX * MapSize]; - for(int i = myThreadIdxX; i < NumberPixelsTotal; i += myBlockDimX) - { - mycuComplex_t val; - const mycuComplex_t conv = myconv[i]; - const mycuComplex_t in = myin[i]; - - val.x = conv.x * in.x + conv.y * in.y; - val.y = conv.y * in.x - conv.x * in.y; - myout[i] = val; - } + + __shared__ myprob_t bestLogpro[CUDA_THREAD_MAX]; + __shared__ int bestId[CUDA_THREAD_MAX]; + __shared__ myprob_t sumExp[CUDA_THREAD_MAX]; + __shared__ myprob_t sumAngles[CUDA_THREAD_MAX]; + + // if it is the last block + int nTotalThreads = myBlockDimX; + int halfPoint = (nTotalThreads + 1) >> 1; // divide by two + + bioem_Probability_map &pProbMap = pProb.getProbMap(iRefMap); + + bestLogpro[myThreadIdxX] = comp_block[myThreadIdxX].logpro; + bestId[myThreadIdxX] = comp_block[myThreadIdxX].id; + sumExp[myThreadIdxX] = comp_block[myThreadIdxX].sumExp; + if (param.writeAngles) + { + sumAngles[myThreadIdxX] = comp_block[myThreadIdxX].sumAngles; + } + __syncthreads(); + while (nTotalThreads > 1) + { + if (myThreadIdxX < (nTotalThreads >> 1)) + { + // Get the shared value stored by another thread + myprob_t temp = bestLogpro[myThreadIdxX + halfPoint]; /* same type as bestLogpro[], so the candidate is not narrowed before the comparison */ + if (temp > bestLogpro[myThreadIdxX]) + { + bestLogpro[myThreadIdxX] = temp; + bestId[myThreadIdxX] = bestId[myThreadIdxX + halfPoint]; + } + sumExp[myThreadIdxX] += sumExp[myThreadIdxX + halfPoint]; + if (param.writeAngles) + { + sumAngles[myThreadIdxX] += sumAngles[myThreadIdxX + halfPoint]; + } + } + __syncthreads(); + nTotalThreads = halfPoint; // divide by two. + halfPoint = (nTotalThreads + 1) >> 1; // divide by two + // only the first half of the threads will be active. 
+ } + + if (myThreadIdxX == 0) + { + pProbMap.Total += sumExp[0]; + if (pProbMap.Constoadd < bestLogpro[0]) + { + pProbMap.Total *= exp(-bestLogpro[0] + pProbMap.Constoadd); + pProbMap.Constoadd = bestLogpro[0]; + + // ********** Getting parameters that maximize the probability *********** + int myGlobalId = bestId[0]; + int myConv = myGlobalId / param.NtotDisp; + myGlobalId -= myConv * param.NtotDisp; + int myX = myGlobalId / param.NxDisp; + myGlobalId -= myX * param.NxDisp; + int myY = myGlobalId; + + int cent_x = (myX * param.GridSpaceCenter + dispC) % param.NumberPixels; + int cent_y = (myY * param.GridSpaceCenter + dispC) % param.NumberPixels; + int address = + (myConv * maxRef * param.NumberPixels * param.NumberPixels) + + (cent_x * param.NumberPixels + cent_y); + myfloat_t value = (myfloat_t) lCC[address] / + (myfloat_t)(param.NumberPixels * param.NumberPixels); + + pProbMap.max.max_prob_cent_x = + -((myX * param.GridSpaceCenter + dispC) - param.NumberPixels); + pProbMap.max.max_prob_cent_y = + -((myY * param.GridSpaceCenter + dispC) - param.NumberPixels); + pProbMap.max.max_prob_orient = iOrient; + pProbMap.max.max_prob_conv = iConv + myConv; + pProbMap.max.max_prob_norm = + -(-comp_params[myConv].sumC * RefMap.sum_RefMap[iRefMap] + + param.Ntotpi * value) / + (comp_params[myConv].sumC * comp_params[myConv].sumC - + comp_params[myConv].sumsquareC * param.Ntotpi); + pProbMap.max.max_prob_mu = + -(-comp_params[myConv].sumC * value + + comp_params[myConv].sumsquareC * RefMap.sum_RefMap[iRefMap]) / + (comp_params[myConv].sumC * comp_params[myConv].sumC - + comp_params[myConv].sumsquareC * param.Ntotpi); + +#ifdef DEBUG_PROB + printf("\tProbabilities change: iRefMap %d, iOrient %d, iConv %d, " + "Total %f, Const %f, bestlogpro %f, sumExp %f, bestId %d\n", + iRefMap, iOrient, iConv + myConv, pProbMap.Total, + pProbMap.Constoadd, bestLogpro[0], sumExp[0], bestId[0]); + printf("\tParameters: myConv %d, myX %d, myY %d, cent_x %d, cent_y %d, " + "probX %d, probY 
%d\n", + myConv, myX, myY, cent_x, cent_y, pProbMap.max.max_prob_cent_x, + pProbMap.max.max_prob_cent_y); +#endif + } +#ifdef DEBUG_PROB + printf("\t\tProbabilities after Reduce: iRefMap %d, iOrient %d, iConv " + "%d, Total %f, Const %f, bestlogpro %f, sumExp %f, bestId %d\n", + iRefMap, iOrient, iConv, pProbMap.Total, pProbMap.Constoadd, + bestLogpro[0], sumExp[0], bestId[0]); +#endif + + if (param.writeAngles) + { + bioem_Probability_angle &pProbAngle = + pProb.getProbAngle(iRefMap, iOrient); + pProbAngle.forAngles += sumAngles[0]; + if (pProbAngle.ConstAngle < bestLogpro[0]) + { + pProbAngle.forAngles *= exp(-bestLogpro[0] + pProbAngle.ConstAngle); + pProbAngle.ConstAngle = bestLogpro[0]; + } + } + } } -__global__ void cuDoRefMapsFFT(const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int maxRef, const int Offset) +__global__ void +init_Constoadd(const int iRefMap, const int iOrient, const myfloat_t *lCC, + const myparam5_t *comp_params, bioem_Probability pProb, + const bioem_param_device param, const bioem_RefMap RefMap, + const int initialized_const) { - if (myBlockIdxX * myBlockDimX + myThreadIdxX >= maxRef) return; - const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX + Offset; - const myfloat_t* mylCC = &lCC[(myBlockIdxX * myBlockDimX + myThreadIdxX) * param.NumberPixels * param.NumberPixels]; - doRefMapFFT(iRefMap, iOrient, iConv, amp, pha, env, mylCC, sumC, sumsquareC, pProb, param, RefMap); + myfloat_t value = + (myfloat_t) lCC[0] / (myfloat_t)(param.NumberPixels * param.NumberPixels); + + myfloat_t logpro = + calc_logpro(param, comp_params->amp, comp_params->pha, comp_params->env, + comp_params->sumC, comp_params->sumsquareC, value, + RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]); + + bioem_Probability_map &pProbMap = 
pProb.getProbMap(iRefMap); + + // Needed only once, in the first projection + if (!initialized_const) + { + pProbMap.Constoadd = logpro; + } + // Needed for every projection + if (param.writeAngles) + { + bioem_Probability_angle &pProbAngle = pProb.getProbAngle(iRefMap, iOrient); + pProbAngle.ConstAngle = logpro; + } + +#ifdef DEBUG_GPU + printf("\tInitialized pProbMap.Constoadd of refmap %d to %f\n", iRefMap, + pProbMap.Constoadd); +#endif } -template <class T> static inline T divup(T num, T divider) {return((num + divider - 1) / divider);} -static inline bool IsPowerOf2(int x) {return ((x > 0) && ((x & (x - 1)) == 0));} -#if defined(_WIN32) -static inline int ilog2 (int value) +template <class T> static inline T divup(T num, T divider) { - DWORD index; - _BitScanReverse (&index, value); - return(value); + return ((num + divider - 1) / divider); } -#else -static inline int ilog2(int value) {return 31 - __builtin_clz(value);} -#endif -int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap) +int bioem_cuda::compareRefMaps(int iPipeline, int iOrient, int iConv, + int maxParallelConv, mycomplex_t *conv_mapsFFT, + myparam5_t *comp_params, const int startMap) { - if (startMap) - { - cout << "Error startMap not implemented for GPU Code\n"; - exit(1); - } -#ifdef DEBUG_GPU - float time; - cudaEvent_t start, stop; - checkCudaErrors(cudaEventCreate(&start)); - checkCudaErrors(cudaEventCreate(&stop)); - checkCudaErrors(cudaEventRecord(start, 0)); -#endif - if (GPUAsync) - { - checkCudaErrors(cudaEventSynchronize(cudaEvent[iConv & 1])); - } -#ifdef DEBUG_GPU - checkCudaErrors(cudaEventRecord(stop, 0)); - checkCudaErrors(cudaEventSynchronize(stop)); - checkCudaErrors(cudaEventElapsedTime(&time, start, stop)); - printf("\t\t\tGPU: time to synch projections %1.6f sec\n", time/1000); - checkCudaErrors(cudaEventRecord(start, 
0)); -#endif - if (FFTAlgo) - { - memcpy(&pConvMapFFT_Host[(iConv & 1) * param.FFTMapSize], localmultFFT, param.FFTMapSize * sizeof(mycomplex_t)); - checkCudaErrors(cudaMemcpyAsync(&pConvMapFFT[(iConv & 1) * param.FFTMapSize], &pConvMapFFT_Host[(iConv & 1) * param.FFTMapSize], param.FFTMapSize * sizeof(mycomplex_t), cudaMemcpyHostToDevice, cudaStream[GPUAsync ? 2 : 0])); -#ifdef DEBUG_GPU - checkCudaErrors(cudaEventRecord(stop, 0)); - checkCudaErrors(cudaEventSynchronize(stop)); - checkCudaErrors(cudaEventElapsedTime(&time, start, stop)); - printf("\t\t\tGPU: time for memcpy %1.6f sec\n", time/1000); - checkCudaErrors(cudaEventRecord(start, 0)); -#endif - if (GPUAsync) - { - checkCudaErrors(cudaEventRecord(cudaEvent[2], cudaStream[2])); - checkCudaErrors(cudaStreamWaitEvent(cudaStream[0], cudaEvent[2], 0)); - } - if (GPUDualStream) - { - checkCudaErrors(cudaEventRecord(cudaFFTEvent[0], cudaStream[0])); - checkCudaErrors(cudaStreamWaitEvent(cudaStream[1], cudaFFTEvent[0], 0)); - } - for (int i = 0, j = 0; i < maxRef; i += CUDA_FFTS_AT_ONCE, j++) - { - if (!GPUDualStream) j = 0; - const int num = min(CUDA_FFTS_AT_ONCE, maxRef - i); - multComplexMap<<<num, CUDA_THREAD_COUNT, 0, cudaStream[j & 1]>>>(&pConvMapFFT[(iConv & 1) * param.FFTMapSize], pRefMapsFFT, pFFTtmp2[j & 1], param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D, param.FFTMapSize, num, i); - cufftResult err = mycufftExecC2R(i + CUDA_FFTS_AT_ONCE > maxRef ? 
plan[1][j & 1] : plan[0][j & 1], pFFTtmp2[j & 1], pFFTtmp[j & 1]); - if (err != CUFFT_SUCCESS) - { - cout << "Error running CUFFT " << cufftGetErrorStrung(err) << "\n"; - exit(1); - } - cuDoRefMapsFFT<<<divup(num, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream[j & 1]>>>(iOrient, iConv, amp, pha, env, pFFTtmp[j & 1], sumC, sumsquareC, pProb_device, param.param_device, *gpumap, num, i); - } - checkCudaErrors(cudaPeekAtLastError()); - if (GPUDualStream) - { - checkCudaErrors(cudaEventRecord(cudaFFTEvent[1], cudaStream[1])); - checkCudaErrors(cudaStreamWaitEvent(cudaStream[0], cudaFFTEvent[1], 0)); - } - } - else - { - checkCudaErrors(cudaMemcpyAsync(pConvMap_device[iConv & 1], conv_map, sizeof(myfloat_t) * RefMap.refMapSize, cudaMemcpyHostToDevice, cudaStream[0])); -#ifdef DEBUG_GPU - checkCudaErrors(cudaEventRecord(stop, 0)); - checkCudaErrors(cudaEventSynchronize(stop)); - checkCudaErrors(cudaEventElapsedTime(&time, start, stop)); - printf("\t\t\tGPU: time for memcpy %1.6f sec\n", time/1000); - checkCudaErrors(cudaEventRecord(start, 0) ); -#endif - if (GPUAlgo == 2) //Loop over shifts - { - const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1; - if (!IsPowerOf2(nShifts)) - { - cout << "Invalid number of displacements, no power of two\n"; - exit(1); - } - if (CUDA_THREAD_COUNT % (nShifts * nShifts)) - { - cout << "CUDA Thread count (" << CUDA_THREAD_COUNT << ") is no multiple of number of shifts (" << (nShifts * nShifts) << ")\n"; - exit(1); - } - if (nShifts > CUDA_MAX_SHIFT_REDUCE) - { - cout << "Too many displacements for CUDA reduction\n"; - exit(1); - } - const int nShiftBits = ilog2(nShifts); - size_t totalBlocks = divup((size_t) maxRef * (size_t) nShifts * (size_t) nShifts, (size_t) CUDA_THREAD_COUNT); - size_t nBlocks = CUDA_BLOCK_COUNT; - for (size_t i = 0; i < totalBlocks; i += nBlocks) - { - compareRefMapLoopShifts_kernel<<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + 
CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream[0] >>> (iOrient, iConv, amp, pha, env, sumC, sumsquareC, pConvMap_device[iConv & 1], pProb_device, param.param_device, *gpumap, i, nShifts, nShiftBits, maxRef); - } - } - else if (GPUAlgo == 1) //Split shifts in multiple kernels - { - for (int cent_x = -param.param_device.maxDisplaceCenter; cent_x <= param.param_device.maxDisplaceCenter; cent_x = cent_x + param.param_device.GridSpaceCenter) - { - for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y = cent_y + param.param_device.GridSpaceCenter) - { - compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream[0]>>> (iOrient, iConv, amp, pha, env, sumC, sumsquareC, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef); - } - } - } - else if (GPUAlgo == 0) //All shifts in one kernel - { - compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream[0]>>> (iOrient, iConv, amp, pha, env, sumC, sumsquareC, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, maxRef); - } - else - { - cout << "Invalid GPU Algorithm selected\n"; - exit(1); - } - } -#ifdef DEBUG_GPU - checkCudaErrors(cudaEventRecord(stop, 0)); - checkCudaErrors(cudaEventSynchronize(stop)); - checkCudaErrors(cudaEventElapsedTime(&time, start, stop)); - printf("\t\t\tGPU: time to run CUDA %1.6f sec\n", time/1000); - checkCudaErrors(cudaEventRecord(start, 0)); -#endif - if (GPUWorkload < 100) - { - bioem::compareRefMaps(iOrient, iConv, amp, pha, env, conv_map, localmultFFT, sumC, sumsquareC, maxRef); - } -#ifdef DEBUG_GPU - checkCudaErrors(cudaEventRecord(stop, 0)); - checkCudaErrors(cudaEventSynchronize(stop)); - checkCudaErrors(cudaEventElapsedTime(&time, start, stop)); - printf("\t\t\tGPU: time to run OMP %1.6f sec\n", time/1000); -#endif - if (GPUAsync) - { - 
checkCudaErrors(cudaEventRecord(cudaEvent[iConv & 1], cudaStream[0])); - } - else - { - checkCudaErrors(cudaStreamSynchronize(cudaStream[0])); - } - return(0); + if (startMap) + { + cout << "Error startMap not implemented for GPU Code\n"; + exit(1); + } + printCudaDebugStart(); + if (GPUAsync) + { + checkCudaErrors(cudaEventSynchronize(cudaEvent[iPipeline & 1])); + printCudaDebug("time to synch projections"); + } + + int k = (iPipeline & 1) * param.nTotParallelConv; + memcpy(&pConvMapFFT_Host[k * param.FFTMapSize], + conv_mapsFFT[k * param.FFTMapSize], + param.FFTMapSize * maxParallelConv * sizeof(mycomplex_t)); + printCudaDebug("time for memcpy"); + checkCudaErrors( + cudaMemcpyAsync(&pConvMapFFT[k * param.FFTMapSize], + &pConvMapFFT_Host[k * param.FFTMapSize], + param.FFTMapSize * maxParallelConv * sizeof(mycomplex_t), + cudaMemcpyHostToDevice, cudaStream[GPUAsync ? 2 : 0])); + // If one wants just a single tranfer, without memcpy: + // checkCudaErrors(cudaMemcpyAsync(&pConvMapFFT[k * param.FFTMapSize], + // conv_mapsFFT[k * param.FFTMapSize], param.FFTMapSize * maxParallelConv * + // sizeof(mycomplex_t), cudaMemcpyHostToDevice, cudaStream[GPUAsync ? 2 : + // 0])); + checkCudaErrors(cudaMemcpyAsync(&pTmp_comp_params[k], &comp_params[k], + maxParallelConv * sizeof(myparam5_t), + cudaMemcpyHostToDevice, + cudaStream[GPUAsync ? 
2 : 0])); + printCudaDebug("time for asyncmemcpy"); + if (GPUAsync) + { + checkCudaErrors(cudaEventRecord(cudaEvent[2], cudaStream[2])); + checkCudaErrors(cudaStreamWaitEvent(cudaStream[0], cudaEvent[2], 0)); + } + if (GPUDualStream) + { + checkCudaErrors(cudaEventRecord(cudaFFTEvent[0], cudaStream[0])); + checkCudaErrors(cudaStreamWaitEvent(cudaStream[1], cudaFFTEvent[0], 0)); + } + for (int offset = 0, stream = 0; offset < maxRef; + offset += param.nTotParallelMaps, stream++) + { + if (!GPUDualStream) + stream = 0; + const int nRef = min(param.nTotParallelMaps, maxRef - offset); + multComplexMap<<<maxParallelConv * nRef, CudaThreadCount, 0, + cudaStream[stream & 1]>>>( + &pConvMapFFT[k * param.FFTMapSize], pRefMapsFFT, pFFTtmp2[stream & 1], + param.FFTMapSize, maxParallelConv, nRef, offset); + printCudaDebug("time for multComplexMap kernel"); + cufftResult err = mycufftExecC2R(offset + param.nTotParallelMaps > maxRef ? + plan[1][stream & 1] : + plan[0][stream & 1], + pFFTtmp2[stream & 1], pFFTtmp[stream & 1]); + if (err != CUFFT_SUCCESS) + { + cout << "Error running CUFFT " << cufftGetErrorStrung(err) << "\n"; + exit(1); + } + printCudaDebug("time for mycufftExecC2R kernel"); + if (BioEMAlgo == 1) + { + for (int conv = 0; conv < maxParallelConv; conv++) + { + cuDoRefMapsFFT<<<divup(nRef, CudaThreadCount), CudaThreadCount, 0, + cudaStream[stream & 1]>>>( + iOrient, iConv + conv, + pFFTtmp[stream & 1] + + conv * nRef * param.param_device.NumberPixels * + param.param_device.NumberPixels, + &pTmp_comp_params[k + conv], pProb_device, param.param_device, + *gpumap, nRef, offset); + printCudaDebug("time for cuDoRefMapsFFT kernel"); + } + } + else + { + for (int refmap = offset; refmap < nRef + offset; refmap++) + { + // First iteration needs to initialize Constoadd with the first valid + // value to avoid overflow due to high sumExp values + if ((initialized_const[refmap] == false) || + (param.param_device.writeAngles && iConv == 0)) + { + init_Constoadd<<<1, 1, 0, 
cudaStream[stream & 1]>>>( + refmap, iOrient, + pFFTtmp[stream & 1] + + (refmap - offset) * param.param_device.NumberPixels * + param.param_device.NumberPixels, + &pTmp_comp_params[k], pProb_device, param.param_device, *gpumap, + (int) initialized_const[refmap]); + initialized_const[refmap] = true; + printCudaDebug("time for init_Constoadd kernel"); + } + + doRefMap_GPU_Parallel<<<divup(maxParallelConv * + param.param_device.NtotDisp, + CudaThreadCount), + CudaThreadCount, 0, cudaStream[stream & 1]>>>( + refmap, iOrient, iConv, maxParallelConv, + pFFTtmp[stream & 1] + + (refmap - offset) * param.param_device.NumberPixels * + param.param_device.NumberPixels, + &pTmp_comp_params[k], &pTmp_comp_blocks[refmap * Ncomp_blocks], + pProb_device, param.param_device, *gpumap, nRef, + param.param_device.NumberPixels - + param.param_device.maxDisplaceCenter); + printCudaDebug("time for doRefMaps_GPU_Parallel kernel"); + + doRefMap_GPU_Reduce<<<1, divup(maxParallelConv * + param.param_device.NtotDisp, + CudaThreadCount), + 0, cudaStream[stream & 1]>>>( + refmap, iOrient, iConv, maxParallelConv, + pFFTtmp[stream & 1] + + (refmap - offset) * param.param_device.NumberPixels * + param.param_device.NumberPixels, + &pTmp_comp_params[k], &pTmp_comp_blocks[refmap * Ncomp_blocks], + pProb_device, param.param_device, *gpumap, nRef, + param.param_device.NumberPixels - + param.param_device.maxDisplaceCenter); + printCudaDebug("time for doRefMaps_GPU_Reduce kernel"); + } + } + } + checkCudaErrors(cudaPeekAtLastError()); + + if (GPUDualStream) + { + checkCudaErrors(cudaEventRecord(cudaFFTEvent[1], cudaStream[1])); + checkCudaErrors(cudaStreamWaitEvent(cudaStream[0], cudaFFTEvent[1], 0)); + } + + if ((BioEMAlgo == 1) && (GPUWorkload < 100)) + { + bioem::compareRefMaps(iPipeline, iOrient, iConv, maxParallelConv, + conv_mapsFFT, comp_params, maxRef); + printCudaDebug("time to run OMP"); + } + if (GPUAsync) + { + checkCudaErrors(cudaEventRecord(cudaEvent[iPipeline & 1], cudaStream[0])); + } + 
else + { + checkCudaErrors(cudaStreamSynchronize(cudaStream[0])); + printCudaDebug("time to synch at the end"); + } + return (0); } int bioem_cuda::selectCudaDevice() { - int count; - int bestDevice = 0; - cudaDeviceProp deviceProp; - - /* Initializing CUDA driver API */ - cuErrorCheck(cuInit(0)); - - /* Get number of available CUDA devices */ - checkCudaErrors(cudaGetDeviceCount(&count)); - if (count == 0) - { - printf("No CUDA device detected\n"); - return(1); - } - - /* Find the best GPU */ - long long int bestDeviceSpeed = -1, deviceSpeed = -1; - for (int i = 0; i < count; i++) - { - cudaGetDeviceProperties(&deviceProp, i); - deviceSpeed = (long long int) deviceProp.multiProcessorCount * (long long int) deviceProp.clockRate * (long long int) deviceProp.warpSize; - if (deviceSpeed > bestDeviceSpeed) - { - bestDevice = i; - bestDeviceSpeed = deviceSpeed; - } - } - - /* Get user-specified GPU choice */ - if (getenv("GPUDEVICE")) - { - int device = atoi(getenv("GPUDEVICE")); - if (device > count) - { - printf("Invalid CUDA device specified, max device number is %d\n", count); - exit(1); - } + int count; + int bestDevice = 0; + cudaDeviceProp deviceProp; + + /* Initializing CUDA driver API */ + cuErrorCheck(cuInit(0)); + + /* Get number of available CUDA devices */ + checkCudaErrors(cudaGetDeviceCount(&count)); + if (count == 0) + { + printf("No CUDA device detected\n"); + return (1); + } + + /* Find the best GPU */ + long long int bestDeviceSpeed = -1, deviceSpeed = -1; + for (int i = 0; i < count; i++) + { + cudaGetDeviceProperties(&deviceProp, i); + deviceSpeed = (long long int) deviceProp.multiProcessorCount * + (long long int) deviceProp.clockRate * + (long long int) deviceProp.warpSize; + if (deviceSpeed > bestDeviceSpeed) + { + bestDevice = i; + bestDeviceSpeed = deviceSpeed; + } + } + + /* Get user-specified GPU choice */ + if (getenv("GPUDEVICE")) + { + int device = atoi(getenv("GPUDEVICE")); + if (device >= count) /* valid device ids are 0 .. count-1, so device == count must be rejected too */ + { + printf("Invalid CUDA device 
specified, max device number is %d\n", count); + exit(1); + } #ifdef WITH_MPI - if (device == -1) - { - device = mpi_rank % count; - } + if (device == -1) + { + device = mpi_rank % count; + } #endif - if (device < 0) - { - printf("Negative CUDA device specified: %d, invalid!\n", device); - exit(1); - } - bestDevice = device; - } - - /* Set CUDA processes to appropriate devices */ - cudaGetDeviceProperties(&deviceProp, bestDevice); - if (deviceProp.computeMode == 0) - { - checkCudaErrors(cudaSetDevice(bestDevice)); - } - else - { - if (DebugOutput >= 1) - { - printf("CUDA device %d is not set in DEFAULT mode, make sure that CUDA processes are pinned as planned!\n", bestDevice); - printf("Pinning process %d to CUDA device %d\n", mpi_rank, bestDevice); - } - checkCudaErrors(cudaSetDevice(bestDevice)); - /* This synchronization is needed in order to detect bogus silent errors from cudaSetDevice call */ - checkCudaErrors(cudaDeviceSynchronize()); - } - - /* Debugging information about CUDA devices used by the current process */ - if (DebugOutput >= 3) - { - printf("Using CUDA Device %s with Properties:\n", deviceProp.name); - printf("totalGlobalMem = %lld\n", (unsigned long long int) deviceProp.totalGlobalMem); - printf("sharedMemPerBlock = %lld\n", (unsigned long long int) deviceProp.sharedMemPerBlock); - printf("regsPerBlock = %d\n", deviceProp.regsPerBlock); - printf("warpSize = %d\n", deviceProp.warpSize); - printf("memPitch = %lld\n", (unsigned long long int) deviceProp.memPitch); - printf("maxThreadsPerBlock = %d\n", deviceProp.maxThreadsPerBlock); - printf("maxThreadsDim = %d %d %d\n", deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], deviceProp.maxThreadsDim[2]); - printf("maxGridSize = %d %d %d\n", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]); - printf("totalConstMem = %lld\n", (unsigned long long int) deviceProp.totalConstMem); - printf("major = %d\n", deviceProp.major); - printf("minor = %d\n", deviceProp.minor); - 
printf("clockRate = %d\n", deviceProp.clockRate); - printf("memoryClockRate = %d\n", deviceProp.memoryClockRate); - printf("multiProcessorCount = %d\n", deviceProp.multiProcessorCount); - printf("textureAlignment = %lld\n", (unsigned long long int) deviceProp.textureAlignment); - printf("computeMode = %d\n", deviceProp.computeMode); + if (device < 0) + { + printf("Negative CUDA device specified: %d, invalid!\n", device); + exit(1); + } + bestDevice = device; + } + + /* Set CUDA processes to appropriate devices */ + cudaGetDeviceProperties(&deviceProp, bestDevice); + if (deviceProp.computeMode == 0) + { + checkCudaErrors(cudaSetDevice(bestDevice)); + } + else + { + if (DebugOutput >= 1) + { + printf("CUDA device %d is not set in DEFAULT mode, make sure that CUDA " + "processes are pinned as planned!\n", + bestDevice); + printf("Pinning process %d to CUDA device %d\n", mpi_rank, bestDevice); + } + checkCudaErrors(cudaSetDevice(bestDevice)); + /* This synchronization is needed in order to detect bogus silent errors + * from cudaSetDevice call */ + checkCudaErrors(cudaDeviceSynchronize()); + } + + /* Debugging information about CUDA devices used by the current process */ + if (DebugOutput >= 2) + { + printf("Using CUDA Device %s with Properties:\n", deviceProp.name); + printf("totalGlobalMem = %lld\n", + (unsigned long long int) deviceProp.totalGlobalMem); + printf("sharedMemPerBlock = %lld\n", + (unsigned long long int) deviceProp.sharedMemPerBlock); + printf("regsPerBlock = %d\n", deviceProp.regsPerBlock); + printf("warpSize = %d\n", deviceProp.warpSize); + printf("memPitch = %lld\n", (unsigned long long int) deviceProp.memPitch); + printf("maxThreadsPerBlock = %d\n", deviceProp.maxThreadsPerBlock); + printf("maxThreadsDim = %d %d %d\n", deviceProp.maxThreadsDim[0], + deviceProp.maxThreadsDim[1], deviceProp.maxThreadsDim[2]); + printf("maxGridSize = %d %d %d\n", deviceProp.maxGridSize[0], + deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]); + 
printf("totalConstMem = %lld\n", + (unsigned long long int) deviceProp.totalConstMem); + printf("major = %d\n", deviceProp.major); + printf("minor = %d\n", deviceProp.minor); + printf("clockRate = %d\n", deviceProp.clockRate); + printf("memoryClockRate = %d\n", deviceProp.memoryClockRate); + printf("multiProcessorCount = %d\n", deviceProp.multiProcessorCount); + printf("textureAlignment = %lld\n", + (unsigned long long int) deviceProp.textureAlignment); + printf("computeMode = %d\n", deviceProp.computeMode); #if CUDA_VERSION > 3010 - size_t free, total; + size_t free, total; #else - unsigned int free, total; + unsigned int free, total; #endif - if (deviceProp.computeMode == 0) - { - CUdevice tmpDevice; - cuErrorCheck(cuDeviceGet(&tmpDevice, bestDevice)); - CUcontext tmpContext; - cuErrorCheck(cuCtxCreate(&tmpContext, 0, tmpDevice)); - cuErrorCheck(cuMemGetInfo(&free, &total)); - cuErrorCheck(cuCtxDestroy(tmpContext)); - } - else - { - cuErrorCheck(cuMemGetInfo(&free, &total)); - } - printf("free memory = %lld; total memory = %lld\n", free, total); - } - - if (DebugOutput >= 1) - { - printf("BioEM for CUDA initialized (MPI Rank %d), %d GPUs found, using GPU %d\n", mpi_rank, count, bestDevice); - } - - return(0); + if (deviceProp.computeMode == 0) + { + CUdevice tmpDevice; + cuErrorCheck(cuDeviceGet(&tmpDevice, bestDevice)); + CUcontext tmpContext; + cuErrorCheck(cuCtxCreate(&tmpContext, 0, tmpDevice)); + cuErrorCheck(cuMemGetInfo(&free, &total)); + cuErrorCheck(cuCtxDestroy(tmpContext)); + } + else + { + cuErrorCheck(cuMemGetInfo(&free, &total)); + } + printf("free memory = %lld; total memory = %lld\n", free, total); + } + + if (DebugOutput >= 1) + { + printf("BioEM for CUDA initialized (MPI Rank %d), %d GPUs found, using GPU " + "%d\n", + mpi_rank, count, bestDevice); + } + + return (0); } int bioem_cuda::deviceInit() { - deviceExit(); - - selectCudaDevice(); - - if (FFTAlgo) GPUAlgo = 2; - - gpumap = new bioem_RefMap; - memcpy(gpumap, &RefMap, 
sizeof(bioem_RefMap)); - if (FFTAlgo == 0) - { - checkCudaErrors(cudaMalloc(&maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize)); - - if (GPUAlgo == 0 || GPUAlgo == 1) - { - pRefMap_device_Mod = (bioem_RefMap_Mod*) gpumap; - bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod; - RefMapGPU->init(RefMap); - checkCudaErrors(cudaMemcpy(maps, RefMapGPU->maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice)); - delete RefMapGPU; - } - else - { - checkCudaErrors(cudaMemcpy(maps, RefMap.maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice)); - } - } - checkCudaErrors(cudaMalloc(&sum, sizeof(myfloat_t) * RefMap.ntotRefMap)); - checkCudaErrors(cudaMemcpy(sum, RefMap.sum_RefMap, sizeof(myfloat_t) * RefMap.ntotRefMap, cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMalloc(&sumsquare, sizeof(myfloat_t) * RefMap.ntotRefMap)); - checkCudaErrors(cudaMemcpy(sumsquare, RefMap.sumsquare_RefMap, sizeof(myfloat_t) * RefMap.ntotRefMap, cudaMemcpyHostToDevice)); - gpumap->maps = maps; - gpumap->sum_RefMap = sum; - gpumap->sumsquare_RefMap = sumsquare; - - checkCudaErrors(cudaMalloc(&pProb_memory, pProb_device.get_size(RefMap.ntotRefMap, param.nTotGridAngles, param.nTotCC, param.param_device.writeAngles, param.param_device.writeCC))); - - for (int i = 0; i < 2; i++) - { - checkCudaErrors(cudaStreamCreate(&cudaStream[i])); - checkCudaErrors(cudaEventCreate(&cudaEvent[i])); - checkCudaErrors(cudaEventCreate(&cudaFFTEvent[i])); - checkCudaErrors(cudaMalloc(&pConvMap_device[i], sizeof(myfloat_t) * RefMap.refMapSize)); - } - if (GPUAsync) - { - checkCudaErrors(cudaStreamCreate(&cudaStream[2])); - checkCudaErrors(cudaEventCreate(&cudaEvent[2])); - } - - if (FFTAlgo) - { - checkCudaErrors(cudaMalloc(&pRefMapsFFT, RefMap.ntotRefMap * param.FFTMapSize * sizeof(mycomplex_t))); - checkCudaErrors(cudaMalloc(&pFFTtmp2[0], CUDA_FFTS_AT_ONCE * param.FFTMapSize * 2 * sizeof(mycomplex_t))); - 
checkCudaErrors(cudaMalloc(&pFFTtmp[0], CUDA_FFTS_AT_ONCE * param.param_device.NumberPixels * param.param_device.NumberPixels * 2 * sizeof(myfloat_t))); - pFFTtmp2[1] = pFFTtmp2[0] + CUDA_FFTS_AT_ONCE * param.FFTMapSize; - pFFTtmp[1] = pFFTtmp[0] + CUDA_FFTS_AT_ONCE * param.param_device.NumberPixels * param.param_device.NumberPixels; - checkCudaErrors(cudaMalloc(&pConvMapFFT, param.FFTMapSize * sizeof(mycomplex_t) * 2)); - checkCudaErrors(cudaHostAlloc(&pConvMapFFT_Host, param.FFTMapSize * sizeof(mycomplex_t) * 2, 0)); - checkCudaErrors(cudaMemcpy(pRefMapsFFT, RefMap.RefMapsFFT, RefMap.ntotRefMap * param.FFTMapSize * sizeof(mycomplex_t), cudaMemcpyHostToDevice)); - } - - deviceInitialized = 1; - return(0); + deviceExit(); + + selectCudaDevice(); + + gpumap = new bioem_RefMap; + memcpy(gpumap, &RefMap, sizeof(bioem_RefMap)); + + checkCudaErrors(cudaMalloc(&sum, sizeof(myfloat_t) * RefMap.ntotRefMap)); + checkCudaErrors(cudaMemcpy(sum, RefMap.sum_RefMap, + sizeof(myfloat_t) * RefMap.ntotRefMap, + cudaMemcpyHostToDevice)); + checkCudaErrors( + cudaMalloc(&sumsquare, sizeof(myfloat_t) * RefMap.ntotRefMap)); + checkCudaErrors(cudaMemcpy(sumsquare, RefMap.sumsquare_RefMap, + sizeof(myfloat_t) * RefMap.ntotRefMap, + cudaMemcpyHostToDevice)); + gpumap->sum_RefMap = sum; + gpumap->sumsquare_RefMap = sumsquare; + + checkCudaErrors( + cudaMalloc(&pProb_memory, + pProb_device.get_size(RefMap.ntotRefMap, param.nTotGridAngles, + param.param_device.writeAngles))); + + for (int i = 0; i < PIPELINE_LVL; i++) + { + checkCudaErrors(cudaStreamCreate(&cudaStream[i])); + checkCudaErrors(cudaEventCreate(&cudaEvent[i])); + } + for (int i = 0; i < MULTISTREAM_LVL; i++) + { + checkCudaErrors(cudaEventCreate(&cudaFFTEvent[i])); + } + if (GPUAsync) + { + checkCudaErrors(cudaStreamCreate(&cudaStream[2])); + checkCudaErrors(cudaEventCreate(&cudaEvent[2])); + } + + checkCudaErrors( + cudaMalloc(&pRefMapsFFT, + RefMap.ntotRefMap * param.FFTMapSize * sizeof(mycomplex_t))); + checkCudaErrors( + 
cudaMalloc(&pFFTtmp2[0], param.nTotParallelConv * param.nTotParallelMaps * + param.FFTMapSize * MULTISTREAM_LVL * + sizeof(mycomplex_t))); + checkCudaErrors( + cudaMalloc(&pFFTtmp[0], param.nTotParallelConv * param.nTotParallelMaps * + param.param_device.NumberPixels * + param.param_device.NumberPixels * + MULTISTREAM_LVL * sizeof(myfloat_t))); + for (int i = 1; i < MULTISTREAM_LVL; i++) + { + pFFTtmp2[i] = + pFFTtmp2[0] + + i * param.nTotParallelConv * param.nTotParallelMaps * param.FFTMapSize; + pFFTtmp[i] = pFFTtmp[0] + + i * param.nTotParallelConv * param.nTotParallelMaps * + param.param_device.NumberPixels * + param.param_device.NumberPixels; + } + checkCudaErrors(cudaMalloc(&pConvMapFFT, param.nTotParallelConv * + param.FFTMapSize * PIPELINE_LVL * + sizeof(mycomplex_t))); + checkCudaErrors(cudaHostAlloc(&pConvMapFFT_Host, + param.nTotParallelConv * param.FFTMapSize * + PIPELINE_LVL * sizeof(mycomplex_t), + 0)); + checkCudaErrors( + cudaMemcpy(pRefMapsFFT, RefMap.RefMapsFFT, + RefMap.ntotRefMap * param.FFTMapSize * sizeof(mycomplex_t), + cudaMemcpyHostToDevice)); + checkCudaErrors( + cudaMalloc(&pTmp_comp_params, + param.nTotParallelConv * PIPELINE_LVL * sizeof(myparam5_t))); + Ncomp_blocks = divup(param.nTotParallelConv * param.param_device.NtotDisp, + CudaThreadCount); + if (Ncomp_blocks > CudaThreadCount) + { + cout << "Error with input parameters. 
Check CudaThreadCount, " + "displacements and max number of parallel comparisons\n"; + exit(1); + } + checkCudaErrors( + cudaMalloc(&pTmp_comp_blocks, + Ncomp_blocks * RefMap.ntotRefMap * sizeof(myblockGPU_t))); + + initialized_const = new bool[RefMap.ntotRefMap]; + for (int i = 0; i < RefMap.ntotRefMap; i++) + initialized_const[i] = false; + + deviceInitialized = 1; + return (0); } int bioem_cuda::deviceExit() { - if (deviceInitialized == 0) return(0); - - - cudaFree(pProb_memory); - cudaFree(sum); - cudaFree(sumsquare); - for (int i = 0; i < 2; i++) - { - cudaStreamDestroy(cudaStream[i]); - cudaEventDestroy(cudaEvent[i]); - cudaEventDestroy(cudaFFTEvent[i]); - cudaFree(pConvMap_device[i]); - } - if (FFTAlgo) - { - cudaFree(pRefMapsFFT); - cudaFree(pConvMapFFT); - cudaFreeHost(pConvMapFFT_Host); - cudaFree(pFFTtmp[0]); - cudaFree(pFFTtmp2[0]); - } - else - { - cudaFree(maps); - } - if (GPUAlgo == 0 || GPUAlgo == 1) - { - cudaFree(pRefMap_device_Mod); - } - if (GPUAsync) - { - cudaStreamDestroy(cudaStream[2]); - cudaEventDestroy(cudaEvent[2]); - } - - delete gpumap; - cudaThreadExit(); - - deviceInitialized = 0; - return(0); + if (deviceInitialized == 0) + return (0); + + cudaFree(pProb_memory); + cudaFree(sum); + cudaFree(sumsquare); + for (int i = 0; i < PIPELINE_LVL; i++) + { + cudaStreamDestroy(cudaStream[i]); + cudaEventDestroy(cudaEvent[i]); + } + for (int i = 0; i < MULTISTREAM_LVL; i++) + { + cudaEventDestroy(cudaFFTEvent[i]); + } + + cudaFree(pRefMapsFFT); + cudaFree(pConvMapFFT); + cudaFreeHost(pConvMapFFT_Host); + cudaFree(pFFTtmp[0]); + cudaFree(pFFTtmp2[0]); + cudaFree(pTmp_comp_params); + cudaFree(pTmp_comp_blocks); + + if (GPUAsync) + { + cudaStreamDestroy(cudaStream[2]); + cudaEventDestroy(cudaEvent[2]); + } + + delete gpumap; + delete[] initialized_const; /* array form: the buffer is allocated with new bool[...] */ + cudaThreadExit(); + + deviceInitialized = 0; + return (0); } int bioem_cuda::deviceStartRun() { - if (GPUWorkload >= 100) - { - maxRef = RefMap.ntotRefMap; - pProb_host = &pProb; - } - else - { - 
maxRef = RefMap.ntotRefMap == 1 ? (size_t) RefMap.ntotRefMap : (size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100; - pProb_host = new bioem_Probability; - pProb_host->init(maxRef, param.nTotGridAngles, param.nTotCC, *this); - pProb_host->copyFrom(&pProb, *this); - } - - pProb_device = *pProb_host; - pProb_device.ptr = pProb_memory; - pProb_device.set_pointers(); - checkCudaErrors(cudaMemcpyAsync(pProb_device.ptr, pProb_host->ptr, pProb_host->get_size(maxRef, param.nTotGridAngles, param.nTotCC, param.param_device.writeAngles, param.param_device.writeCC), cudaMemcpyHostToDevice, cudaStream[0])); - - if (FFTAlgo) - { - for (int j = 0;j < 2;j++) - { - for (int i = 0; i < 2; i++) - { - if (i && maxRef % CUDA_FFTS_AT_ONCE == 0) continue; - int n[2] = {param.param_device.NumberPixels, param.param_device.NumberPixels}; - if (cufftPlanMany(&plan[i][j], 2, n, NULL, 1, param.FFTMapSize, NULL, 1, 0, MY_CUFFT_C2R, i ? (maxRef % CUDA_FFTS_AT_ONCE) : CUDA_FFTS_AT_ONCE) != CUFFT_SUCCESS) - { - cout << "Error planning CUFFT\n"; - exit(1); - } - if (cufftSetCompatibilityMode(plan[i][j], CUFFT_COMPATIBILITY_FFTW_PADDING) != CUFFT_SUCCESS) - { - cout << "Error planning CUFFT compatibility\n"; - exit(1); - } - if (cufftSetStream(plan[i][j], cudaStream[j]) != CUFFT_SUCCESS) - { - cout << "Error setting CUFFT stream\n"; - exit(1); - } - } - if (!GPUDualStream) break; - } - } - return(0); + if (GPUWorkload >= 100) + { + maxRef = RefMap.ntotRefMap; + pProb_host = &pProb; + } + else + { + maxRef = ((size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100) < 1 ? 
+ (size_t) RefMap.ntotRefMap : + (size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100; + pProb_host = new bioem_Probability; + pProb_host->init(maxRef, param.nTotGridAngles, *this); + pProb_host->copyFrom(&pProb, *this); + } + + pProb_device = *pProb_host; + pProb_device.ptr = pProb_memory; + pProb_device.set_pointers(); + checkCudaErrors( + cudaMemcpyAsync(pProb_device.ptr, pProb_host->ptr, + pProb_host->get_size(maxRef, param.nTotGridAngles, + param.param_device.writeAngles), + cudaMemcpyHostToDevice, cudaStream[0])); + + if (maxRef / (param.nTotParallelMaps * param.nTotParallelConv) > + (double) SPLIT_MAPS_LVL) + { + cout << "Error planning CUFFT dimensions\n"; + exit(1); + } + for (int j = 0; j < MULTISTREAM_LVL; j++) + { + for (int i = 0; i < SPLIT_MAPS_LVL; i++) + { + if (i && maxRef % param.nTotParallelMaps == 0) + continue; + int n[2] = {param.param_device.NumberPixels, + param.param_device.NumberPixels}; + if (cufftPlanMany( + &plan[i][j], 2, n, NULL, 1, param.FFTMapSize, NULL, 1, 0, + MY_CUFFT_C2R, + i ? 
((maxRef % param.nTotParallelMaps) * param.nTotParallelConv) : + (param.nTotParallelMaps * param.nTotParallelConv)) != + CUFFT_SUCCESS) + { + cout << "Error planning CUFFT\n"; + exit(1); + } + if (cufftSetCompatibilityMode( + plan[i][j], CUFFT_COMPATIBILITY_FFTW_PADDING) != CUFFT_SUCCESS) + { + cout << "Error planning CUFFT compatibility\n"; + exit(1); + } + if (cufftSetStream(plan[i][j], cudaStream[j]) != CUFFT_SUCCESS) + { + cout << "Error setting CUFFT stream\n"; + exit(1); + } + } + if (!GPUDualStream) + break; + } + + return (0); } int bioem_cuda::deviceFinishRun() { - if (GPUAsync) cudaStreamSynchronize(cudaStream[0]); - checkCudaErrors(cudaMemcpyAsync(pProb_host->ptr, pProb_device.ptr, pProb_host->get_size(maxRef, param.nTotGridAngles, param.nTotCC, param.param_device.writeAngles, param.param_device.writeCC), cudaMemcpyDeviceToHost, cudaStream[0])); - - if (FFTAlgo) - { - for (int j = 0;j < 2;j++) - { - for (int i = 0; i < 2; i++) - { - if (i && maxRef % CUDA_FFTS_AT_ONCE == 0) continue; - cufftDestroy(plan[i][j]); - } - if (!GPUDualStream) break; - } - } - cudaThreadSynchronize(); - if (GPUWorkload < 100) - { - pProb.copyFrom(pProb_host, *this); - free_device_host(pProb_host->ptr); - delete[] pProb_host; - } - - return(0); -} + if (GPUAsync) + cudaStreamSynchronize(cudaStream[0]); + checkCudaErrors( + cudaMemcpyAsync(pProb_host->ptr, pProb_device.ptr, + pProb_host->get_size(maxRef, param.nTotGridAngles, + param.param_device.writeAngles), + cudaMemcpyDeviceToHost, cudaStream[0])); + + for (int j = 0; j < MULTISTREAM_LVL; j++) + { + for (int i = 0; i < SPLIT_MAPS_LVL; i++) + { + if (i && maxRef % param.nTotParallelMaps == 0) + continue; + cufftDestroy(plan[i][j]); + } + if (!GPUDualStream) + break; + } -void* bioem_cuda::malloc_device_host(size_t size) -{ - void* ptr; - checkCudaErrors(cudaHostAlloc(&ptr, size, 0)); - return(ptr); + cudaThreadSynchronize(); + if (GPUWorkload < 100) + { + pProb.copyFrom(pProb_host, *this); + free_device_host(pProb_host->ptr); 
+ delete pProb_host; /* scalar form: pProb_host is allocated with new bioem_Probability */ + } + + return (0); } -void* bioem_cuda::malloc_device_host(size_t size) +void *bioem_cuda::malloc_device_host(size_t size) { - void* ptr; - checkCudaErrors(cudaHostAlloc(&ptr, size, 0)); - return(ptr); + void *ptr; + checkCudaErrors(cudaHostAlloc(&ptr, size, 0)); + return (ptr); } -void bioem_cuda::free_device_host(void* ptr) +void bioem_cuda::free_device_host(void *ptr) { - cudaFreeHost(ptr); + cudaFreeHost(ptr); } +void bioem_cuda::free_device_host(void *ptr) { cudaFreeHost(ptr); } + void bioem_cuda::rebalance(int workload) { - if ((workload < 0) || (workload > 100) || (workload == GPUWorkload)) return; + if ((workload < 0) || (workload > 100) || (workload == GPUWorkload)) + return; - deviceFinishRun(); + deviceFinishRun(); - if (DebugOutput >= 2) - { - printf("\t\tSetting GPU workload to %d%% (rank %d)\n", workload, mpi_rank); - } + if (DebugOutput >= 2) + { + printf("\t\tSetting GPU workload to %d%% (rank %d)\n", workload, mpi_rank); + } - GPUWorkload = workload; - maxRef = (size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100; + GPUWorkload = workload; + maxRef = (size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100; - deviceStartRun(); + deviceStartRun(); } -bioem* bioem_cuda_create() +bioem *bioem_cuda_create() { - int count; - - if (cudaGetDeviceCount(&count) != cudaSuccess) count = 0; - if (count == 0) - { - printf("No CUDA device available, using fallback to CPU version\n"); - return new bioem; - } - - return new bioem_cuda; + int count; + + if (cudaGetDeviceCount(&count) != cudaSuccess) + count = 0; + if (count == 0) + { + printf("No CUDA device available, using fallback to CPU version\n"); + return new bioem; + } + + return new bioem_cuda; } diff --git a/include/autotuner.h b/include/autotuner.h index 10db9ca8d21810f883d4d3bbf74dd9895a9e1498..64ad3ae071a6133da6153233f38e0a0a8166d767 100644 --- a/include/autotuner.h +++ b/include/autotuner.h @@ -1,9 +1,11 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard 
Hummer. + Copyright (C) 2017 Pilar Cossio, Markus Rampp, Luka Stanisic and Gerhard + Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. + Max Planck Computing and Data Facility, Garching, Germany. + Released under the GNU Public License, v3. See license statement for terms of distribution. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -11,52 +13,58 @@ #ifndef AUTOTUNER_H #define AUTOTUNER_H -class Autotuner { +class Autotuner +{ public: - Autotuner() {stopTuning = true;} + Autotuner() { stopTuning = true; } - /* Setting variables to initial values */ - inline void Initialize(int alg=3, int st=7) {algo = alg; stable=st; Reset(); } + /* Setting variables to initial values */ + inline void Initialize(int alg = 3, int st = 7) + { + algo = alg; + stable = st; + Reset(); + } - /* Resetting variables to initial values */ - void Reset(); + /* Resetting variables to initial values */ + void Reset(); - /* Check if autotuning is needed, depending on which comparison is finished */ - bool Needed(int iteration); + /* Check if autotuning is needed, depending on which comparison is finished */ + bool Needed(int iteration); - /* Check if optimal workload value has been computed */ - bool Finished(); + /* Check if optimal workload value has been computed */ + bool Finished(); - /* Set a new workload value to test, depending on the algorithm */ - void Tune(double compTime); + /* Set a new workload value to test, depending on the algorithm */ + void Tune(double compTime); - /* Return workload value */ - inline int Workload() {return workload;} + /* Return workload value */ + inline int Workload() { return workload; } private: - int algo; - int stable; - - bool stopTuning; - int workload; - - /* Variables needed for AlgoSimple and AlgoRatio */ - double best_time; - int best_workload; - - /* Variables needed for AlgoBisection */ - int a; - int b; - int c; - int x; - int limit; - double fb, fx; - - /* Autotuning algorithms */ - void 
AlgoSimple(double compTime); - void AlgoRatio(double compTime); - void AlgoBisection(double compTime); + int algo; + int stable; + + bool stopTuning; + int workload; + + /* Variables needed for AlgoSimple and AlgoRatio */ + double best_time; + int best_workload; + + /* Variables needed for AlgoBisection */ + int a; + int b; + int c; + int x; + int limit; + double fb, fx; + + /* Autotuning algorithms */ + void AlgoSimple(double compTime); + void AlgoRatio(double compTime); + void AlgoBisection(double compTime); }; #endif diff --git a/include/bioem.h b/include/bioem.h index 813acec26c2bcaeaa78459402d1f8eec0d8997c1..8aa3f3faa1201923fc640483a30304dc2d398d0a 100644 --- a/include/bioem.h +++ b/include/bioem.h @@ -1,12 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -14,66 +15,87 @@ #ifndef BIOEM_H #define BIOEM_H -#include "defs.h" #include "bioem.h" -#include "model.h" +#include "defs.h" #include "map.h" +#include "model.h" #include "param.h" class bioem { - friend class bioem_RefMap; - friend class bioem_Probability; + friend class bioem_RefMap; + friend class bioem_Probability; public: - bioem(); - virtual ~bioem(); - - int configure(int ac, char* av[]); - void cleanup(); //Cleanup everything happening during configure - - int precalculate(); // Is it better to pass directly the input File names? - int dopreCalCrossCorrelation(int iRefMap, int iRefMapLocal); - int run(); - int doProjections(int iMap); - int createConvolutedProjectionMap(int iOreint, int iMap, mycomplex_t* lproj, myfloat_t* Mapconv, mycomplex_t* localmultFFT, myfloat_t& sumC, myfloat_t& sumsquareC); - - virtual int compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); - - virtual void* malloc_device_host(size_t size); - virtual void free_device_host(void* ptr); - virtual void rebalance(int workload); //Rebalance GPUWorkload - void rebalanceWrapper(int workload); //Rebalance wrapper - - int createProjection(int iMap, mycomplex_t* map); - int calcross_cor(myfloat_t* localmap, myfloat_t& sum, myfloat_t& sumsquare); - void calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT, mycomplex_t* localCCT, myfloat_t* lCC); - - bioem_Probability pProb; - - string OutfileName; - bool yesoutfilename; - + bioem(); + virtual ~bioem(); + + void printOptions(myoption_t *myoptions, int myoptions_length); + int readOptions(int ac, char *av[]); + int configure(int ac, char *av[]); + void cleanup(); // Cleanup everything happening during 
configure + + int precalculate(); // Is it better to pass directly the input File names? + inline int needToPrintModel() { return param.printModel; } + int printModel(); + int run(); + int doProjections(int iMap); + int createConvolutedProjectionMap(int iOreint, int iMap, mycomplex_t *lproj, + mycomplex_t *localmultFFT, myfloat_t &sumC, + myfloat_t &sumsquareC); + int createConvolutedProjectionMap_noFFT(mycomplex_t *lproj, + myfloat_t *Mapconv, + mycomplex_t *localmultFFT, + myfloat_t &sumC, + myfloat_t &sumsquareC); + + virtual int compareRefMaps(int iPipeline, int iOrient, int iConv, + int maxParallelConv, mycomplex_t *localmultFFT, + myparam5_t *comp_params, const int startMap = 0); + + virtual void *malloc_device_host(size_t size); + virtual void free_device_host(void *ptr); + virtual void rebalance(int workload); // Rebalance GPUWorkload + void rebalanceWrapper(int workload); // Rebalance wrapper + + int createProjection(int iMap, mycomplex_t *map); + int calcross_cor(myfloat_t *localmap, myfloat_t &sum, myfloat_t &sumsquare); + void calculateCCFFT(int iMap, mycomplex_t *localConvFFT, + mycomplex_t *localCCT, myfloat_t *lCC); + void doRefMap_CPU_Parallel(int iRefMap, int iOrient, int iConv, + myfloat_t *lCC, myparam5_t *comp_params, + myblockCPU_t *comp_block); + void doRefMap_CPU_Reduce(int iRefMap, int iOrient, int iConvStart, + int maxParallelConv, myparam5_t *comp_params, + myblockCPU_t *comp_block); + + bioem_Probability pProb; + + string OutfileName; protected: - virtual int deviceInit(); - virtual int deviceStartRun(); - virtual int deviceFinishRun(); - - bioem_param param; - bioem_model Model; - bioem_RefMap RefMap; - - int nReferenceMaps; //Maps in memory at a time - int nReferenceMapsTotal; //Maps in total - - int nProjectionMaps; //Maps in memory at a time - int nProjectionMapsTotal; //Maps in total - - int FFTAlgo; //Use the FFT Algorithm (Default 1) - int DebugOutput; //Debug Output Level (Default 2) - int nProjectionsAtOnce; //Number of 
projections to do at once via OpenMP (Default 1) - bool Autotuning; //Do the autotuning of the load-balancing between CPUs and GPUs + virtual int deviceInit(); + virtual int deviceStartRun(); + virtual int deviceFinishRun(); + + bioem_param param; + bioem_model Model; + bioem_RefMap RefMap; + + int nReferenceMaps; // Maps in memory at a time + int nReferenceMapsTotal; // Maps in total + + int nProjectionMaps; // Maps in memory at a time + int nProjectionMapsTotal; // Maps in total + + int BioEMAlgo; // BioEM algorithm used to do comparison (Default 1) + int CudaThreadCount; // Number of CUDA threads used in each block (Default + // depends on the BioEM algorithm) + int DebugOutput; // Debug Output Level (Default 0) + int nProjectionsAtOnce; // Number of projections to do at once via OpenMP + // (Default number of OMP threads) + bool Autotuning; // Do the autotuning of the load-balancing between CPUs and + // GPUs (Default 1, if GPUs are used and GPUWORKLOAD is not specified) }; #endif diff --git a/include/bioem_cuda.h b/include/bioem_cuda.h index 99add42be1576028c88d7f66b209656de7797041..6a4552a11c6beeab13e4b1e53f10aae5678b144f 100644 --- a/include/bioem_cuda.h +++ b/include/bioem_cuda.h @@ -1,12 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. 
+ Released under the GNU Public License, v3. See license statement for terms of distribution. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -16,6 +17,6 @@ #include "bioem.h" -extern bioem* bioem_cuda_create(); +extern bioem *bioem_cuda_create(); #endif diff --git a/include/bioem_cuda_internal.h b/include/bioem_cuda_internal.h index 708b40fb3e9a2a10b265965baaa224244159b975..1b269a393bc28b5e79f69fa6211f770c62f02e1e 100644 --- a/include/bioem_cuda_internal.h +++ b/include/bioem_cuda_internal.h @@ -1,12 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -17,7 +18,7 @@ #include <cuda.h> #include <cufft.h> -//Hack to make nvcc compiler accept fftw.h, float128 is not used anyway +// Hack to make nvcc compiler accept fftw.h, float128 is not used anyway #define __float128 double #include <fftw3.h> #undef __float128 @@ -27,51 +28,60 @@ class bioem_cuda : public bioem { public: - bioem_cuda(); - virtual ~bioem_cuda(); + bioem_cuda(); + virtual ~bioem_cuda(); - virtual int compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); - virtual void* malloc_device_host(size_t size); - virtual void free_device_host(void* ptr); - virtual void rebalance(int workload); //Rebalance GPUWorkload + virtual int compareRefMaps(int iPipeline, int iOrient, int iConv, + int maxParallelConv, mycomplex_t *localmultFFT, + myparam5_t *comp_params, const int startMap = 0); + virtual void *malloc_device_host(size_t size); + virtual void free_device_host(void *ptr); + virtual void rebalance(int workload); // Rebalance GPUWorkload protected: - virtual int deviceInit(); - virtual int deviceStartRun(); - virtual int deviceFinishRun(); - int deviceExit(); - + virtual int deviceInit(); + virtual int deviceStartRun(); + virtual int deviceFinishRun(); + int deviceExit(); + private: - int selectCudaDevice(); - - int deviceInitialized; - - cudaStream_t cudaStream[3]; - cudaEvent_t cudaEvent[3]; - cudaEvent_t cudaFFTEvent[2]; - bioem_RefMap_Mod* pRefMap_device_Mod; - bioem_RefMap* gpumap; - bioem_Probability* pProb_host; - bioem_Probability pProb_device; - void* pProb_memory; - myfloat_t* pConvMap_device[2]; - - mycomplex_t* pRefMapsFFT; - mycomplex_t* pConvMapFFT; - mycomplex_t* pConvMapFFT_Host; - mycuComplex_t* pFFTtmp2[2]; - myfloat_t* pFFTtmp[2]; - cufftHandle plan[2][2]; - - myfloat_t *maps, *sum, *sumsquare; - - int 
GPUAlgo; //GPU Algorithm to use, 0: parallelize over maps, 1: as 0 but work split in multiple kernels (better), 2: also parallelize over shifts (best) - int GPUAsync; //Run GPU Asynchronously, do the convolutions on the host in parallel. - int GPUDualStream; //Use two streams to improve paralelism - int GPUWorkload; //Percentage of workload to perform on GPU. Default 100. Rest is done on processor in parallel. - - int maxRef; + int selectCudaDevice(); + + int deviceInitialized; + + cudaStream_t cudaStream[PIPELINE_LVL + 1]; // Streams are used for both + // PIPELINE and MULTISTREAM control + cudaEvent_t cudaEvent[PIPELINE_LVL + 1]; + cudaEvent_t cudaFFTEvent[MULTISTREAM_LVL]; + bioem_RefMap *gpumap; + bioem_Probability *pProb_host; + bioem_Probability pProb_device; + void *pProb_memory; + + mycomplex_t *pRefMapsFFT; + mycomplex_t *pConvMapFFT; + mycomplex_t *pConvMapFFT_Host; + mycuComplex_t *pFFTtmp2[MULTISTREAM_LVL]; + myfloat_t *pFFTtmp[MULTISTREAM_LVL]; + cufftHandle plan[SPLIT_MAPS_LVL][MULTISTREAM_LVL]; + + myparam5_t *pTmp_comp_params; + + myblockGPU_t *pTmp_comp_blocks; + int Ncomp_blocks; + + bool *initialized_const; // In order to make sure Constoadd is initialized to + // the first value + + myfloat_t *sum, *sumsquare; + + int GPUAsync; // Run GPU Asynchronously, do the convolutions on the host in + // parallel. + int GPUDualStream; // Use two streams to improve parallelism + int GPUWorkload; // Percentage of workload to perform on GPU. Default 100. + // Rest is done on processor in parallel. 
+ + int maxRef; }; #endif - diff --git a/include/defs.h b/include/defs.h index b7338ca8246126becf8cebce6191b0fa7ca86ec6..70438dd27ad804c763a36e255beb9439bad8aa09 100644 --- a/include/defs.h +++ b/include/defs.h @@ -1,12 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -14,11 +15,22 @@ #ifndef BIOEM_DEFS_H #define BIOEM_DEFS_H +#define BIOEM_PROB_DOUBLE //#define BIOEM_USE_DOUBLE +//#define DEBUG +//#define DEBUG_GPU +//#define DEBUG_PROB -//#define PILAR_DEBUG +#ifndef BIOEM_PROB_DOUBLE +typedef float myprob_t; +#define MY_MPI_FLOAT MPI_FLOAT +#else +typedef double myprob_t; +#define MY_MPI_FLOAT MPI_DOUBLE +#endif #ifndef BIOEM_USE_DOUBLE +#define MIN_PROB -999999. typedef float myfloat_t; #define myfftw_malloc fftwf_malloc #define myfftw_free fftwf_free @@ -35,9 +47,9 @@ typedef float myfloat_t; #define MY_CUFFT_C2R CUFFT_C2R #define mycufftExecC2R cufftExecC2R #define mycuComplex_t cuComplex -#define MY_MPI_FLOAT MPI_FLOAT #else typedef double myfloat_t; +#define MIN_PROB -999999. 
#define myfftw_malloc fftw_malloc #define myfftw_free fftw_free #define myfftw_destroy_plan fftw_destroy_plan @@ -53,17 +65,62 @@ typedef double myfloat_t; #define mycufftExecC2R cufftExecZ2D #define mycuComplex_t cuDoubleComplex #define MY_CUFFT_C2R CUFFT_Z2D -#define MY_MPI_FLOAT MPI_DOUBLE #endif typedef myfloat_t mycomplex_t[2]; -#define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU +#define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 // Possible set to 4 for GPU struct myfloat3_t { - myfloat_t pos[BIOEM_FLOAT_3_PHYSICAL_SIZE]; - myfloat_t quat4; - // myfloat_t prior; + myfloat_t pos[BIOEM_FLOAT_3_PHYSICAL_SIZE]; + myfloat_t quat4; + // myfloat_t prior; +}; + +/* myoptions +Structure for saving options, in order to mimic old Boost program_options +behaviour +*/ +struct myoption_t +{ + const char *name; + int arg; + const char *desc; + bool hidden; +}; + +/* comp_params +Put all parameters needed for each comparison in a single structure +This makes code cleaner and requires less GPU transfers +*/ +struct myparam5_t +{ + myfloat_t amp; + myfloat_t pha; + myfloat_t env; + myfloat_t sumC; + myfloat_t sumsquareC; +}; + +/* comp_block +Put all parameters created by each inside-block comparison +This makes code cleaner +*/ +// For GPUs +struct myblockGPU_t +{ + myprob_t logpro; + int id; + myprob_t sumExp; + myprob_t sumAngles; +}; +// For CPUs (easier to save value as well) +struct myblockCPU_t +{ + myprob_t logpro; + int id; + myprob_t sumExp; + myfloat_t value; }; #ifdef BIOEM_GPUCODE @@ -85,44 +142,53 @@ struct myfloat3_t #define myBlockIdxY 0 #endif -#define CUDA_THREAD_COUNT 256 -#define CUDA_BLOCK_COUNT 1024 * 16 -#define CUDA_MAX_SHIFT_REDUCE 1024 +#define OUTPUT_PRECISION 4 + +#define CUDA_THREAD_COUNT_ALGO1 256 +#define CUDA_THREAD_COUNT_ALGO2 512 +#define CUDA_THREAD_MAX 1024 #define CUDA_FFTS_AT_ONCE 1024 -//#define BIOEM_USE_NVTX + +#define PIPELINE_LVL 2 +#define MULTISTREAM_LVL 2 +#define SPLIT_MAPS_LVL 2 /* Autotuning Autotuning algorithms: - 1. 
AlgoSimple = 1; Testing workload values between 100 and 30, all multiples of 5. Taking the value with the best timing. - 2. AlgoRatio = 2; Comparisons where GPU handles 100% or only 1% of the workload are timed, and then the optimal workload balance is computed. - 3. AlgoBisection = 3; Based on bisection, multiple workload values are tested until the optimal one is found. + 1. AlgoSimple = 1; Testing workload values between 100 and 30, all multiples + of 5. Taking the value with the best timing. + 2. AlgoRatio = 2; Comparisons where GPU handles 100% or only 1% of the + workload are timed, and then the optimal workload balance is computed. + 3. AlgoBisection = 3; Based on bisection, multiple workload values are + tested until the optimal one is found. */ #define AUTOTUNING_ALGORITHM 3 -/* Recalibrate every X projections. Put to a very high value, i.e., 99999, to de facto disable recalibration */ +/* Recalibrate every X projections. Put to a very high value, i.e., 99999, to de + * facto disable recalibration */ #define RECALIB_FACTOR 200 /* After how many comparison iterations, comparison duration becomes stable */ #define FIRST_STABLE 7 -static inline void* mallocchk(size_t size) +static inline void *mallocchk(size_t size) { - void* ptr = malloc(size); - if (ptr == 0) - { - std::cout << "Memory allocation error\n"; - exit(1); - } - return(ptr); + void *ptr = malloc(size); + if (ptr == 0) + { + std::cout << "Memory allocation error\n"; + exit(1); + } + return (ptr); } -static inline void* reallocchk(void* oldptr, size_t size) +static inline void *reallocchk(void *oldptr, size_t size) { - void* ptr = realloc(oldptr, size); - if (ptr == 0) - { - std::cout << "Memory allocation error\n"; - exit(1); - } - return(ptr); + void *ptr = realloc(oldptr, size); + if (ptr == 0) + { + std::cout << "Memory allocation error\n"; + exit(1); + } + return (ptr); } #ifndef WITH_OPENMP diff --git a/include/map.h b/include/map.h index 
88ad682c709abfeaeec11159da73b96f98a7b1ea..9081a332495dd1026634a9fb5a72ae018398d43b 100644 --- a/include/map.h +++ b/include/map.h @@ -1,12 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -18,7 +19,6 @@ #include "param.h" #include <complex> #include <math.h> -#include <boost/concept_check.hpp> class bioem_param; class bioem; @@ -26,137 +26,147 @@ class bioem; class bioem_RefMap { public: - bioem_RefMap() - { - maps = NULL; - RefMapsFFT = NULL; - sum_RefMap = NULL; - sumsquare_RefMap = NULL; - } - - void freePointers() - { - if (maps) free(maps); - if (sum_RefMap) free(sum_RefMap); - if (sumsquare_RefMap) free(sumsquare_RefMap); - if (RefMapsFFT) delete[] RefMapsFFT; - maps = NULL; - sum_RefMap = NULL; - sumsquare_RefMap = NULL; - RefMapsFFT = NULL; - } - int readRefMaps(bioem_param& param, const char* filemap); - int precalculate(bioem_param& param, bioem& bio); - int PreCalculateMapsFFT(bioem_param& param); - - int read_int(int *currlong, FILE *fin, int swap); - int read_float(float *currfloat, FILE *fin, int swap); - int read_float_empty (FILE *fin); - int read_char_float (float *currfloat, FILE *fin) ; - int test_mrc (const char *vol_file, int swap); - int read_MRC(const char* filename,bioem_param& param); - - mycomplex_t* RefMapsFFT; - - bool readMRC,readMultMRC; - - int ntotRefMap; - int numPixels; - int refMapSize; - myfloat_t* maps; - myfloat_t* sum_RefMap; - myfloat_t* sumsquare_RefMap; - - __host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(maps[map * refMapSize + x * numPixels + y]);} - __host__ __device__ inline const myfloat_t* getp(int map, int x, int y) const {return(&maps[map * refMapSize + x * numPixels]);} - __host__ __device__ inline myfloat_t* getmap(int map) {return(&maps[map * refMapSize]);} + bioem_RefMap() + { + maps = NULL; + RefMapsFFT = NULL; + sum_RefMap = NULL; + sumsquare_RefMap = NULL; + } + + void freePointers() + { + if (maps) + free(maps); + if (sum_RefMap) + free(sum_RefMap); + if (sumsquare_RefMap) + free(sumsquare_RefMap); + if (RefMapsFFT) + delete[] RefMapsFFT; + maps = NULL; + sum_RefMap = NULL; + 
sumsquare_RefMap = NULL; + RefMapsFFT = NULL; + } + int readRefMaps(bioem_param &param, const char *filemap); + int precalculate(bioem_param &param, bioem &bio); + int PreCalculateMapsFFT(bioem_param &param); + + int read_int(int *currlong, FILE *fin, int swap); + int read_float(float *currfloat, FILE *fin, int swap); + int read_float_empty(FILE *fin); + int read_char_float(float *currfloat, FILE *fin); + int test_mrc(const char *vol_file, int swap); + int read_MRC(const char *filename, bioem_param &param); + + mycomplex_t *RefMapsFFT; + + bool readMRC, readMultMRC; + + int ntotRefMap; + int numPixels; + int refMapSize; + myfloat_t *maps; + myfloat_t *sum_RefMap; + myfloat_t *sumsquare_RefMap; + + __host__ __device__ inline myfloat_t get(int map, int x, int y) const + { + return (maps[map * refMapSize + x * numPixels + y]); + } + __host__ __device__ inline const myfloat_t *getp(int map, int x, int y) const + { + return (&maps[map * refMapSize + x * numPixels]); + } + __host__ __device__ inline myfloat_t *getmap(int map) + { + return (&maps[map * refMapSize]); + } }; class bioem_RefMap_Mod : public bioem_RefMap { public: - __host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(maps[(x * numPixels + y) * ntotRefMap + map]);} - - void init(const bioem_RefMap& map) - { - maps = (myfloat_t*) malloc(map.refMapSize * map.ntotRefMap * sizeof(myfloat_t)); - #pragma omp parallel for - for (int i = 0; i < map.ntotRefMap; i++) - { - for (int j = 0; j < map.numPixels; j++) - { - for (int k = 0; k < map.numPixels; k++) - { - maps[(j * map.numPixels + k) * map.ntotRefMap + i] = map.get(i, j, k); - } - } - } - } + __host__ __device__ inline myfloat_t get(int map, int x, int y) const + { + return (maps[(x * numPixels + y) * ntotRefMap + map]); + } + + void init(const bioem_RefMap &map) + { + maps = (myfloat_t *) malloc(map.refMapSize * map.ntotRefMap * + sizeof(myfloat_t)); +#pragma omp parallel for + for (int i = 0; i < map.ntotRefMap; i++) + { + for (int j = 0; j < 
map.numPixels; j++) + { + for (int k = 0; k < map.numPixels; k++) + { + maps[(j * map.numPixels + k) * map.ntotRefMap + i] = map.get(i, j, k); + } + } + } + } }; class bioem_Probability_map { public: - myfloat_t Total; - myfloat_t Constoadd; - - class bioem_Probability_map_max - { - public: - int max_prob_cent_x, max_prob_cent_y, max_prob_orient, max_prob_conv; - myfloat_t max_prob_norm,max_prob_mu; - } max; + myprob_t Total; + myprob_t Constoadd; + + class bioem_Probability_map_max + { + public: + int max_prob_cent_x, max_prob_cent_y, max_prob_orient, max_prob_conv; + myfloat_t max_prob_norm, max_prob_mu; + } max; }; class bioem_Probability_angle { public: - myfloat_t forAngles; - myfloat_t ConstAngle; - myfloat_t priorang; -}; - -class bioem_Probability_cc -{ -public: - myfloat_t forCC; - myfloat_t ConstCC; + myprob_t forAngles; + myprob_t ConstAngle; }; class bioem_Probability { public: - int nMaps; - int nAngles; - int nCC; - - __device__ __host__ bioem_Probability_map& getProbMap(int map) {return(ptr_map[map]);} - __device__ __host__ bioem_Probability_angle& getProbAngle(int map, int angle) {return(ptr_angle[angle * nMaps + map]);} - __device__ __host__ bioem_Probability_cc& getProbCC(int map, int cc) {return(ptr_cc[cc * nMaps + map]);} - - void* ptr; - bioem_Probability_map* ptr_map; - bioem_Probability_angle* ptr_angle; - bioem_Probability_cc* ptr_cc; - - static size_t get_size(size_t maps, size_t angles, size_t cc, bool writeAngles, bool writeCC) - { - size_t size = sizeof(bioem_Probability_map); - if (writeAngles) size += angles * sizeof(bioem_Probability_angle); - if (writeCC) size += cc * sizeof(bioem_Probability_cc); - return(maps * size); - } - - void init(size_t maps, size_t angles, size_t cc, bioem& bio); - void copyFrom(bioem_Probability* from, bioem& bio); - - void set_pointers() - { - ptr_map = (bioem_Probability_map*) ptr; - ptr_angle = (bioem_Probability_angle*) (&ptr_map[nMaps]); -// ptr_cc = (bioem_Probability_cc*) (&ptr_angle[nMaps * 
nAngles]); - ptr_cc = (bioem_Probability_cc*) (&ptr_map[nMaps]); - } + int nMaps; + int nAngles; + + __device__ __host__ bioem_Probability_map &getProbMap(int map) + { + return (ptr_map[map]); + } + __device__ __host__ bioem_Probability_angle &getProbAngle(int map, int angle) + { + return (ptr_angle[angle * nMaps + map]); + } + + void *ptr; + bioem_Probability_map *ptr_map; + bioem_Probability_angle *ptr_angle; + + static size_t get_size(size_t maps, size_t angles, int writeAngles) + { + size_t size = sizeof(bioem_Probability_map); + if (writeAngles) + size += angles * sizeof(bioem_Probability_angle); + return (maps * size); + } + + void init(size_t maps, size_t angles, bioem &bio); + void copyFrom(bioem_Probability *from, bioem &bio); + + void set_pointers() + { + ptr_map = (bioem_Probability_map *) ptr; + ptr_angle = (bioem_Probability_angle *) (&ptr_map[nMaps]); + } }; #endif diff --git a/include/model.h b/include/model.h index fcfafa29a743f9b6af656aa8e2d5674eefc871a6..46736f9776fe01a64b6fafa46e55e97e52919140 100644 --- a/include/model.h +++ b/include/model.h @@ -1,12 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -16,32 +17,31 @@ #include "defs.h" #include "param.h" -#include <boost/concept_check.hpp> class bioem_model { public: - class bioem_model_point - { - public: - myfloat3_t point; - myfloat_t radius; - myfloat_t density; - }; - - bioem_model(); - ~bioem_model(); - - int readModel(bioem_param& param, const char* filemodel); - - bool readPDB; - - myfloat_t getAminoAcidRad(char *name); - myfloat_t getAminoAcidDensity(char *name); - myfloat_t NormDen; - - int nPointsModel; - bioem_model_point* points; + class bioem_model_point + { + public: + myfloat3_t point; + myfloat_t radius; + myfloat_t density; + }; + + bioem_model(); + ~bioem_model(); + + int readModel(bioem_param &param, const char *filemodel); + + bool readPDB; + + myfloat_t getAminoAcidRad(char *name); + myfloat_t getAminoAcidDensity(char *name); + myfloat_t NormDen; + + int nPointsModel; + bioem_model_point *points; }; #endif diff --git a/include/param.h b/include/param.h index 8a952a56fdad73d81bfed65621cc503fb2a2b3cb..5faf1b63f968607e3c22d7087a3dd20b0d3768ac 100644 --- a/include/param.h +++ b/include/param.h @@ -1,12 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. 
+ Released under the GNU Public License, v3. See license statement for terms of distribution. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -17,145 +18,139 @@ #include "defs.h" #include "map.h" #include <complex> -#include <math.h> #include <fftw3.h> +#include <math.h> using namespace std; class bioem_param_device { public: -// Grids in center assuming equidistance from 0,0 - int maxDisplaceCenter; - int GridSpaceCenter; - int NumberPixels; - int NumberFFTPixels1D; - int NtotDist; - - myfloat_t Ntotpi; - myfloat_t volu; - myfloat_t sigmaPriorbctf; - myfloat_t sigmaPriordefo; - myfloat_t Priordefcent; - - -// If to write Probabilities of Angles from Model - bool writeAngles; - bool writeCC; - bool flipped; - bool debugterm; - int CCdisplace; - bool CCwithBayes; - bool tousepsf; - - + // Grids in center assuming equidistance from 0,0 + int maxDisplaceCenter; + int GridSpaceCenter; + int NumberPixels; + int NumberFFTPixels1D; + int NxDisp; + int NtotDisp; + + myfloat_t Ntotpi; + myfloat_t volu; + myfloat_t sigmaPriorbctf; + myfloat_t sigmaPriordefo; + myfloat_t Priordefcent; + myfloat_t sigmaPrioramp; + myfloat_t Priorampcent; + // If to write Probabilities of Angles from Model + int writeAngles; + bool tousepsf; }; class bioem_param { public: - bioem_param(); - ~bioem_param(); - - - int readParameters(const char* fileinput); - int CalculateGridsParam(const char* fileangles); - int CalculateRefCTF(); - int forprintBest(const char* fileinput); - void PrepareFFTs(); - bool doaaradius; - bool writeCTF; - bool ignoreCCoff; - bool nocentermass; - bool printrotmod; - bool readquatlist; - bool showrotatemod; - bool notsqure; - bool notnormmap; - bool usepsf; - bool ignorepointsout; - bool ignorePDB; - - myfloat_t elecwavel; - - bioem_param_device param_device; - - int FFTMapSize; - int Alignment; - mycomplex_t* refCTF; - myfloat3_t* CtfParam; - size_t getRefCtfCount() {return nTotCTFs * FFTMapSize;} - size_t getCtfParamCount() 
{return nTotCTFs;} - - myfloat_t pixelSize; -// Priors - myfloat_t priorMod; - bool yespriorAngles; - myfloat_t* angprior; - -// Grid Points in Euler angles, assuming uniform sampling d_alpha=d_gamma (in 2pi) & cos(beta)=-1,1 - int angleGridPointsAlpha; - int angleGridPointsBeta; - - int GridPointsQuatern; - bool doquater; - - myfloat_t voluang; - bool notuniformangles; - int NotUn_angles; - - bool withnoise; - myfloat_t stnoise; -// std::string inanglef; -// std::string quatfile; - - int numberGridPointsDisplaceCenter; -// Grid sampling for the convolution kernel - -// CTF - myfloat_t startBfactor, endBfactor; - int numberBfactor; - myfloat_t startDefocus, endDefocus; - int numberDefocus; - - //ENVELOPE - myfloat_t startGridEnvelop; - myfloat_t endGridEnvelop; - int numberGridPointsEnvelop; - myfloat_t gridEnvelop; - //CTF=Amp*cos(phase*x)-sqrt(1-Amp**2)*sin(phase*x) - myfloat_t startGridCTF_phase; - myfloat_t endGridCTF_phase; - int numberGridPointsCTF_phase; - myfloat_t gridCTF_phase; - myfloat_t startGridCTF_amp; - myfloat_t endGridCTF_amp; - int numberGridPointsCTF_amp; - myfloat_t gridCTF_amp; - // Others - myfloat3_t* angles; - int nTotGridAngles; - int nTotCTFs; - int nTotCC; - int shiftX,shiftY; - - bool printModel; - int printModelOrientation; - int printModelConvolution; - - int fft_plans_created; - myfftw_plan fft_plan_c2c_forward, fft_plan_c2c_backward, fft_plan_r2c_forward, fft_plan_c2r_backward; - - mycomplex_t** fft_scratch_complex; - myfloat_t** fft_scratch_real; - - bool dumpMap, loadMap; - - int ddx,ddy; - myfloat_t bestnorm,bestoff; + bioem_param(); + ~bioem_param(); + + int readParameters(const char *fileinput); + int CalculateGridsParam(const char *fileangles); + int CalculateRefCTF(); + int forprintBest(const char *fileinput); + void PrepareFFTs(); + bool doaaradius; + bool writeCTF; + bool nocentermass; + bool printrotmod; + bool readquatlist; + bool showrotatemod; + bool notnormmap; + bool usepsf; + bool ignorepointsout; + bool ignorePDB; + 
+ myfloat_t elecwavel; + + bioem_param_device param_device; + + int FFTMapSize; + int Alignment; + mycomplex_t *refCTF; + myfloat3_t *CtfParam; + size_t getRefCtfCount() { return nTotCTFs * FFTMapSize; } + size_t getCtfParamCount() { return nTotCTFs; } + + myfloat_t pixelSize; + // Priors + myfloat_t priorMod; + bool yespriorAngles; + myfloat_t *angprior; + + // Grid Points in Euler angles, assuming uniform sampling d_alpha=d_gamma (in + // 2pi) & cos(beta)=-1,1 + int angleGridPointsAlpha; + int angleGridPointsBeta; + + int GridPointsQuatern; + bool doquater; + + myfloat_t voluang; + bool notuniformangles; + int NotUn_angles; + + bool withnoise; + myfloat_t stnoise; + // std::string inanglef; + // std::string quatfile; + + int numberGridPointsDisplaceCenter; + // Grid sampling for the convolution kernel + + // CTF + myfloat_t startBfactor, endBfactor; + int numberBfactor; + myfloat_t startDefocus, endDefocus; + int numberDefocus; + + // ENVELOPE + myfloat_t startGridEnvelop; + myfloat_t endGridEnvelop; + int numberGridPointsEnvelop; + myfloat_t gridEnvelop; + // CTF=Amp*cos(phase*x)-sqrt(1-Amp**2)*sin(phase*x) + myfloat_t startGridCTF_phase; + myfloat_t endGridCTF_phase; + int numberGridPointsCTF_phase; + myfloat_t gridCTF_phase; + myfloat_t startGridCTF_amp; + myfloat_t endGridCTF_amp; + int numberGridPointsCTF_amp; + myfloat_t gridCTF_amp; + // Others + myfloat3_t *angles; + int nTotGridAngles; + int nTotCTFs; + int shiftX, shiftY; + + int nTotParallelConv; + int nTotParallelMaps; + + bool printModel; + bool BestmapCalcCC; + + int fft_plans_created; + myfftw_plan fft_plan_c2c_forward, fft_plan_c2c_backward, fft_plan_r2c_forward, + fft_plan_c2r_backward; + + mycomplex_t **fft_scratch_complex; + myfloat_t **fft_scratch_real; + + bool dumpMap, loadMap; + + int ddx, ddy; + myfloat_t bestnorm, bestoff; private: - void releaseFFTPlans(); + void releaseFFTPlans(); }; #endif diff --git a/include/timer.h b/include/timer.h index 
2875aad47f906cc066bdcd0d49d070097609e31c..06d5b2b6924f79362b1be242c0ae9b36588f6f52 100644 --- a/include/timer.h +++ b/include/timer.h @@ -1,9 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. + Released under the GNU Public License, v3. See license statement for terms of distribution. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -11,68 +15,82 @@ #ifndef TIMER_H #define TIMER_H +#include <algorithm> +#include <cmath> +#include <numeric> #include <stdio.h> #include <string> -#include <numeric> #include <vector> -#include <algorithm> -#include <cmath> using namespace std; -class HighResTimer { +class HighResTimer +{ public: - HighResTimer(); - ~HighResTimer(); - void Start(); - void Stop(); - void Reset(); - void ResetStart(); - double GetElapsedTime(); - double GetCurrentElapsedTime(); + HighResTimer(); + ~HighResTimer(); + void Start(); + void Stop(); + void Reset(); + void ResetStart(); + double GetElapsedTime(); + double GetCurrentElapsedTime(); private: - static double Frequency; - static double GetFrequency(); + static double Frequency; + static double GetFrequency(); - double ElapsedTime; - double StartTime; - int running; + double ElapsedTime; + double StartTime; + int running; }; /* Structure for saving a vector of timings */ -typedef struct _TimeLog { - vector<double> vec; - - double sum; - double stdev; - - string name; -}TimeLog; -enum 
TS_NAMES{TS_TPROJECTION, TS_PROJECTION, TS_CONVOLUTION, TS_COMPARISON}; +typedef struct _TimeLog +{ + vector<double> vec; + + double sum; + double stdev; + + string name; +} TimeLog; +enum TS_NAMES +{ + TS_TPROJECTION, + TS_PROJECTION, + TS_CONVOLUTION, + TS_COMPARISON +}; -/* Structure for saving timings of different parts of code and doing basic statistics on them */ -class TimeStat { +/* Structure for saving timings of different parts of code and doing basic + * statistics on them */ +class TimeStat +{ public: - TimeStat(int Angles, int CTFs) : time(0),tl(NULL) {angles = Angles; ctfs = CTFs;}; - ~TimeStat() {EmptyTimeStat();}; - void InitTimeLog(int log, int size, string s); - void InitTimeStat(int nlogs); - void EmptyTimeStat(); - void inline Add(int log) {tl[log].vec.push_back(time);}; - void ComputeTimeStat(); - void PrintTimeStat(int mpi_rank); - - /* Variable for storing times during the execution */ - double time; + TimeStat(int Angles, int CTFs) : time(0), tl(NULL) + { + angles = Angles; + ctfs = CTFs; + }; + ~TimeStat() { EmptyTimeStat(); }; + void InitTimeLog(int log, int size, string s); + void InitTimeStat(int nlogs); + void EmptyTimeStat(); + void inline Add(int log) { tl[log].vec.push_back(time); }; + void ComputeTimeStat(); + void PrintTimeStat(int mpi_rank); + + /* Variable for storing times during the execution */ + double time; private: - TimeLog* tl; - int total_logs; - int angles; - int ctfs; + TimeLog *tl; + int total_logs; + int angles; + int ctfs; }; #endif diff --git a/main.cpp b/main.cpp index 565be80f4018de3c9b288445a8f8a3c9f31b8a6f..6929a95a31bc46bee5ab49bd16310860933e4bd3 100644 --- a/main.cpp +++ b/main.cpp @@ -1,13 +1,13 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. 
- + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ @@ -15,27 +15,28 @@ #ifdef WITH_MPI #include <mpi.h> -#define MPI_CHK(expr) \ - if (expr != MPI_SUCCESS) \ - { \ - fprintf(stderr, "Error in MPI function %s: %d\n", __FILE__, __LINE__); \ - } +#define MPI_CHK(expr) \ + if (expr != MPI_SUCCESS) \ + { \ + fprintf(stderr, "Error in MPI function %s: %d\n", __FILE__, __LINE__); \ + } #endif +#include <fenv.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> -#include <fenv.h> #ifdef _WIN32 -#include <Windows.h> #include <WinBase.h> +#include <Windows.h> #endif -#include <iostream> #include <algorithm> +#include <iostream> #include <iterator> + #include "bioem.h" #include "bioem_cuda.h" @@ -53,58 +54,82 @@ int mpi_size = 1; #include "timer.h" -int main(int argc, char* argv[]) +int main(int argc, char *argv[]) { - // ************************************************************************************** - // ********************************* Main BioEM code ********************************** - // ************************************************************************************ +// ************************************************************************************** +// ********************************* Main BioEM code +// ********************************** +// 
************************************************************************************ #ifdef WITH_MPI - MPI_CHK(MPI_Init(&argc, &argv)); - MPI_CHK(MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank)); - MPI_CHK(MPI_Comm_size(MPI_COMM_WORLD, &mpi_size)); + MPI_CHK(MPI_Init(&argc, &argv)); + MPI_CHK(MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank)); + MPI_CHK(MPI_Comm_size(MPI_COMM_WORLD, &mpi_size)); #endif #ifdef _MM_DENORMALS_ZERO_ON - #pragma omp parallel - { - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); //Flush denormals to zero in all OpenMP threads - } +#pragma omp parallel + { + _MM_SET_DENORMALS_ZERO_MODE( + _MM_DENORMALS_ZERO_ON); // Flush denormals to zero in all OpenMP threads + } #endif - HighResTimer timer; + HighResTimer timer; - bioem* bio; + bioem *bio; #ifdef WITH_CUDA - if (getenv("GPU") && atoi(getenv("GPU"))) - { - bio = bioem_cuda_create(); - } - else + if (getenv("GPU") && atoi(getenv("GPU"))) + { + bio = bioem_cuda_create(); + } + else #endif - { - bio = new bioem; - } - - // ************ Configuration and Pre-calculating necessary objects ***************** -// if (mpi_rank == 0) printf("Configuring\n"); - if (bio->configure(argc, argv) == 0) - { - - // ******************************* Run BioEM routine ****************************** - if (mpi_rank == 0) printf("Running\n"); - timer.Start(); - bio->run(); - timer.Stop(); - - // ************************************ End ********************************** - printf ("The code ran for %f seconds (rank %d).\n", timer.GetElapsedTime(), mpi_rank); - bio->cleanup(); - } - delete bio; + { + bio = new bioem; + } + + // ************ Configuration and Pre-calculating necessary objects + // ***************** + if (mpi_rank == 0) + printf("Configuring\n"); + if (bio->configure(argc, argv) == 0) + { + if (bio->needToPrintModel()) + { + if (mpi_size == 1) + { + bio->printModel(); + } + else + { + printf("ERROR: Model printing can be performed only if using a single " + "MPI process. 
Please change your execution to use a single MPI " + "process or no MPI at all.\n"); + exit(1); + } + } + else + { + // ******************************* Run BioEM routine + // ****************************** + if (mpi_rank == 0) + printf("Running\n"); + timer.Start(); + bio->run(); + timer.Stop(); + + // ************************************ End + // ********************************** + printf("The code ran for %f seconds (rank %d).\n", timer.GetElapsedTime(), + mpi_rank); + bio->cleanup(); + } + } + delete bio; #ifdef WITH_MPI - MPI_Finalize(); + MPI_Finalize(); #endif - return(0); + return (0); } diff --git a/map.cpp b/map.cpp index dbdd7c0043a7b5e891958f4a816af27654390c32..6d155251ee4e067e2436221f737afe1b498c70d5 100644 --- a/map.cpp +++ b/map.cpp @@ -1,399 +1,424 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. - Note: This program contains subroutine "read_MRC" of the Situs 2.7.2 program. - Ref: Willy Wriggers. Using Situs for the Integration of Multi-Resolution Structures. 
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + Note: This program contains subroutine "read_MRC" of the Situs 2.7.2 program. + Ref: Willy Wriggers. Using Situs for the Integration of Multi-Resolution + Structures. Biophysical Reviews, 2010, Vol. 2, pp. 21-27. with a GPL lisences version 3. - ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +#include <cstring> +#include <fftw3.h> #include <fstream> #include <iostream> +#include <math.h> #include <stdio.h> #include <stdlib.h> -#include <cstring> -#include <math.h> -#include <fftw3.h> #ifdef WITH_OPENMP #include <omp.h> #endif +#include "bioem.h" #include "map.h" #include "param.h" -#include "bioem.h" using namespace std; -int bioem_RefMap::readRefMaps(bioem_param& param, const char* filemap) +int bioem_RefMap::readRefMaps(bioem_param &param, const char *filemap) { numPixels = param.param_device.NumberPixels; - refMapSize = param.param_device.NumberPixels * param.param_device.NumberPixels; + refMapSize = + param.param_device.NumberPixels * param.param_device.NumberPixels; // ************************************************************************************** - // ***********************Reading reference Particle Maps************************ + // ***********************Reading reference Particle + // Maps************************ // ************************************************************************************** int allocsize = 0; if (param.loadMap) + { + //************** Loading Map from Binary file ******* + FILE *fp = fopen("maps.dump", "rb"); + if (fp == NULL) { - //************** Loading Map from Binary file ******* - FILE* fp = fopen("maps.dump", "rb"); - if (fp == NULL) - { - cout << "Error opening dump file\n"; - exit(1); - } - size_t elements_read; - elements_read = fread(&ntotRefMap, sizeof(ntotRefMap), 1, fp); - if (elements_read != 1) - { - cout << "Error reading file\n"; - exit(1); - } - maps = (myfloat_t*) 
mallocchk(ntotRefMap * refMapSize * sizeof(myfloat_t)); - elements_read = fread(maps, sizeof(myfloat_t) * refMapSize, ntotRefMap, fp); - if (elements_read != (size_t) ntotRefMap) - { - cout << "Error reading file\n"; - exit(1); - } - - fclose(fp); - - cout << "Particle Maps read from Map Dump\n"; + cout << "Error opening dump file\n"; + exit(1); + } + size_t elements_read; + elements_read = fread(&ntotRefMap, sizeof(ntotRefMap), 1, fp); + if (elements_read != 1) + { + cout << "Error reading file\n"; + exit(1); } - else if(readMRC) + maps = (myfloat_t *) mallocchk(ntotRefMap * refMapSize * sizeof(myfloat_t)); + elements_read = fread(maps, sizeof(myfloat_t) * refMapSize, ntotRefMap, fp); + if (elements_read != (size_t) ntotRefMap) { - //************** Reading MRC file ******* - ntotRefMap=0; + cout << "Error reading file\n"; + exit(1); + } - if(readMultMRC) - { + fclose(fp); - //************** Reading Multiple MRC files ************* - cout << "Opening File with MRC list names: " << filemap << "\n"; - ifstream input(filemap); + cout << "Particle Maps read from Map Dump\n"; + } + else if (readMRC) + { + //************** Reading MRC file ******* + ntotRefMap = 0; - if (!input.good()) - { - cout << "Failed to open file contaning MRC names: " << filemap << "\n"; - exit(1); - } + if (readMultMRC) + { - char line[512] = {0}; - char mapname[100]; - char tmpm[10] = {0}; - const char* indifile; + //************** Reading Multiple MRC files ************* + cout << "Opening File with MRC list names: " << filemap << "\n"; + ifstream input(filemap); - while (!input.eof()) - { - input.getline(line,511); - char tmpVals[100] = {0}; + if (!input.good()) + { + cout << "Failed to open file contaning MRC names: " << filemap << "\n"; + exit(1); + } - string strline(line); + char line[512] = {0}; + char mapname[100]; + char tmpm[10] = {0}; + const char *indifile; - // cout << "MRC File name:" << strline << "\n"; + while (!input.eof()) + { + input.getline(line, 511); + char tmpVals[100] = 
{0}; + string strline(line); - strncpy (tmpVals,line,99); - sscanf (tmpVals,"%99c",mapname); + // cout << "MRC File name:" << strline << "\n"; - // Check for last line - strncpy (tmpm,mapname,3); + strncpy(tmpVals, line, 99); + sscanf(tmpVals, "%99c", mapname); - if(strcmp(tmpm,"XXX")!=0) - { - indifile=strline.c_str(); - - // size_t foundpos= strline.find("mrc"); - // size_t endpos = strline.find_last_not_of(" \t"); - + // Check for last line + strncpy(tmpm, mapname, 3); - //Reading Multiple MRC - read_MRC(indifile,param); - } - for(int i=0;i<3;i++)mapname[i] = 'X'; - for(int i=3;i<100;i++)mapname[i] = 0; + if (strcmp(tmpm, "XXX") != 0) + { + indifile = strline.c_str(); - } - cout << "\n+++++++++++++++++++++++++++++++++++++++++++ \n"; - cout << "Particle Maps read from MULTIPLE MRC Files in: " << filemap << "\n" ; - } - else - { + // size_t foundpos= strline.find("mrc"); + // size_t endpos = strline.find_last_not_of(" \t"); - string strfilename(filemap); + // Reading Multiple MRC + read_MRC(indifile, param); + } + for (int i = 0; i < 3; i++) + mapname[i] = 'X'; + for (int i = 3; i < 100; i++) + mapname[i] = 0; + } + cout << "\n+++++++++++++++++++++++++++++++++++++++++++ \n"; + cout << "Particle Maps read from MULTIPLE MRC Files in: " << filemap + << "\n"; + } + else + { - size_t foundpos= strfilename.find("mrc"); - size_t endpos = strfilename.find_last_not_of(" \t"); + string strfilename(filemap); - if(foundpos > endpos){ - cout << "Warining:::: mrc extension NOT dectected in file name::" << filemap <<" \n"; - cout << "Warining:::: Are you sure you want to read an MRC? 
\n"; - } + size_t foundpos = strfilename.find("mrc"); + size_t endpos = strfilename.find_last_not_of(" \t"); - read_MRC(filemap,param); - cout << "\n++++++++++++++++++++++++++++++++++++++++++ \n"; - cout << "Particle Maps read from ONE MRC File: " << filemap << "\n" ; - } + if (foundpos > endpos) + { + cout << "Warining:::: mrc extension NOT dectected in file name::" + << filemap << " \n"; + cout << "Warining:::: Are you sure you want to read an MRC? \n"; + } + + read_MRC(filemap, param); + cout << "\n++++++++++++++++++++++++++++++++++++++++++ \n"; + cout << "Particle Maps read from ONE MRC File: " << filemap << "\n"; } + } else + { + //************** Reading Text file ************* + int nummap = -1; + int lasti = 0; + int lastj = 0; + ifstream input(filemap); + if (!input.good()) { - //************** Reading Text file ************* - int nummap = -1; - int lasti = 0; - int lastj = 0; - ifstream input(filemap); - if (!input.good()) - { - cout << "Particle Maps Failed to open file" << endl ; - exit(1); - } + cout << "Particle Maps Failed to open file" << endl; + exit(1); + } - char line[512] = {0}; - char tmpLine[512] = {0}; - bool first=true; + char line[512] = {0}; + char tmpLine[512] = {0}; + bool first = true; - int countpix=0; + int countpix = 0; - while (!input.eof()) - { - input.getline(line, 511); - - strncpy(tmpLine, line, strlen(line)); - char *token = strtok(tmpLine, " "); - - if(first){ - if (strcmp(token, "PARTICLE") != 0) { - cout << "Missing correct Standard Map Format: PARTICLE HEADER\n"<< endl ; - exit(1); - } - first=false; - } - - - if (strcmp(token, "PARTICLE") == 0) // to count the number of maps - { - nummap++; - countpix=0; - if (allocsize == 0) - { - allocsize = 64; - maps = (myfloat_t*) mallocchk(refMapSize * sizeof(myfloat_t) * allocsize); - } - else if (nummap + 1 >= allocsize) - { - allocsize *= 2; - maps = (myfloat_t*) reallocchk(maps, refMapSize * sizeof(myfloat_t) * allocsize); - } - if (nummap % 128 == 0) - { - cout << "..." 
<< nummap << "\n"; - } - if(lasti+1 != param.param_device.NumberPixels && lastj+1 != param.param_device.NumberPixels && nummap > 0) - { - cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE ( " << param.param_device.NumberPixels << ", i " << lasti << ", j " << lastj << ")" << "\n"; - exit(1); - } - } - else - { - int i, j; - float z; - - char tmpVals[36] = {0}; - - strncpy (tmpVals, line, 8); - sscanf (tmpVals, "%d", &i); - - strncpy (tmpVals, line + 8, 8); - sscanf (tmpVals, "%d", &j); - - strncpy (tmpVals, line + 16, 16); - sscanf (tmpVals, "%f", &z); - //checking for Map limits - if(i > -1 && i < param.param_device.NumberPixels && j > -1 && j < param.param_device.NumberPixels) - { - countpix++; - maps[nummap * refMapSize + i * numPixels + j] = (myfloat_t) z; - lasti = i; - lastj = j; - // cout << countpix << " " << param.param_device.NumberPixels*param.param_device.NumberPixels << "\n"; - } - else - { - cout << "PROBLEM READING MAP (Map number " << nummap << ", i " << i << ", j " << j << ")" << "\n"; - exit(1); - } - } - } - if(lasti != param.param_device.NumberPixels-1 || lastj != param.param_device.NumberPixels-1 || countpix != param.param_device.NumberPixels*param.param_device.NumberPixels +1 ) - { - cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE ( " << param.param_device.NumberPixels << ", i " << lasti << ", j " << lastj << ")" << "\n"; - exit(1); - } - cout << "."; - ntotRefMap = nummap + 1; - maps = (myfloat_t*) reallocchk(maps, refMapSize * sizeof(myfloat_t) * ntotRefMap); - cout << "Particle Maps read from Standard File: " << ntotRefMap << "\n"; + while (!input.eof()) + { + input.getline(line, 511); + + strncpy(tmpLine, line, strlen(line)); + char *token = strtok(tmpLine, " "); + + if (first) + { + if (strcmp(token, "PARTICLE") != 0) + { + cout << "Missing correct Standard Map Format: PARTICLE HEADER\n" + << endl; + exit(1); + } + first = false; + } + + if (strcmp(token, "PARTICLE") == 0) // to count the number of 
maps + { + nummap++; + countpix = 0; + if (allocsize == 0) + { + allocsize = 64; + maps = (myfloat_t *) mallocchk(refMapSize * sizeof(myfloat_t) * + allocsize); + } + else if (nummap + 1 >= allocsize) + { + allocsize *= 2; + maps = (myfloat_t *) reallocchk(maps, refMapSize * sizeof(myfloat_t) * + allocsize); + } + if (nummap % 128 == 0) + { + cout << "..." << nummap << "\n"; + } + if (lasti + 1 != param.param_device.NumberPixels && + lastj + 1 != param.param_device.NumberPixels && nummap > 0) + { + cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE " + "( " + << param.param_device.NumberPixels << ", i " << lasti << ", j " + << lastj << ")" + << "\n"; + exit(1); + } + } + else + { + int i, j; + float z; + + char tmpVals[36] = {0}; + + strncpy(tmpVals, line, 8); + sscanf(tmpVals, "%d", &i); + + strncpy(tmpVals, line + 8, 8); + sscanf(tmpVals, "%d", &j); + + strncpy(tmpVals, line + 16, 16); + sscanf(tmpVals, "%f", &z); + // checking for Map limits + if (i > -1 && i < param.param_device.NumberPixels && j > -1 && + j < param.param_device.NumberPixels) + { + countpix++; + maps[nummap * refMapSize + i * numPixels + j] = (myfloat_t) z; + lasti = i; + lastj = j; + // cout << countpix << " " << + // param.param_device.NumberPixels*param.param_device.NumberPixels << + //"\n"; + } + else + { + cout << "PROBLEM READING MAP (Map number " << nummap << ", i " << i + << ", j " << j << ")" + << "\n"; + exit(1); + } + } } + if (lasti != param.param_device.NumberPixels - 1 || + lastj != param.param_device.NumberPixels - 1 || + countpix != + param.param_device.NumberPixels * param.param_device.NumberPixels + + 1) + { + cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE ( " + << param.param_device.NumberPixels << ", i " << lasti << ", j " + << lastj << ")" + << "\n"; + exit(1); + } + cout << "."; + ntotRefMap = nummap + 1; + maps = (myfloat_t *) reallocchk(maps, refMapSize * sizeof(myfloat_t) * + ntotRefMap); + cout << "Particle Maps read from 
Standard File: " << ntotRefMap << "\n"; + } //************* If Dumping Maps ********************* if (param.dumpMap) + { + FILE *fp = fopen("maps.dump", "w+b"); + if (fp == NULL) { - FILE* fp = fopen("maps.dump", "w+b"); - if (fp == NULL) - { - cout << "Error opening dump file\n"; - exit(1); - } - fwrite(&ntotRefMap, sizeof(ntotRefMap), 1, fp); - fwrite(maps, sizeof(myfloat_t) * refMapSize, ntotRefMap, fp); - fclose(fp); + cout << "Error opening dump file\n"; + exit(1); } + fwrite(&ntotRefMap, sizeof(ntotRefMap), 1, fp); + fwrite(maps, sizeof(myfloat_t) * refMapSize, ntotRefMap, fp); + fclose(fp); + } //*********** To Debug with few Maps ******************** if (getenv("BIOEM_DEBUG_NMAPS")) - { - ntotRefMap = atoi(getenv("BIOEM_DEBUG_NMAPS")); - } + { + ntotRefMap = atoi(getenv("BIOEM_DEBUG_NMAPS")); + } + param.nTotParallelMaps = min(CUDA_FFTS_AT_ONCE, ntotRefMap); cout << "Total Number of particles: " << ntotRefMap; cout << "\n+++++++++++++++++++++++++++++++++++++++++++ \n"; - return(0); + return (0); } -int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param) +int bioem_RefMap::PreCalculateMapsFFT(bioem_param ¶m) { // ************************************************************************************** - // ********** Routine that pre-calculates Reference maps FFT for Convolution/ Comparison ********************** + // ********** Routine that pre-calculates Reference maps FFT for Convolution/ + // Comparison ********************** // ************************************************************************************ RefMapsFFT = new mycomplex_t[ntotRefMap * param.FFTMapSize]; #pragma omp parallel for - for (int iRefMap = 0; iRefMap < ntotRefMap ; iRefMap++) + for (int iRefMap = 0; iRefMap < ntotRefMap; iRefMap++) + { + const int num = omp_get_thread_num(); + myfloat_t *localMap = param.fft_scratch_real[num]; + mycomplex_t *localout = param.fft_scratch_complex[num]; + + // Assigning localMap values to padded Map + for (int i = 0; i < 
param.param_device.NumberPixels; i++) { - const int num = omp_get_thread_num(); - myfloat_t* localMap = param.fft_scratch_real[num]; - mycomplex_t* localout = param.fft_scratch_complex[num]; - - //Assigning localMap values to padded Map - for(int i = 0; i < param.param_device.NumberPixels; i++) - { - for(int j = 0; j < param.param_device.NumberPixels; j++) - { - localMap[i * param.param_device.NumberPixels + j] = maps[iRefMap * refMapSize + i * param.param_device.NumberPixels + j]; - } - } - - //Calling FFT_Forward - myfftw_execute_dft_r2c(param.fft_plan_r2c_forward, localMap, localout); - - //Saving the Reference CTFs (RefMap array possibly has incorrect alignment, so we copy here. Stupid but in fact does not matter.) - mycomplex_t* RefMap = &RefMapsFFT[iRefMap * param.FFTMapSize]; - - for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D ; i++ ) - { - RefMap[i][0] = localout[i][0]; - RefMap[i][1] = localout[i][1]; - } + for (int j = 0; j < param.param_device.NumberPixels; j++) + { + localMap[i * param.param_device.NumberPixels + j] = + maps[iRefMap * refMapSize + i * param.param_device.NumberPixels + + j]; + } } - return(0); + // Calling FFT_Forward + myfftw_execute_dft_r2c(param.fft_plan_r2c_forward, localMap, localout); + + // Saving the Reference CTFs (RefMap array possibly has incorrect alignment, + // so we copy here. Stupid but in fact does not matter.) 
+ mycomplex_t *RefMap = &RefMapsFFT[iRefMap * param.FFTMapSize]; + + for (int i = 0; i < param.param_device.NumberPixels * + param.param_device.NumberFFTPixels1D; + i++) + { + RefMap[i][0] = localout[i][0]; + RefMap[i][1] = localout[i][1]; + } + } + + return (0); } -int bioem_RefMap::precalculate(bioem_param& param, bioem& bio) +int bioem_RefMap::precalculate(bioem_param ¶m, bioem &bio) { // ************************************************************************************** - // *******************************Precalculating Routine for Maps************************ + // *******************************Precalculating Routine for + // Maps************************ // ************************************************************************************** - sum_RefMap = (myfloat_t*) mallocchk(sizeof(myfloat_t) * ntotRefMap); - sumsquare_RefMap = (myfloat_t*) mallocchk(sizeof(myfloat_t) * ntotRefMap); + sum_RefMap = (myfloat_t *) mallocchk(sizeof(myfloat_t) * ntotRefMap); + sumsquare_RefMap = (myfloat_t *) mallocchk(sizeof(myfloat_t) * ntotRefMap); - //Precalculating cross-correlations of maps +// Precalculating cross-correlations of maps #pragma omp parallel for - for (int iRefMap = 0; iRefMap < ntotRefMap ; iRefMap++) - { - myfloat_t sum, sumsquare; - bio.calcross_cor(getmap(iRefMap), sum, sumsquare); - //Storing Crosscorrelations in Map class - sum_RefMap[iRefMap] = sum; - sumsquare_RefMap[iRefMap] = sumsquare; - } + for (int iRefMap = 0; iRefMap < ntotRefMap; iRefMap++) + { + myfloat_t sum, sumsquare; + bio.calcross_cor(getmap(iRefMap), sum, sumsquare); + // Storing Crosscorrelations in Map class + sum_RefMap[iRefMap] = sum; + sumsquare_RefMap[iRefMap] = sumsquare; + } // Precalculating Maps in Fourier space - if (bio.FFTAlgo) - { - PreCalculateMapsFFT(param); - free(maps); - maps = NULL; - } + PreCalculateMapsFFT(param); + free(maps); + maps = NULL; - return(0); + return (0); } -void bioem_Probability::init(size_t maps, size_t angles, size_t cc, bioem& bio) +void 
bioem_Probability::init(size_t maps, size_t angles, bioem &bio) { //********** Initializing pointers ******************* nMaps = maps; nAngles = angles; - nCC = cc; - ptr = bio.malloc_device_host(get_size(maps, angles, cc, bio.param.param_device.writeAngles, bio.param.param_device.writeCC)); - if (bio.DebugOutput >= 1) cout << "Allocation #Maps " << maps << " #Angles " << angles << " #cross.cor " << cc << "\n"; - //<< " == " << get_size(maps, angles, cc, bio.param.param_device.writeAngles, bio.param.param_device.writeCC)<< "\n"; + ptr = bio.malloc_device_host( + get_size(maps, angles, bio.param.param_device.writeAngles)); + if (bio.DebugOutput >= 1) + cout << "Allocation #Maps " << maps << " #Angles " << angles << "\n"; set_pointers(); } -void bioem_Probability::copyFrom(bioem_Probability* from, bioem& bio) +void bioem_Probability::copyFrom(bioem_Probability *from, bioem &bio) { - bioem_Probability_map& pProbMap = getProbMap(0); - bioem_Probability_map& pProbMapFrom = from->getProbMap(0); + bioem_Probability_map &pProbMap = getProbMap(0); + bioem_Probability_map &pProbMapFrom = from->getProbMap(0); memcpy(&pProbMap, &pProbMapFrom, from->nMaps * sizeof(bioem_Probability_map)); if (bio.param.param_device.writeAngles) + { + for (int iOrient = 0; iOrient < nAngles; iOrient++) { - for (int iOrient = 0; iOrient < nAngles; iOrient ++) - { - bioem_Probability_angle& pProbAngle = getProbAngle(0, iOrient); - bioem_Probability_angle& pProbAngleFrom = from->getProbAngle(0, iOrient); - memcpy(&pProbAngle, &pProbAngleFrom, from->nMaps * sizeof(bioem_Probability_angle)); - } - } - - if (bio.param.param_device.writeCC) - { - for (int iCC = 0; iCC < nCC; iCC ++) - { - bioem_Probability_cc& pProbCC = getProbCC(0, iCC); - bioem_Probability_cc& pProbCCFrom = from->getProbCC(0, iCC); - memcpy(&pProbCC, &pProbCCFrom, from->nMaps * sizeof(bioem_Probability_cc)); - } + bioem_Probability_angle &pProbAngle = getProbAngle(0, iOrient); + bioem_Probability_angle &pProbAngleFrom = 
from->getProbAngle(0, iOrient); + memcpy(&pProbAngle, &pProbAngleFrom, + from->nMaps * sizeof(bioem_Probability_angle)); } + } } -int bioem_RefMap::read_MRC(const char* filename,bioem_param& param) +int bioem_RefMap::read_MRC(const char *filename, bioem_param ¶m) { - /* subroutine "read_MRC" of the Situs 2.7.2 program. - Ref: Willy Wriggers. Using Situs for the Integration of Multi-Resolution Structures. - Biophysical Reviews, 2010, Vol. 2, pp. 21-27.*/ - + /* subroutine "read_MRC" of the Situs 2.7.2 program. + Ref: Willy Wriggers. Using Situs for the Integration of + Multi-Resolution Structures. + Biophysical Reviews, 2010, Vol. 2, pp. 21-27.*/ - myfloat_t st,st2; + myfloat_t st, st2; unsigned long count; FILE *fin; float currfloat; @@ -401,66 +426,78 @@ int bioem_RefMap::read_MRC(const char* filename,bioem_param& param) float xlen, ylen, zlen; int mode, ncstart, nrstart, nsstart, ispg, nsymbt, lskflg; float a_tmp, b_tmp, g_tmp; - int mx, my, mz,mapc, mapr, maps_local; + int mx, my, mz, mapc, mapr, maps_local; float dmin, dmax, dmean; int n_range_viol0, n_range_viol1; fin = fopen(filename, "rb"); - if( fin == NULL ) { + if (fin == NULL) + { cout << "ERROR opening MRC: " << filename; exit(1); } - n_range_viol0 = test_mrc(filename,0); - n_range_viol1 = test_mrc(filename,1); + n_range_viol0 = test_mrc(filename, 0); + n_range_viol1 = test_mrc(filename, 1); - if (n_range_viol0 < n_range_viol1) { //* guess endianism + if (n_range_viol0 < n_range_viol1) + { //* guess endianism swap = 0; - if (n_range_viol0 > 0) { - printf(" Warning: %i header field range violations detected in file %s \n", n_range_viol0,filename); + if (n_range_viol0 > 0) + { + printf( + " Warning: %i header field range violations detected in file %s \n", + n_range_viol0, filename); } - } else { + } + else + { swap = 1; - if (n_range_viol1 > 0) { - printf("Warning: %i header field range violations detected in file %s \n", n_range_viol1,filename); + if (n_range_viol1 > 0) + { + printf("Warning: %i header 
field range violations detected in file %s \n", + n_range_viol1, filename); } } printf("\n+++++++++++++++++++++++++++++++++++++++++++\n"); printf("Reading Information from MRC: %s \n", filename); - header_ok *= read_int(&nc,fin,swap); - header_ok *= read_int(&nr,fin,swap); - header_ok *= read_int(&ns,fin,swap); - header_ok *= read_int(&mode,fin,swap); - header_ok *= read_int(&ncstart,fin,swap); - header_ok *= read_int(&nrstart,fin,swap); - header_ok *= read_int(&nsstart,fin,swap); - header_ok *= read_int(&mx,fin,swap); - header_ok *= read_int(&my,fin,swap); - header_ok *= read_int(&mz,fin,swap); - header_ok *= read_float(&xlen,fin,swap); - header_ok *= read_float(&ylen,fin,swap); - header_ok *= read_float(&zlen,fin,swap); - header_ok *= read_float(&a_tmp,fin,swap); - header_ok *= read_float(&b_tmp,fin,swap); - header_ok *= read_float(&g_tmp,fin,swap); - header_ok *= read_int(&mapc,fin,swap); - header_ok *= read_int(&mapr,fin,swap); - header_ok *= read_int(&maps_local,fin,swap); - header_ok *= read_float(&dmin,fin,swap); - header_ok *= read_float(&dmax,fin,swap); - header_ok *= read_float(&dmean,fin,swap); - header_ok *= read_int(&ispg,fin,swap); - header_ok *= read_int(&nsymbt,fin,swap); - header_ok *= read_int(&lskflg,fin,swap); - - printf("Number Columns = %8d \n",nc); - printf("Number Rows = %8d \n",nr); - printf("Number Sections = %8d \n",ns); - printf("MODE = %4d (only data type mode 2: 32-bit)\n",mode); - printf("NSYMBT = %4d (# bytes symmetry operators)\n",nsymbt); - - /* printf(" NCSTART = %8d (index of first column, counting from 0)\n",ncstart); - printf("> NRSTART = %8d (index of first row, counting from 0)\n",nrstart); - printf(" NSSTART = %8d (index of first section, counting from 0)\n",nsstart); + header_ok *= read_int(&nc, fin, swap); + header_ok *= read_int(&nr, fin, swap); + header_ok *= read_int(&ns, fin, swap); + header_ok *= read_int(&mode, fin, swap); + header_ok *= read_int(&ncstart, fin, swap); + header_ok *= read_int(&nrstart, fin, swap); + 
header_ok *= read_int(&nsstart, fin, swap); + header_ok *= read_int(&mx, fin, swap); + header_ok *= read_int(&my, fin, swap); + header_ok *= read_int(&mz, fin, swap); + header_ok *= read_float(&xlen, fin, swap); + header_ok *= read_float(&ylen, fin, swap); + header_ok *= read_float(&zlen, fin, swap); + header_ok *= read_float(&a_tmp, fin, swap); + header_ok *= read_float(&b_tmp, fin, swap); + header_ok *= read_float(&g_tmp, fin, swap); + header_ok *= read_int(&mapc, fin, swap); + header_ok *= read_int(&mapr, fin, swap); + header_ok *= read_int(&maps_local, fin, swap); + header_ok *= read_float(&dmin, fin, swap); + header_ok *= read_float(&dmax, fin, swap); + header_ok *= read_float(&dmean, fin, swap); + header_ok *= read_int(&ispg, fin, swap); + header_ok *= read_int(&nsymbt, fin, swap); + header_ok *= read_int(&lskflg, fin, swap); + + printf("Number Columns = %8d \n", nc); + printf("Number Rows = %8d \n", nr); + printf("Number Sections = %8d \n", ns); + printf("MODE = %4d (only data type mode 2: 32-bit)\n", mode); + printf("NSYMBT = %4d (# bytes symmetry operators)\n", nsymbt); + + /* printf(" NCSTART = %8d (index of first column, counting from + 0)\n",ncstart); + printf("> NRSTART = %8d (index of first row, counting from + 0)\n",nrstart); + printf(" NSSTART = %8d (index of first section, counting from + 0)\n",nsstart); printf(" MX = %8d (# of X intervals in unit cell)\n",mx); printf(" MY = %8d (# of Y intervals in unit cell)\n",my); printf(" MZ = %8d (# of Z intervals in unit cell)\n",mz); @@ -478,136 +515,162 @@ int bioem_RefMap::read_MRC(const char* filename,bioem_param& param) printf(" DMEAN = %8.3f (mean density value - ignored)\n",dmean); printf(" ISPG = %8d (space group number - ignored)\n",ispg); printf(" NSYMBT = %8d (# bytes storing symmetry operators)\n",nsymbt); - printf(" LSKFLG = %8d (skew matrix flag: 0:none, 1:follows)\n",lskflg);*/ + printf(" LSKFLG = %8d (skew matrix flag: 0:none, + 1:follows)\n",lskflg);*/ - if (header_ok == 0) { + if (header_ok 
== 0) + { cout << "ERROR reading MRC header: " << filename; exit(1); } - if(nr!=param.param_device.NumberPixels || nc!=param.param_device.NumberPixels ) - { - cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE ( " << param.param_device.NumberPixels << ", i " << nc << ", j " << nr << ")" << "\n"; - if(!param.notsqure) exit(1); - } + if (nr != param.param_device.NumberPixels || + nc != param.param_device.NumberPixels) + { + cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE ( " + << param.param_device.NumberPixels << ", i " << nc << ", j " << nr + << ")" + << "\n"; + exit(1); + } if (ntotRefMap == 0) - { - maps = (myfloat_t*) mallocchk(refMapSize * sizeof(myfloat_t) * ns); - } + { + maps = (myfloat_t *) mallocchk(refMapSize * sizeof(myfloat_t) * ns); + } else - { - maps = (myfloat_t*) reallocchk(maps, refMapSize * sizeof(myfloat_t) * (ntotRefMap + ns)); - } + { + maps = (myfloat_t *) reallocchk(maps, refMapSize * sizeof(myfloat_t) * + (ntotRefMap + ns)); + } - if(mode!=2) - { - cout << "ERROR with MRC mode " << mode << "\n"; - cout << "Currently mode 2 is the only one allowed" << "\n"; - exit(1); - } + if (mode != 2) + { + cout << "ERROR with MRC mode " << mode << "\n"; + cout << "Currently mode 2 is the only one allowed" + << "\n"; + exit(1); + } else + { + rewind(fin); + for (count = 0; count < 256; ++count) + if (read_float_empty(fin) == 0) + { + cout << "ERROR Converting Data: " << filename; + exit(1); + } + + for (count = 0; count < (unsigned long) nsymbt; ++count) + if (read_char_float(&currfloat, fin) == 0) + { + cout << "ERROR Converting Data: " << filename; + exit(1); + } + + for (int nmap = 0; nmap < ns; nmap++) { - rewind (fin); - for (count=0; count<256; ++count) if (read_float_empty(fin)==0) { - cout << "ERROR Converting Data: " << filename; - exit(1); - } - - for (count=0; count<(unsigned long)nsymbt; ++count) if (read_char_float(&currfloat,fin)==0) { - cout << "ERROR Converting Data: " << filename; - exit(1); - } - - 
for ( int nmap = 0 ; nmap < ns ; nmap ++ ) - { - st=0.0; - st2=0.0; - for ( int j = 0 ; j < nr ; j ++ ) - for ( int i = 0 ; i < nc ; i ++ ) - { - if (read_float(&currfloat,fin,swap)==0) - { - cout << "ERROR Converting Data: " << filename; - exit(1); - } - else - { - if(!param.notsqure){ - maps[(nmap + ntotRefMap) * refMapSize + i * numPixels + j] = (myfloat_t) currfloat; - st += currfloat; - st2 += currfloat*currfloat; - } else { - if( i > 595 && i < 675 && j > 1250 && j< 1330 && nmap >230 && nmap <310)cout << "map1: " << i << " "<< j << " " << nmap << " " << currfloat <<"\n"; - } - } - } - if(param.notsqure)exit(1); - //Normaling maps to zero mean and unit standard deviation - if(!param.notnormmap){ - st /= float(nr*nc); - st2 = sqrt(st2 / float(nr * nc) - st * st); - for ( int j = 0 ; j < nr ; j ++ ) for ( int i = 0 ; i < nc ; i ++ ){ - maps[(nmap + ntotRefMap) * refMapSize + i * numPixels + j] = maps[(nmap + ntotRefMap) * refMapSize + i * numPixels + j] / st2 - st/st2; - //cout <<"MAP:: " << i << " " << j << " " << maps[(nmap + ntotRefMap) * refMapSize + i * numPixels + j] << "\n"; - } - } - } - ntotRefMap += ns ; - // cout << ntotRefMap << "\n"; + st = 0.0; + st2 = 0.0; + for (int j = 0; j < nr; j++) + for (int i = 0; i < nc; i++) + { + if (read_float(&currfloat, fin, swap) == 0) + { + cout << "ERROR Converting Data: " << filename; + exit(1); + } + else + { + maps[(nmap + ntotRefMap) * refMapSize + i * numPixels + j] = + (myfloat_t) currfloat; + st += currfloat; + st2 += currfloat * currfloat; + } + } + // Normaling maps to zero mean and unit standard deviation + if (!param.notnormmap) + { + st /= float(nr * nc); + st2 = sqrt(st2 / float(nr * nc) - st * st); + for (int j = 0; j < nr; j++) + for (int i = 0; i < nc; i++) + { + maps[(nmap + ntotRefMap) * refMapSize + i * numPixels + j] = + maps[(nmap + ntotRefMap) * refMapSize + i * numPixels + j] / + st2 - + st / st2; + // cout <<"MAP:: " << i << " " << j << " " << maps[(nmap + + // ntotRefMap) * refMapSize + i * 
numPixels + j] << "\n"; + } + } } - fclose (fin); + ntotRefMap += ns; + // cout << ntotRefMap << "\n"; + } + fclose(fin); - return(0); + return (0); } -int bioem_RefMap::read_float(float *currfloat, FILE *fin, int swap) { +int bioem_RefMap::read_float(float *currfloat, FILE *fin, int swap) +{ unsigned char *cptr, tmp; - if (fread(currfloat,4,1,fin)!=1) return 0; - if (swap == 1) { - cptr = (unsigned char *)currfloat; + if (fread(currfloat, 4, 1, fin) != 1) + return 0; + if (swap == 1) + { + cptr = (unsigned char *) currfloat; tmp = cptr[0]; - cptr[0]=cptr[3]; - cptr[3]=tmp; + cptr[0] = cptr[3]; + cptr[3] = tmp; tmp = cptr[1]; - cptr[1]=cptr[2]; - cptr[2]=tmp; + cptr[1] = cptr[2]; + cptr[2] = tmp; } return 1; } -int bioem_RefMap::read_int(int *currlong, FILE *fin, int swap) { +int bioem_RefMap::read_int(int *currlong, FILE *fin, int swap) +{ unsigned char *cptr, tmp; - if (fread(currlong,4,1,fin)!=1) return 0; - if (swap == 1) { - cptr = (unsigned char *)currlong; + if (fread(currlong, 4, 1, fin) != 1) + return 0; + if (swap == 1) + { + cptr = (unsigned char *) currlong; tmp = cptr[0]; - cptr[0]=cptr[3]; - cptr[3]=tmp; + cptr[0] = cptr[3]; + cptr[3] = tmp; tmp = cptr[1]; - cptr[1]=cptr[2]; - cptr[2]=tmp; + cptr[1] = cptr[2]; + cptr[2] = tmp; } return 1; } -int bioem_RefMap::read_float_empty (FILE *fin) { +int bioem_RefMap::read_float_empty(FILE *fin) +{ float currfloat; - if (fread(&currfloat,4,1,fin)!=1) return 0; + if (fread(&currfloat, 4, 1, fin) != 1) + return 0; return 1; } -int bioem_RefMap::read_char_float (float *currfloat, FILE *fin) { +int bioem_RefMap::read_char_float(float *currfloat, FILE *fin) +{ char currchar; - if (fread(&currchar,1,1,fin)!=1) return 0; - *currfloat=(float)currchar; + if (fread(&currchar, 1, 1, fin) != 1) + return 0; + *currfloat = (float) currchar; return 1; } -int bioem_RefMap::test_mrc (const char *vol_file, int swap) { +int bioem_RefMap::test_mrc(const char *vol_file, int swap) +{ FILE *fin; int nc, nr, ns, mx, my, mz; int mode, 
ncstart, nrstart, nsstart; @@ -618,56 +681,71 @@ int bioem_RefMap::test_mrc (const char *vol_file, int swap) { float dmin, dmax, dmean, dummy, xorigin, yorigin, zorigin; fin = fopen(vol_file, "rb"); - if( fin == NULL ) { + if (fin == NULL) + { cout << "ERROR opening MRC: " << vol_file; exit(1); } //* read header info - header_ok *= read_int(&nc,fin,swap); - header_ok *= read_int(&nr,fin,swap); - header_ok *= read_int(&ns,fin,swap); - header_ok *= read_int(&mode,fin,swap); - header_ok *= read_int(&ncstart,fin,swap); - header_ok *= read_int(&nrstart,fin,swap); - header_ok *= read_int(&nsstart,fin,swap); - header_ok *= read_int(&mx,fin,swap); - header_ok *= read_int(&my,fin,swap); - header_ok *= read_int(&mz,fin,swap); - header_ok *= read_float(&xlen,fin,swap); - header_ok *= read_float(&ylen,fin,swap); - header_ok *= read_float(&zlen,fin,swap); - header_ok *= read_float(&alpha,fin,swap); - header_ok *= read_float(&beta,fin,swap); - header_ok *= read_float(&gamma,fin,swap); - header_ok *= read_int(&mapc,fin,swap); - header_ok *= read_int(&mapr,fin,swap); - header_ok *= read_int(&maps_local,fin,swap); - header_ok *= read_float(&dmin,fin,swap); - header_ok *= read_float(&dmax,fin,swap); - header_ok *= read_float(&dmean,fin,swap); - for (i=23; i<50; ++i) header_ok *= read_float(&dummy,fin,swap); - header_ok *= read_float(&xorigin,fin,swap); - header_ok *= read_float(&yorigin,fin,swap); - header_ok *= read_float(&zorigin,fin,swap); - fclose (fin); - if (header_ok == 0) { + header_ok *= read_int(&nc, fin, swap); + header_ok *= read_int(&nr, fin, swap); + header_ok *= read_int(&ns, fin, swap); + header_ok *= read_int(&mode, fin, swap); + header_ok *= read_int(&ncstart, fin, swap); + header_ok *= read_int(&nrstart, fin, swap); + header_ok *= read_int(&nsstart, fin, swap); + header_ok *= read_int(&mx, fin, swap); + header_ok *= read_int(&my, fin, swap); + header_ok *= read_int(&mz, fin, swap); + header_ok *= read_float(&xlen, fin, swap); + header_ok *= read_float(&ylen, fin, 
swap); + header_ok *= read_float(&zlen, fin, swap); + header_ok *= read_float(&alpha, fin, swap); + header_ok *= read_float(&beta, fin, swap); + header_ok *= read_float(&gamma, fin, swap); + header_ok *= read_int(&mapc, fin, swap); + header_ok *= read_int(&mapr, fin, swap); + header_ok *= read_int(&maps_local, fin, swap); + header_ok *= read_float(&dmin, fin, swap); + header_ok *= read_float(&dmax, fin, swap); + header_ok *= read_float(&dmean, fin, swap); + for (i = 23; i < 50; ++i) + header_ok *= read_float(&dummy, fin, swap); + header_ok *= read_float(&xorigin, fin, swap); + header_ok *= read_float(&yorigin, fin, swap); + header_ok *= read_float(&zorigin, fin, swap); + fclose(fin); + if (header_ok == 0) + { cout << "ERROR reading MRC header: " << vol_file; exit(1); } - n_range_viols += (nc>5000); n_range_viols += (nc<0); - n_range_viols += (nr>5000); n_range_viols += (nr<0); - n_range_viols += (ns>5000); n_range_viols += (ns<0); - n_range_viols += (ncstart>5000); n_range_viols += (ncstart<-5000); - n_range_viols += (nrstart>5000); n_range_viols += (nrstart<-5000); - n_range_viols += (nsstart>5000); n_range_viols += (nsstart<-5000); - n_range_viols += (mx>5000); n_range_viols += (mx<0); - n_range_viols += (my>5000); n_range_viols += (my<0); - n_range_viols += (mz>5000); n_range_viols += (mz<0); - n_range_viols += (alpha>360.0f); n_range_viols += (alpha<-360.0f); - n_range_viols += (beta>360.0f); n_range_viols += (beta<-360.0f); - n_range_viols += (gamma>360.0f); n_range_viols += (gamma<-360.0f); + n_range_viols += (nc > 5000); + n_range_viols += (nc < 0); + n_range_viols += (nr > 5000); + n_range_viols += (nr < 0); + n_range_viols += (ns > 5000); + n_range_viols += (ns < 0); + n_range_viols += (ncstart > 5000); + n_range_viols += (ncstart < -5000); + n_range_viols += (nrstart > 5000); + n_range_viols += (nrstart < -5000); + n_range_viols += (nsstart > 5000); + n_range_viols += (nsstart < -5000); + n_range_viols += (mx > 5000); + n_range_viols += (mx < 0); + 
n_range_viols += (my > 5000); + n_range_viols += (my < 0); + n_range_viols += (mz > 5000); + n_range_viols += (mz < 0); + n_range_viols += (alpha > 360.0f); + n_range_viols += (alpha < -360.0f); + n_range_viols += (beta > 360.0f); + n_range_viols += (beta < -360.0f); + n_range_viols += (gamma > 360.0f); + n_range_viols += (gamma < -360.0f); return n_range_viols; } diff --git a/model.cpp b/model.cpp index 2e16b68e9c9369ccd3c79510467c1bfdd5b1d645..a0a42163b26800a2742a413dc68d8fbedf121250 100644 --- a/model.cpp +++ b/model.cpp @@ -1,317 +1,389 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +#include <cstring> #include <fstream> #include <iostream> #include <stdio.h> #include <stdlib.h> -#include <cstring> #include "model.h" #include "param.h" using namespace std; -bioem_model::bioem_model() -{ - points = NULL; -} +bioem_model::bioem_model() { points = NULL; } bioem_model::~bioem_model() { - if (points) free(points); + if (points) + free(points); } -int bioem_model::readModel(bioem_param& param, const char* filemodel) +int bioem_model::readModel(bioem_param ¶m, const char *filemodel) { // ************************************************************************************** - // ***************Reading reference Models either PDB or x,y,z,r,d format**************** + // ***************Reading reference Models either PDB or x,y,z,r,d + // format**************** // ************************************************************************************** ofstream exampleReadCoor; - exampleReadCoor.open ("COORDREAD"); + exampleReadCoor.open("COORDREAD"); - exampleReadCoor << "Text --- Number ---- x ---- y ---- z ---- radius ---- number of electron\n"; - + exampleReadCoor << "Text --- Number ---- x ---- y ---- z ---- radius ---- " + "number of electron\n"; int allocsize = 0; std::ifstream input(filemodel); - if(readPDB) + if (readPDB) + { + //************** Reading PDB files ********************** + + ifstream input(filemodel); + if (!input.good()) { - //************** Reading PDB files ********************** + cout << "PDB Failed to open file" + << endl; // pdbfilename << " ("<<filename<<")\n"; + exit(1); + } - ifstream input(filemodel); - if (!input.good()) - { - cout << "PDB Failed to open file" << endl ; // pdbfilename << " ("<<filename<<")\n"; - exit(1); - } + char line[512] = {0}; + char tmpLine[512] = {0}; + int numres = 0; + NormDen = 0.0; - char line[512] = {0}; - char tmpLine[512] = {0}; - int numres = 0; - NormDen = 0.0; + string strfilename(filemodel); - string 
strfilename(filemodel); + size_t foundpos = strfilename.find(".pdb"); + size_t endpos = strfilename.find_last_not_of(" \t"); - size_t foundpos= strfilename.find(".pdb"); - size_t endpos = strfilename.find_last_not_of(" \t"); + if (foundpos > endpos) + { + cout << "Warining:::: .pdb extension NOT dectected in file name \n"; + cout << "Warining:::: Are you sure you want to read a PDB? \n"; + } - if(foundpos > endpos){ - cout << "Warining:::: .pdb extension NOT dectected in file name \n"; - cout << "Warining:::: Are you sure you want to read a PDB? \n"; - } + // cout << " HERE " << filemodel ; + // for eachline in the file + while (!input.eof()) + { + input.getline(line, 511); + strncpy(tmpLine, line, strlen(line)); + char *token = strtok(tmpLine, " "); - // cout << " HERE " << filemodel ; - // for eachline in the file - while (!input.eof()) - { - input.getline(line, 511); - - strncpy(tmpLine, line, strlen(line)); - char *token = strtok(tmpLine, " "); - - if (strcmp(token, "ATOM") == 0) // Optional,Mandatory if standard residues exist - { - /* - 1-6 "ATOM " - 7 - 11 Integer serial Atom serial number. - 13 - 16 Atom name Atom name. - 17 Character altLoc Alternate location indicator. - 18 - 20 Residue name resName Residue name. - 22 Character chainID Chain identifier. - 23 - 26 Integer resSeq Residue sequence number. - 27 AChar iCode Code for insertion of residues. 
- 31 - 38 Real(8.3) x Orthogonal coordinates for X in - 39 - 46 Real(8.3) y Orthogonal coordinates for Y in - 47 - 54 Real(8.3) z Orthogonal coordinates for Z in - */ - - char name[5] = {0}; - char resName[4] = {0}; - float x = 0.0; - float y = 0.0; - float z = 0.0; - char tmp[6] = {0}; - - // parse name - strncpy(tmp, line + 12, 4); - sscanf (tmp, "%s", name); - - // parse resName - strncpy(tmp, line + 17, 3); - sscanf (tmp, "%s", resName); - - // parse x, y, z - char tmpVals[36] = {0}; - - strncpy (tmpVals, line + 30, 8); - sscanf (tmpVals, "%f", &x); - - strncpy (tmpVals, line + 38, 8); - sscanf (tmpVals, "%f", &y); - - strncpy (tmpVals, line + 46, 8); - sscanf (tmpVals, "%f", &z); - - if (strcmp(name, "CA") == 0) - { - if (allocsize == 0) - { - allocsize = 64; - points = (bioem_model_point*) mallocchk(sizeof(bioem_model_point) * allocsize); - } - else if (numres + 1 >= allocsize) - { - allocsize *= 2; - points = (bioem_model_point*) reallocchk(points, sizeof(bioem_model_point) * allocsize); - } - - //Getting residue Radius and electron density - points[numres].radius = getAminoAcidRad(resName); - points[numres].density = getAminoAcidDensity(resName); - NormDen += points[numres].density; - - //Getting the coordinates - points[numres].point.pos[0] = (myfloat_t) x; - points[numres].point.pos[1] = (myfloat_t) y; - points[numres].point.pos[2] = (myfloat_t) z; - exampleReadCoor << "RESIDUE " << numres << " " << points[numres].point.pos[0] << " " << points[numres].point.pos[1] << " " << points[numres].point.pos[2] << " " << points[numres].radius << " " << points[numres].density << "\n"; - numres++; - } - } - - - } - nPointsModel = numres; - cout << "Protein structure read from PDB\n"; + if (strcmp(token, "ATOM") == + 0) // Optional,Mandatory if standard residues exist + { + /* + 1-6 "ATOM " + 7 - 11 Integer serial Atom + serial + number. + 13 - 16 Atom name Atom + name. + 17 Character altLoc Alternate + location indicator. + 18 - 20 Residue name resName Residue name. 
+ 22 Character chainID Chain + identifier. + 23 - 26 Integer resSeq Residue + sequence number. + 27 AChar iCode Code + for + insertion of residues. + 31 - 38 Real(8.3) x Orthogonal + coordinates for X in + 39 - 46 Real(8.3) y Orthogonal + coordinates for Y in + 47 - 54 Real(8.3) z Orthogonal + coordinates for Z in + */ + + char name[5] = {0}; + char resName[4] = {0}; + float x = 0.0; + float y = 0.0; + float z = 0.0; + char tmp[6] = {0}; + + // parse name + strncpy(tmp, line + 12, 4); + sscanf(tmp, "%s", name); + + // parse resName + strncpy(tmp, line + 17, 3); + sscanf(tmp, "%s", resName); + + // parse x, y, z + char tmpVals[36] = {0}; + + strncpy(tmpVals, line + 30, 8); + sscanf(tmpVals, "%f", &x); + + strncpy(tmpVals, line + 38, 8); + sscanf(tmpVals, "%f", &y); + + strncpy(tmpVals, line + 46, 8); + sscanf(tmpVals, "%f", &z); + + if (strcmp(name, "CA") == 0) + { + if (allocsize == 0) + { + allocsize = 64; + points = (bioem_model_point *) mallocchk(sizeof(bioem_model_point) * + allocsize); + } + else if (numres + 1 >= allocsize) + { + allocsize *= 2; + points = (bioem_model_point *) reallocchk( + points, sizeof(bioem_model_point) * allocsize); + } + + // Getting residue Radius and electron density + points[numres].radius = getAminoAcidRad(resName); + points[numres].density = getAminoAcidDensity(resName); + NormDen += points[numres].density; + + // Getting the coordinates + points[numres].point.pos[0] = (myfloat_t) x; + points[numres].point.pos[1] = (myfloat_t) y; + points[numres].point.pos[2] = (myfloat_t) z; + exampleReadCoor << "RESIDUE " << numres << " " + << points[numres].point.pos[0] << " " + << points[numres].point.pos[1] << " " + << points[numres].point.pos[2] << " " + << points[numres].radius << " " + << points[numres].density << "\n"; + numres++; + } + } } - else //Reading model from FILE FORMAT x,y,z,rad,density - { - //**************** Reading Text FILES *********************** + nPointsModel = numres; + cout << "Protein structure read from PDB\n"; + } 
+ else // Reading model from FILE FORMAT x,y,z,rad,density + { + //**************** Reading Text FILES *********************** - char line[128]; - int numres = 0; - NormDen = 0.0; + char line[128]; + int numres = 0; + NormDen = 0.0; - string strfilename(filemodel); + string strfilename(filemodel); - size_t foundpos= strfilename.find(".pdb"); - size_t endpos = strfilename.find_last_not_of(" \t"); + size_t foundpos = strfilename.find(".pdb"); + size_t endpos = strfilename.find_last_not_of(" \t"); + + if (foundpos < endpos) + { + cout << "Warining:::: .pdb dectected in file name whilst using text read " + "\n"; + cout << "Warining:::: Are you sure you do not need --ReadPDB? \n"; + cout << "If so then you must include the keyword IGNORE_PDB in " + "inputfile\n"; + if (not param.ignorePDB) + exit(1); + } - if(foundpos < endpos){ - cout << "Warining:::: .pdb dectected in file name whilst using text read \n"; - cout << "Warining:::: Are you sure you do not need --ReadPDB? \n"; - cout << "If so then you must include the keyword IGNORE_PDB in inputfile\n"; - if(not param.ignorePDB)exit(1); + FILE *file = fopen(filemodel, "r"); + if (file == NULL) + { + cout << "Error opening file " << filemodel << "\n"; + exit(1); + } + while (fgets(line, sizeof line, file) != NULL) + { + if (allocsize == 0) + { + allocsize = 64; + points = (bioem_model_point *) mallocchk(sizeof(bioem_model_point) * + allocsize); + } + else if (numres + 1 >= allocsize) + { + allocsize *= 2; + points = (bioem_model_point *) reallocchk( + points, sizeof(bioem_model_point) * allocsize); } - - FILE *file = fopen ( filemodel , "r" ); - if (file == NULL) - { - cout << "Error opening file " << filemodel << "\n"; - exit(1); - } - while ( fgets ( line, sizeof line, file ) != NULL ) - { - if (allocsize == 0) - { - allocsize = 64; - points = (bioem_model_point*) mallocchk(sizeof(bioem_model_point) * allocsize); - } - else if (numres + 1 >= allocsize) - { - allocsize *= 2; - points = (bioem_model_point*) 
reallocchk(points, sizeof(bioem_model_point) * allocsize); - } - - float tmpval[5]; - sscanf(line, "%f %f %f %f %f", &tmpval[0], &tmpval[1], &tmpval[2], &tmpval[3], &tmpval[4]); - points[numres].point.pos[0] = (myfloat_t) tmpval[0]; - points[numres].point.pos[1] = (myfloat_t) tmpval[1]; - points[numres].point.pos[2] = (myfloat_t) tmpval[2]; - points[numres].radius = (myfloat_t) tmpval[3]; - points[numres].density = (myfloat_t) tmpval[4]; - - exampleReadCoor << "RESIDUE " << numres << " " << points[numres].point.pos[0] << " " << points[numres].point.pos[1] << " " << points[numres].point.pos[2] << " " << points[numres].radius << " " << points[numres].density << "\n"; - NormDen += points[numres].density; - numres++; - } - fclose(file); - nPointsModel = numres; - cout << "Protein structure read from Standard File\n"; + + float tmpval[5]; + sscanf(line, "%f %f %f %f %f", &tmpval[0], &tmpval[1], &tmpval[2], + &tmpval[3], &tmpval[4]); + points[numres].point.pos[0] = (myfloat_t) tmpval[0]; + points[numres].point.pos[1] = (myfloat_t) tmpval[1]; + points[numres].point.pos[2] = (myfloat_t) tmpval[2]; + points[numres].radius = (myfloat_t) tmpval[3]; + points[numres].density = (myfloat_t) tmpval[4]; + + exampleReadCoor << "RESIDUE " << numres << " " + << points[numres].point.pos[0] << " " + << points[numres].point.pos[1] << " " + << points[numres].point.pos[2] << " " + << points[numres].radius << " " << points[numres].density + << "\n"; + NormDen += points[numres].density; + numres++; } - points = (bioem_model_point*) reallocchk(points, sizeof(bioem_model_point) * nPointsModel); - cout << "Total Number of Voxels " << nPointsModel ; - cout << "\nTotal Number of Electrons " << NormDen ; + fclose(file); + nPointsModel = numres; + cout << "Protein structure read from Standard File\n"; + } + points = (bioem_model_point *) reallocchk(points, sizeof(bioem_model_point) * + nPointsModel); + cout << "Total Number of Voxels " << nPointsModel; + cout << "\nTotal Number of Electrons " << 
NormDen; cout << "\n+++++++++++++++++++++++++++++++++++++++++ \n"; - exampleReadCoor.close(); + exampleReadCoor.close(); //******************** Moving to Model to its center of density mass: myfloat3_t r_cm; - if(not(param.nocentermass)){ //by default it is normally done + if (not(param.nocentermass)) + { // by default it is normally done - for(int n = 0; n < 3; n++)r_cm.pos[n] = 0.0; + for (int n = 0; n < 3; n++) + r_cm.pos[n] = 0.0; - for(int n = 0; n < nPointsModel; n++) - { - r_cm.pos[0] += points[n].point.pos[0]*points[n].density; - r_cm.pos[1] += points[n].point.pos[1]*points[n].density; - r_cm.pos[2] += points[n].point.pos[2]*points[n].density; - } - r_cm.pos[0] = r_cm.pos[0] / NormDen ; - r_cm.pos[1] = r_cm.pos[1] / NormDen ; + for (int n = 0; n < nPointsModel; n++) + { + r_cm.pos[0] += points[n].point.pos[0] * points[n].density; + r_cm.pos[1] += points[n].point.pos[1] * points[n].density; + r_cm.pos[2] += points[n].point.pos[2] * points[n].density; + } + r_cm.pos[0] = r_cm.pos[0] / NormDen; + r_cm.pos[1] = r_cm.pos[1] / NormDen; r_cm.pos[2] = r_cm.pos[2] / NormDen; - for(int n = 0; n < nPointsModel; n++) - { - points[n].point.pos[0] -= r_cm.pos[0]; - points[n].point.pos[1] -= r_cm.pos[1]; - points[n].point.pos[2] -= r_cm.pos[2]; - - } + for (int n = 0; n < nPointsModel; n++) + { + points[n].point.pos[0] -= r_cm.pos[0]; + points[n].point.pos[1] -= r_cm.pos[1]; + points[n].point.pos[2] -= r_cm.pos[2]; + } } - return(0); + return (0); } myfloat_t bioem_model::getAminoAcidRad(char *name) { - // *************** Function that gets the radius for each amino acid **************** + // *************** Function that gets the radius for each amino acid + // **************** myfloat_t iaa = 0; - if(std::strcmp(name, "CYS") == 0)iaa = 2.75; - else if(std::strcmp(name, "PHE") == 0)iaa = 3.2; - else if(std::strcmp(name, "LEU") == 0)iaa = 3.1; - else if(std::strcmp(name, "TRP") == 0)iaa = 3.4; - else if(std::strcmp(name, "VAL") == 0)iaa = 2.95; - else if(std::strcmp(name, 
"ILE") == 0)iaa = 3.1; - else if(std::strcmp(name, "MET") == 0)iaa = 3.1; - else if(std::strcmp(name, "HIS") == 0)iaa = 3.05; - else if(std::strcmp(name, "TYR") == 0)iaa = 3.25; - else if(std::strcmp(name, "ALA") == 0)iaa = 2.5; - else if(std::strcmp(name, "GLY") == 0)iaa = 2.25; - else if(std::strcmp(name, "PRO") == 0)iaa = 2.8; - else if(std::strcmp(name, "ASN") == 0)iaa = 2.85; - else if(std::strcmp(name, "THR") == 0)iaa = 2.8; - else if(std::strcmp(name, "SER") == 0)iaa = 2.6; - else if(std::strcmp(name, "ARG") == 0)iaa = 3.3; - else if(std::strcmp(name, "GLN") == 0)iaa = 3.0; - else if(std::strcmp(name, "ASP") == 0)iaa = 2.8; - else if(std::strcmp(name, "LYS") == 0)iaa = 3.2; - else if(std::strcmp(name, "GLU") == 0)iaa = 2.95; - - if(iaa == 0) - { - cout << "PROBLEM WITH AMINO ACID " << name << endl; - exit(1); - } + if (std::strcmp(name, "CYS") == 0) + iaa = 2.75; + else if (std::strcmp(name, "PHE") == 0) + iaa = 3.2; + else if (std::strcmp(name, "LEU") == 0) + iaa = 3.1; + else if (std::strcmp(name, "TRP") == 0) + iaa = 3.4; + else if (std::strcmp(name, "VAL") == 0) + iaa = 2.95; + else if (std::strcmp(name, "ILE") == 0) + iaa = 3.1; + else if (std::strcmp(name, "MET") == 0) + iaa = 3.1; + else if (std::strcmp(name, "HIS") == 0) + iaa = 3.05; + else if (std::strcmp(name, "TYR") == 0) + iaa = 3.25; + else if (std::strcmp(name, "ALA") == 0) + iaa = 2.5; + else if (std::strcmp(name, "GLY") == 0) + iaa = 2.25; + else if (std::strcmp(name, "PRO") == 0) + iaa = 2.8; + else if (std::strcmp(name, "ASN") == 0) + iaa = 2.85; + else if (std::strcmp(name, "THR") == 0) + iaa = 2.8; + else if (std::strcmp(name, "SER") == 0) + iaa = 2.6; + else if (std::strcmp(name, "ARG") == 0) + iaa = 3.3; + else if (std::strcmp(name, "GLN") == 0) + iaa = 3.0; + else if (std::strcmp(name, "ASP") == 0) + iaa = 2.8; + else if (std::strcmp(name, "LYS") == 0) + iaa = 3.2; + else if (std::strcmp(name, "GLU") == 0) + iaa = 2.95; + + if (iaa == 0) + { + cout << "PROBLEM WITH AMINO ACID " << 
name << endl; + exit(1); + } return iaa; - } myfloat_t bioem_model::getAminoAcidDensity(char *name) { - // *************** Function that gets the number of electrons for each amino acid **************** + // *************** Function that gets the number of electrons for each amino + // acid **************** myfloat_t iaa = 0.0; - if(std::strcmp(name, "CYS") == 0)iaa = 64.0; - else if(std::strcmp(name, "PHE") == 0)iaa = 88.0; - else if(std::strcmp(name, "LEU") == 0)iaa = 72.0; - else if(std::strcmp(name, "TRP") == 0)iaa = 108.0; - else if(std::strcmp(name, "VAL") == 0)iaa = 64.0; - else if(std::strcmp(name, "ILE") == 0)iaa = 72.0; - else if(std::strcmp(name, "MET") == 0)iaa = 80.0; - else if(std::strcmp(name, "HIS") == 0)iaa = 82.0; - else if(std::strcmp(name, "TYR") == 0)iaa = 96.0; - else if(std::strcmp(name, "ALA") == 0)iaa = 48.0; - else if(std::strcmp(name, "GLY") == 0)iaa = 40.0; - else if(std::strcmp(name, "PRO") == 0)iaa = 62.0; - else if(std::strcmp(name, "ASN") == 0)iaa = 66.0; - else if(std::strcmp(name, "THR") == 0)iaa = 64.0; - else if(std::strcmp(name, "SER") == 0)iaa = 56.0; - else if(std::strcmp(name, "ARG") == 0)iaa = 93.0; - else if(std::strcmp(name, "GLN") == 0)iaa = 78.0; - else if(std::strcmp(name, "ASP") == 0)iaa = 59.0; - else if(std::strcmp(name, "LYS") == 0)iaa = 79.0; - else if(std::strcmp(name, "GLU") == 0)iaa = 53.0; - - if(iaa == 0.0) - { - cout << "PROBLEM WITH AMINO ACID " << name << endl; - exit(1); - } + if (std::strcmp(name, "CYS") == 0) + iaa = 64.0; + else if (std::strcmp(name, "PHE") == 0) + iaa = 88.0; + else if (std::strcmp(name, "LEU") == 0) + iaa = 72.0; + else if (std::strcmp(name, "TRP") == 0) + iaa = 108.0; + else if (std::strcmp(name, "VAL") == 0) + iaa = 64.0; + else if (std::strcmp(name, "ILE") == 0) + iaa = 72.0; + else if (std::strcmp(name, "MET") == 0) + iaa = 80.0; + else if (std::strcmp(name, "HIS") == 0) + iaa = 82.0; + else if (std::strcmp(name, "TYR") == 0) + iaa = 96.0; + else if (std::strcmp(name, "ALA") == 0) 
+ iaa = 48.0; + else if (std::strcmp(name, "GLY") == 0) + iaa = 40.0; + else if (std::strcmp(name, "PRO") == 0) + iaa = 62.0; + else if (std::strcmp(name, "ASN") == 0) + iaa = 66.0; + else if (std::strcmp(name, "THR") == 0) + iaa = 64.0; + else if (std::strcmp(name, "SER") == 0) + iaa = 56.0; + else if (std::strcmp(name, "ARG") == 0) + iaa = 93.0; + else if (std::strcmp(name, "GLN") == 0) + iaa = 78.0; + else if (std::strcmp(name, "ASP") == 0) + iaa = 59.0; + else if (std::strcmp(name, "LYS") == 0) + iaa = 79.0; + else if (std::strcmp(name, "GLU") == 0) + iaa = 53.0; + + if (iaa == 0.0) + { + cout << "PROBLEM WITH AMINO ACID " << name << endl; + exit(1); + } return iaa; } - diff --git a/param.cpp b/param.cpp index a6437bd6f3fb3b42d27caa604432709581cb576a..ec5d1515d8e2ee58b2ccfc2f57d6b6bf70926fe1 100644 --- a/param.cpp +++ b/param.cpp @@ -1,47 +1,48 @@ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ < BioEM software for Bayesian inference of Electron Microscopy images> - Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, - Volker Lindenstruth and Gerhard Hummer. + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. Max Planck Institute of Biophysics, Frankfurt, Germany. - Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany. - Max Planck Computing and Data Facility, Garching, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. - Released under the GNU Public License, v3. + Released under the GNU Public License, v3. See license statement for terms of distribution. 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include <fstream> #include <cstring> -#include <math.h> #include <fftw3.h> +#include <fstream> +#include <iostream> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> #ifdef WITH_OPENMP #include <omp.h> #endif -#include "param.h" #include "map.h" +#include "param.h" using namespace std; bioem_param::bioem_param() { - + //**************** Initializing Variables and defaults **************** - //Number of Pixels + // Number of Pixels param_device.NumberPixels = 0; param_device.NumberFFTPixels1D = 0; // Euler angle grid spacing angleGridPointsAlpha = 0; angleGridPointsBeta = 0; - //Envelop function paramters + // Envelop function paramters numberGridPointsEnvelop = 0; - //Contrast transfer function paramters + // Contrast transfer function paramters numberGridPointsCTF_amp = 0; numberGridPointsCTF_phase = 0; @@ -57,61 +58,60 @@ bioem_param::bioem_param() angprior = NULL; printModel = false; + BestmapCalcCC = false; } -int bioem_param::readParameters(const char* fileinput) -{ // ************************************************************************************** - // ***************************** Reading Input Parameters ****************************** - // ************************************************************************************** +int bioem_param::readParameters(const char *fileinput) +{ // ************************************************************************************** + // ***************************** Reading Input Parameters + // ****************************** + // ************************************************************************************** - // Control for Parameters + // Control for Parameters bool yesPixSi = false; bool yesNumPix = false; bool yesGPal = false; bool yesGPbe = false; - bool yesMDC = false ; - bool yesBFact=false; - bool yesDefocus=false; - bool yesAMP=false; - 
bool yesPSFenv=false; - bool yesPSFpha=false; - bool yesquatgrid=false; + bool yesMDC = false; + bool yesBFact = false; + bool yesDefocus = false; + bool yesAMP = false; + bool yesPSFenv = false; + bool yesPSFpha = false; + bool yesquatgrid = false; //***************** Default VALUES - param_device.flipped=false; - param_device.debugterm=false; - param_device.writeCC=false; - param_device.tousepsf=false; - param_device.CCwithBayes=true; - writeCTF=false; - elecwavel=0.019866; - ignoreCCoff=false; - doquater=false; - nocentermass=false; - printrotmod=false; - readquatlist=false; - doaaradius=true; - notnormmap=false; - usepsf=false; - yespriorAngles=false; - ignorepointsout=false; - printrotmod=false; - ignorePDB=false; - - NotUn_angles=0; - priorMod=1; //Default - shiftX=0; - shiftY=0; - param_device.sigmaPriorbctf=100.; - param_device.sigmaPriordefo=1.0; - param_device.Priordefcent=3.0; + param_device.tousepsf = false; + writeCTF = false; + elecwavel = 0.019866; + doquater = false; + nocentermass = false; + printrotmod = false; + readquatlist = false; + doaaradius = true; + notnormmap = false; + usepsf = false; + yespriorAngles = false; + ignorepointsout = false; + printrotmod = false; + ignorePDB = false; + + NotUn_angles = 0; + priorMod = 1; // Default + shiftX = 0; + shiftY = 0; + param_device.sigmaPriorbctf = 100.; + param_device.sigmaPriordefo = 2.0; + param_device.Priordefcent = 3.0; + param_device.sigmaPrioramp = 0.5; + param_device.Priorampcent = 0.; ifstream input(fileinput); if (!input.good()) - { - cout << "Failed to open file: " << fileinput << "\n"; - exit(1); - } + { + cout << "Failed to open file: " << fileinput << "\n"; + exit(1); + } char line[512] = {0}; char saveline[512]; @@ -119,1016 +119,1343 @@ int bioem_param::readParameters(const char* fileinput) cout << "\n +++++++++++++++++++++++++++++++++++++++++ \n"; cout << "\n READING BioEM PARAMETERS \n\n"; cout << " +++++++++++++++++++++++++++++++++++++++++ \n"; - - while (!input.eof()) - { - 
input.getline(line, 512); - strcpy(saveline, line); - char *token = strtok(line, " "); - - if (token == NULL || line[0] == '#' || strlen(token) == 0) - { - // comment or blank line - } - else if (strcmp(token, "PIXEL_SIZE") == 0) - { - token = strtok(NULL, " "); - pixelSize = atof(token); - if (pixelSize < 0 ) { cout << "*** Error: Negative pixelSize "; exit(1);} - cout << "Pixel Sixe " << pixelSize << "\n"; - yesPixSi= true; - } - else if (strcmp(token, "NOT_SQUARE_IMAGE") == 0) - { - notsqure=true; - } - else if (strcmp(token, "NUMBER_PIXELS") == 0) - { - token = strtok(NULL, " "); - param_device.NumberPixels = int(atoi(token)); - if (param_device.NumberPixels < 0 ) { cout << "*** Error: Negative Number of Pixels "; exit(1);} - cout << "Number of Pixels " << param_device.NumberPixels << "\n"; - yesNumPix= true ; - } - else if (strcmp(token, "GRIDPOINTS_ALPHA") == 0) - { - token = strtok(NULL, " "); - angleGridPointsAlpha = int(atoi(token)); - if (angleGridPointsAlpha < 0 ) { cout << "*** Error: Negative GRIDPOINTS_ALPHA "; exit(1);} - cout << "Grid points alpha " << angleGridPointsAlpha << "\n"; - yesGPal= true; - } - else if (strcmp(token, "GRIDPOINTS_BETA") == 0) - { - token = strtok(NULL, " "); - angleGridPointsBeta = int(atoi(token)); - if (angleGridPointsBeta < 0 ) { cout << "*** Error: Negative GRIDPOINTS_BETA "; exit(1);} - cout << "Grid points in Cosine ( beta ) " << angleGridPointsBeta << "\n"; - yesGPbe= true; - } - else if (strcmp(token, "USE_QUATERNIONS") == 0) - // else if (token=="USE_QUATERNIONS") - { - cout << "Orientations with Quaternions. 
\n"; - doquater= true; - } - else if (strcmp(token, "GRIDPOINTS_QUATERNION") == 0) - { - if(!notuniformangles){ - token = strtok(NULL, " "); - GridPointsQuatern = int(atoi(token)); - cout << "Gridpoints Quaternions " << GridPointsQuatern << "\n"; - }else{ - cout << "Inconsitent Input: Grid or List with Quaternions?\n"; - exit(1);} - yesquatgrid=true; - doquater= true; - } - //CTF PARAMETERS - else if (strcmp(token, "CTF_B_ENV") == 0) - { - token = strtok(NULL, " "); - startBfactor = atof(token); - if (startBfactor < 0 ) { cout << "*** Error: Negative START B Env "; exit(1);} - token = strtok(NULL, " "); - endBfactor = atof(token); - if (endBfactor < 0 ) { cout << "*** Error: Negative END B Env "; exit(1);} - token = strtok(NULL, " "); - numberGridPointsEnvelop = int(atoi(token)); - if (numberGridPointsEnvelop < 0 ) { cout << "*** Error: Negative Number of Grid points BEnv "; exit(1);} - cout << "Grid CTF B-ENV: " << startBfactor << " " << endBfactor <<" " << numberGridPointsEnvelop<< "\n"; - if(startBfactor > endBfactor){ cout << "Error: Grid ill defined END > START\n"; exit(1);}; - yesBFact = true; - } - else if (strcmp(token,"CTF_DEFOCUS")==0) - { - token = strtok(NULL, " "); - startDefocus = atof(token); - if (startDefocus < 0 ) { cout << "*** Error: Negative START Defocus "; exit(1);} - token = strtok(NULL, " "); - endDefocus = atof(token); - if (endDefocus < 0 ) { cout << "*** Error: Negative END Defocus "; exit(1);} - token = strtok(NULL, " "); - numberGridPointsCTF_phase = int(atoi(token)); - if (numberGridPointsCTF_phase < 0 ) { cout << "*** Error: Negative Number of Grid points Defocus "; exit(1);} - cout << "Grid CTF Defocus: " << startDefocus << " " << endDefocus <<" " << numberGridPointsCTF_phase << "\n"; - if(startDefocus > endDefocus){ cout << "Error: Grid ill defined END > START\n"; exit(1);}; - if(endDefocus> 8.){cout << "Defocus beyond 8micro-m range is not allowed \n"; exit(1);} - yesDefocus = true; - } - else if (strcmp(token,"CTF_AMPLITUDE")==0) 
- { - token = strtok(NULL, " "); - startGridCTF_amp = atof(token); - if (startGridCTF_amp < 0 ) { cout << "*** Error: Negative START Amplitude "; exit(1);} - token = strtok(NULL, " "); - endGridCTF_amp = atof(token); - if (endGridCTF_amp < 0 ) { cout << "*** Error: Negative END Amplitude"; exit(1);} - token = strtok(NULL, " "); - numberGridPointsCTF_amp=int(atoi(token)); - if (numberGridPointsCTF_amp < 0 ) { cout << "*** Error: Negative Number of grid points amplitude "; exit(1);} - cout << "Grid Amplitude: " << startGridCTF_amp << " " << endGridCTF_amp <<" " << numberGridPointsCTF_amp << "\n"; - if(startGridCTF_amp > endGridCTF_amp){ cout << "Error: Grid ill defined END > START\n"; exit(1);}; - yesAMP = true; - } - else if (strcmp(token,"ELECTRON_WAVELENGTH")==0) - { - token = strtok(NULL," "); - elecwavel=atof(token); - if(elecwavel < 0.0150 ){ - cout << "Wrong electron wave length " << elecwavel << "\n"; - cout << "Has to be in Angstrom (A)\n"; - exit(1);} - cout << "Electron wave length in (A) is: " << elecwavel << "\n"; - } - //PSF PARAMETERS - else if (strcmp(token, "USE_PSF") == 0) - { - usepsf=true; - param_device.tousepsf=true; - cout << "IMPORTANT: Using Point Spread Function. Thus, all parameters are in Real Space. 
\n"; - } - else if (strcmp(token,"PSF_AMPLITUDE")==0) - { - token = strtok(NULL, " "); - startGridCTF_amp = atof(token); - if (startGridCTF_amp < 0 ) { cout << "*** Error: Negative START Amplitude "; exit(1);} - token = strtok(NULL, " "); - endGridCTF_amp = atof(token); - if (endGridCTF_amp < 0 ) { cout << "*** Error: Negative END Amplitude"; exit(1);} - token = strtok(NULL, " "); - numberGridPointsCTF_amp= int(atoi(token)); - if (numberGridPointsCTF_amp < 0 ) { cout << "*** Error: Negative Number of grid points amplitude "; exit(1);} - cout << "Grid Amplitude: " << startGridCTF_amp << " " << endGridCTF_amp <<" " << numberGridPointsCTF_amp << "\n"; - if(startGridCTF_amp > endGridCTF_amp){ cout << "Error: Grid ill defined END > START\n"; exit(1);}; - yesAMP = true; - } - else if (strcmp(token,"PSF_ENVELOPE")==0) - { - token = strtok(NULL, " "); - startGridEnvelop = atof(token); - if (startGridEnvelop < 0 ) { cout << "*** Error: Negative START PSF Env. "; exit(1);} - token = strtok(NULL, " "); - endGridEnvelop = atof(token); - if (endGridEnvelop < 0 ) { cout << "*** Error: Negative END PSF Env. "; exit(1);} - token = strtok(NULL, " "); - numberGridPointsEnvelop=int(atoi(token)); - if (numberGridPointsEnvelop < 0 ) { cout << "*** Error: Negative Number of grid points PSF Env. 
"; exit(1);} - cout << "Grid PSF Envelope: " << startGridEnvelop << " " << endGridEnvelop <<" " << numberGridPointsEnvelop << "\n"; - if(startGridEnvelop > endGridEnvelop){ cout << "Error: Grid ill defined END > START\n"; exit(1);}; - yesPSFenv = true; - } - else if (strcmp(token,"PSF_PHASE")==0) - { - token = strtok(NULL, " "); - startGridCTF_phase = atof(token); - if (startGridCTF_phase < 0 ) { cout << "*** Error: Negative START Amplitud "; exit(1);} - token = strtok(NULL, " "); - endGridCTF_phase = atof(token); - if (endGridCTF_phase < 0 ) { cout << "*** Error: Negative END Amplitud"; exit(1);} - token = strtok(NULL, " "); - numberGridPointsCTF_phase= int(atoi(token)); - if (numberGridPointsCTF_phase< 0 ) { cout << "*** Error: Negative Number of grid points amplitud "; exit(1);} - cout << "Grid PSF phase: " << startGridCTF_phase << " " << endGridCTF_phase <<" " << numberGridPointsCTF_phase << "\n"; - if(startGridCTF_phase > endGridCTF_phase){ cout << "Error: Grid ill defined END > START\n"; exit(1);}; - yesPSFpha = true; - } - else if (strcmp(token, "DISPLACE_CENTER") == 0) - { - token = strtok(NULL, " "); - param_device.maxDisplaceCenter = int(atoi(token)); - if (param_device.maxDisplaceCenter < 0 ) { cout << "*** Error: Negative MAX_D_CENTER "; exit(1);} - cout << "Maximum displacement Center " << param_device.maxDisplaceCenter << "\n"; - token = strtok(NULL, " "); - param_device.GridSpaceCenter = int(atoi(token)); - if (param_device.GridSpaceCenter < 0 ) { cout << "*** Error: Negative PIXEL_GRID_CENTER "; exit(1);} - cout << "Grid space displacement center " << param_device.GridSpaceCenter << "\n"; - yesMDC = true; - } - else if (strcmp(token, "WRITE_PROB_ANGLES") == 0) //Key word if writing down each angle probabilities - { - param_device.writeAngles = true; - cout << "Writing Probabilies of each angle \n"; - } - else if (strcmp(token, "WRITE_CROSSCOR") == 0)//Key word if writing down full micrograph cross correlation - { - param_device.writeCC = true; - 
param_device.CCdisplace=10; - cout << "Writing CrossCorrelations every 10 pixels\n"; - } - else if (strcmp(token, "#CROSSCOR_GRID_SPACE") == 0) - { - token = strtok(NULL, " "); - param_device.CCdisplace=int(atoi(token)); - if (param_device.CCdisplace < 0 ) { cout << "*** Error: Negative CROSSCOR_DISPLACE "; exit(1);} - cout << "Writing Cross Correlation Displacement " << param_device.CCdisplace << " \n"; - } - else if (strcmp(token, "CROSSCOR_NOTBAYESIAN") == 0) - { - param_device.CCwithBayes=false; - cout << "Not using Bayesian Analysis to write Cross Correlation \n"; - } - else if (strcmp(token, "FLIPPED") == 0) //Key word if images are flipped for cross-correlation - { - param_device.flipped = true; - cout << "Micrograph Flipped Intensities \n"; - } - else if (strcmp(token, "IGNORE_CROSSCORR_OFFSET") == 0) //Key word if images are flipped for cross-correlation - { - ignoreCCoff = true; - cout << "Ignoring Cross-Correlation offset \n"; - } - else if (strcmp(token, "IGNORE_PDB") == 0) //Ignore PDB extension - { - ignorePDB = true; - cout << "Ignoring PDB extension in model file \n"; - } - else if (strcmp(token, "NO_PROJECT_RADIUS") == 0) //If projecting CA with amino-acid radius - { - doaaradius = false; - cout << "Not Projecting corresponding radius \n"; - } - else if (strcmp(token, "DEBUG_INDI_PROB_TERM") == 0)//writing out each term of the probability - { - param_device.debugterm = true; - cout << "Debugging Individual Probability Terms \n"; - } - else if (strcmp(token, "WRITE_CTF_PARAM") == 0)//Number of Euler angle tripplets in non uniform Euler angle sampling - { - writeCTF=true; - token = strtok(NULL," "); - cout << "Writing CTF parameters from PSF parameters that maximize the posterior. 
\n"; - } - else if (strcmp(token, "NO_CENTEROFMASS") == 0)//Number of Euler angle tripplets in non uniform Euler angle sampling - { - nocentermass=true; - cout << "BE CAREFUL CENTER OF MASS IS NOT REMOVED \n Calculated images might be out of range \n"; - } - else if (strcmp(token, "PRINT_ROTATED_MODELS") == 0)//Number of Euler angle tripplets in non uniform Euler angle sampling - { - printrotmod=true; - cout << "PRINTING out rotatted models (best for debugging)\n"; - } - else if (strcmp(token, "NO_MAP_NORM") == 0) - { - notnormmap=true; - cout << "NOT NORMALIZING MAP\n" ; - } - else if (strcmp(token, "PRIOR_MODEL") == 0) - { - token = strtok(NULL, " "); - priorMod = atof(token); - cout << "MODEL PRIOR Probability " << priorMod << "\n" ; - } - else if (strcmp(token, "PRIOR_ANGLES") == 0) - { - yespriorAngles=true; - cout << "READING Priors for Orientations in additonal orientation file\n" ; - } - else if (strcmp(token, "SHIFT_X") == 0) - { - token = strtok(NULL, " "); - shiftX=atoi(token); - cout << "Shifting initial model X by "<< shiftX << "\n" ; - } - else if (strcmp(token, "SHIFT_Y") == 0) - { - token = strtok(NULL, " "); - shiftY=atoi(token); - cout << "Shifting initial model Y by "<< shiftY << "\n" ; - } - else if (strcmp(token, "SIGMA_PRIOR_B_CTF") == 0) - { - token = strtok(NULL, " "); - param_device.sigmaPriorbctf=atof(token); - cout << "Chainging Gaussian width in Prior of Envelope b parameter: " << param_device.sigmaPriorbctf << "\n"; - } - else if (strcmp(token, "SIGMA_PRIOR_DEFOCUS") == 0) - { - token = strtok(NULL, " "); - param_device.sigmaPriordefo=atof(token); - cout << "Gaussian Width in Prior of defocus parameter: " << param_device.sigmaPriordefo << "\n"; - } - else if (strcmp(token, "PRIOR_DEFOCUS_CENTER") == 0) - { - token = strtok(NULL, " "); - param_device.Priordefcent=atof(token); - cout << "Gaussian Center in Prior of defocus parameter: " << param_device.Priordefcent << "\n"; - } - else if (strcmp(token, "IGNORE_POINTSOUT") == 0) - { - 
ignorepointsout=true; - cout << "Ignoring model points outside the map\n" ; - } - else if (strcmp(token, "PRINT_ROTATED_MODELS") == 0)//Number of Euler angle tripplets in non uniform Euler angle sampling - { - printrotmod=true; - cout << "PRINTING out rotatted models (best for debugging)\n"; - } + while (!input.eof()) + { + input.getline(line, 512); + strcpy(saveline, line); + char *token = strtok(line, " "); + if (token == NULL || line[0] == '#' || strlen(token) == 0) + { + // comment or blank line + } + else if (strcmp(token, "PIXEL_SIZE") == 0) + { + token = strtok(NULL, " "); + pixelSize = atof(token); + if (pixelSize < 0) + { + cout << "*** Error: Negative pixelSize "; + exit(1); + } + cout << "Pixel Sixe " << pixelSize << "\n"; + yesPixSi = true; + } + else if (strcmp(token, "NUMBER_PIXELS") == 0) + { + token = strtok(NULL, " "); + param_device.NumberPixels = int(atoi(token)); + if (param_device.NumberPixels < 0) + { + cout << "*** Error: Negative Number of Pixels "; + exit(1); + } + cout << "Number of Pixels " << param_device.NumberPixels << "\n"; + yesNumPix = true; + } + else if (strcmp(token, "GRIDPOINTS_ALPHA") == 0) + { + token = strtok(NULL, " "); + angleGridPointsAlpha = int(atoi(token)); + if (angleGridPointsAlpha < 0) + { + cout << "*** Error: Negative GRIDPOINTS_ALPHA "; + exit(1); + } + cout << "Grid points alpha " << angleGridPointsAlpha << "\n"; + yesGPal = true; + } + else if (strcmp(token, "GRIDPOINTS_BETA") == 0) + { + token = strtok(NULL, " "); + angleGridPointsBeta = int(atoi(token)); + if (angleGridPointsBeta < 0) + { + cout << "*** Error: Negative GRIDPOINTS_BETA "; + exit(1); + } + cout << "Grid points in Cosine ( beta ) " << angleGridPointsBeta << "\n"; + yesGPbe = true; + } + else if (strcmp(token, "USE_QUATERNIONS") == 0) + // else if (token=="USE_QUATERNIONS") + { + cout << "Orientations with Quaternions. 
\n"; + doquater = true; + } + else if (strcmp(token, "GRIDPOINTS_QUATERNION") == 0) + { + if (!notuniformangles) + { + token = strtok(NULL, " "); + GridPointsQuatern = int(atoi(token)); + cout << "Gridpoints Quaternions " << GridPointsQuatern << "\n"; + } + else + { + cout << "Inconsitent Input: Grid or List with Quaternions?\n"; + exit(1); + } + yesquatgrid = true; + doquater = true; + } + // CTF PARAMETERS + else if (strcmp(token, "CTF_B_ENV") == 0) + { + token = strtok(NULL, " "); + startBfactor = atof(token); + if (startBfactor < 0) + { + cout << "*** Error: Negative START B Env "; + exit(1); + } + token = strtok(NULL, " "); + endBfactor = atof(token); + if (endBfactor < 0) + { + cout << "*** Error: Negative END B Env "; + exit(1); + } + token = strtok(NULL, " "); + numberGridPointsEnvelop = int(atoi(token)); + if (numberGridPointsEnvelop < 0) + { + cout << "*** Error: Negative Number of Grid points BEnv "; + exit(1); + } + cout << "Grid CTF B-ENV: " << startBfactor << " " << endBfactor << " " + << numberGridPointsEnvelop << "\n"; + if (startBfactor > endBfactor) + { + cout << "Error: Grid ill defined END > START\n"; + exit(1); + }; + yesBFact = true; + } + else if (strcmp(token, "CTF_DEFOCUS") == 0) + { + token = strtok(NULL, " "); + startDefocus = atof(token); + if (startDefocus < 0) + { + cout << "*** Error: Negative START Defocus "; + exit(1); + } + token = strtok(NULL, " "); + endDefocus = atof(token); + if (endDefocus < 0) + { + cout << "*** Error: Negative END Defocus "; + exit(1); + } + token = strtok(NULL, " "); + numberGridPointsCTF_phase = int(atoi(token)); + if (numberGridPointsCTF_phase < 0) + { + cout << "*** Error: Negative Number of Grid points Defocus "; + exit(1); + } + cout << "Grid CTF Defocus: " << startDefocus << " " << endDefocus << " " + << numberGridPointsCTF_phase << "\n"; + if (startDefocus > endDefocus) + { + cout << "Error: Grid ill defined END > START\n"; + exit(1); + }; + if (endDefocus > 8.) 
+ { + cout << "Defocus beyond 8micro-m range is not allowed \n"; + exit(1); + } + yesDefocus = true; + } + else if (strcmp(token, "CTF_AMPLITUDE") == 0) + { + token = strtok(NULL, " "); + startGridCTF_amp = atof(token); + if (startGridCTF_amp < 0) + { + cout << "*** Error: Negative START Amplitude "; + exit(1); + } + token = strtok(NULL, " "); + endGridCTF_amp = atof(token); + if (endGridCTF_amp < 0) + { + cout << "*** Error: Negative END Amplitude"; + exit(1); + } + token = strtok(NULL, " "); + numberGridPointsCTF_amp = int(atoi(token)); + if (numberGridPointsCTF_amp < 0) + { + cout << "*** Error: Negative Number of grid points amplitude "; + exit(1); + } + cout << "Grid Amplitude: " << startGridCTF_amp << " " << endGridCTF_amp + << " " << numberGridPointsCTF_amp << "\n"; + if (startGridCTF_amp > endGridCTF_amp) + { + cout << "Error: Grid ill defined END > START\n"; + exit(1); + }; + yesAMP = true; + } + else if (strcmp(token, "ELECTRON_WAVELENGTH") == 0) + { + token = strtok(NULL, " "); + elecwavel = atof(token); + if (elecwavel < 0.0150) + { + cout << "Wrong electron wave length " << elecwavel << "\n"; + cout << "Has to be in Angstrom (A)\n"; + exit(1); + } + cout << "Electron wave length in (A) is: " << elecwavel << "\n"; + } + // PSF PARAMETERS + else if (strcmp(token, "USE_PSF") == 0) + { + usepsf = true; + param_device.tousepsf = true; + cout << "IMPORTANT: Using Point Spread Function. Thus, all parameters " + "are in Real Space. 
\n"; + } + else if (strcmp(token, "PSF_AMPLITUDE") == 0) + { + token = strtok(NULL, " "); + startGridCTF_amp = atof(token); + if (startGridCTF_amp < 0) + { + cout << "*** Error: Negative START Amplitude "; + exit(1); + } + token = strtok(NULL, " "); + endGridCTF_amp = atof(token); + if (endGridCTF_amp < 0) + { + cout << "*** Error: Negative END Amplitude"; + exit(1); + } + token = strtok(NULL, " "); + numberGridPointsCTF_amp = int(atoi(token)); + if (numberGridPointsCTF_amp < 0) + { + cout << "*** Error: Negative Number of grid points amplitude "; + exit(1); + } + cout << "Grid Amplitude: " << startGridCTF_amp << " " << endGridCTF_amp + << " " << numberGridPointsCTF_amp << "\n"; + if (startGridCTF_amp > endGridCTF_amp) + { + cout << "Error: Grid ill defined END > START\n"; + exit(1); + }; + yesAMP = true; + } + else if (strcmp(token, "PSF_ENVELOPE") == 0) + { + token = strtok(NULL, " "); + startGridEnvelop = atof(token); + if (startGridEnvelop < 0) + { + cout << "*** Error: Negative START PSF Env. "; + exit(1); + } + token = strtok(NULL, " "); + endGridEnvelop = atof(token); + if (endGridEnvelop < 0) + { + cout << "*** Error: Negative END PSF Env. "; + exit(1); + } + token = strtok(NULL, " "); + numberGridPointsEnvelop = int(atoi(token)); + if (numberGridPointsEnvelop < 0) + { + cout << "*** Error: Negative Number of grid points PSF Env. 
"; + exit(1); + } + cout << "Grid PSF Envelope: " << startGridEnvelop << " " << endGridEnvelop + << " " << numberGridPointsEnvelop << "\n"; + if (startGridEnvelop > endGridEnvelop) + { + cout << "Error: Grid ill defined END > START\n"; + exit(1); + }; + yesPSFenv = true; + } + else if (strcmp(token, "PSF_PHASE") == 0) + { + token = strtok(NULL, " "); + startGridCTF_phase = atof(token); + if (startGridCTF_phase < 0) + { + cout << "*** Error: Negative START Amplitud "; + exit(1); + } + token = strtok(NULL, " "); + endGridCTF_phase = atof(token); + if (endGridCTF_phase < 0) + { + cout << "*** Error: Negative END Amplitud"; + exit(1); + } + token = strtok(NULL, " "); + numberGridPointsCTF_phase = int(atoi(token)); + if (numberGridPointsCTF_phase < 0) + { + cout << "*** Error: Negative Number of grid points amplitud "; + exit(1); + } + cout << "Grid PSF phase: " << startGridCTF_phase << " " + << endGridCTF_phase << " " << numberGridPointsCTF_phase << "\n"; + if (startGridCTF_phase > endGridCTF_phase) + { + cout << "Error: Grid ill defined END > START\n"; + exit(1); + }; + yesPSFpha = true; + } + else if (strcmp(token, "DISPLACE_CENTER") == 0) + { + token = strtok(NULL, " "); + param_device.maxDisplaceCenter = int(atoi(token)); + if (param_device.maxDisplaceCenter < 0) + { + cout << "*** Error: Negative MAX_D_CENTER "; + exit(1); + } + cout << "Maximum displacement Center " << param_device.maxDisplaceCenter + << "\n"; + token = strtok(NULL, " "); + param_device.GridSpaceCenter = int(atoi(token)); + if (param_device.GridSpaceCenter < 0) + { + cout << "*** Error: Negative PIXEL_GRID_CENTER "; + exit(1); + } + cout << "Grid space displacement center " << param_device.GridSpaceCenter + << "\n"; + yesMDC = true; + } + else if (strcmp(token, "WRITE_PROB_ANGLES") == + 0) // Key word if writing down each angle probabilities + { + token = strtok(NULL, " "); + param_device.writeAngles = int(atoi(token)); + if (param_device.writeAngles < 0) + { + cout << "*** Error: Negative 
WRITE_PROB_ANGLES "; + exit(1); + } + cout << "Writing " << param_device.writeAngles + << " Probabilies of each angle \n"; + } + else if (strcmp(token, "IGNORE_PDB") == 0) // Ignore PDB extension + { + ignorePDB = true; + cout << "Ignoring PDB extension in model file \n"; + } + else if (strcmp(token, "NO_PROJECT_RADIUS") == + 0) // If projecting CA with amino-acid radius + { + doaaradius = false; + cout << "Not Projecting corresponding radius \n"; + } + else if (strcmp(token, "WRITE_CTF_PARAM") == 0) // Number of Euler angle + // tripplets in non uniform + // Euler angle sampling + { + writeCTF = true; + token = strtok(NULL, " "); + cout << "Writing CTF parameters from PSF parameters that maximize the " + "posterior. \n"; + } + else if (strcmp(token, "NO_CENTEROFMASS") == 0) // Number of Euler angle + // tripplets in non uniform + // Euler angle sampling + { + nocentermass = true; + cout << "BE CAREFUL CENTER OF MASS IS NOT REMOVED \n Calculated images " + "might be out of range \n"; + } + else if (strcmp(token, "PRINT_ROTATED_MODELS") == 0) // Number of Euler + // angle tripplets in + // non uniform Euler + // angle sampling + { + printrotmod = true; + cout << "PRINTING out rotatted models (best for debugging)\n"; + } + else if (strcmp(token, "NO_MAP_NORM") == 0) + { + notnormmap = true; + cout << "NOT NORMALIZING MAP\n"; + } + else if (strcmp(token, "PRIOR_MODEL") == 0) + { + token = strtok(NULL, " "); + priorMod = atof(token); + cout << "MODEL PRIOR Probability " << priorMod << "\n"; + } + else if (strcmp(token, "PRIOR_ANGLES") == 0) + { + yespriorAngles = true; + cout << "READING Priors for Orientations in additonal orientation file\n"; + } + else if (strcmp(token, "SHIFT_X") == 0) + { + token = strtok(NULL, " "); + shiftX = atoi(token); + cout << "Shifting initial model X by " << shiftX << "\n"; + } + else if (strcmp(token, "SHIFT_Y") == 0) + { + token = strtok(NULL, " "); + shiftY = atoi(token); + cout << "Shifting initial model Y by " << shiftY << "\n"; + } 
+ else if (strcmp(token, "SIGMA_PRIOR_B_CTF") == 0) + { + token = strtok(NULL, " "); + param_device.sigmaPriorbctf = atof(token); + cout << "Chainging Gaussian width in Prior of Envelope b parameter: " + << param_device.sigmaPriorbctf << "\n"; + } + else if (strcmp(token, "SIGMA_PRIOR_DEFOCUS") == 0) + { + token = strtok(NULL, " "); + param_device.sigmaPriordefo = atof(token); + cout << "Gaussian Width in Prior of defocus parameter: " + << param_device.sigmaPriordefo << "\n"; + } + else if (strcmp(token, "PRIOR_DEFOCUS_CENTER") == 0) + { + token = strtok(NULL, " "); + param_device.Priordefcent = atof(token); + cout << "Gaussian Center in Prior of defocus parameter: " + << param_device.Priordefcent << "\n"; + } + else if (strcmp(token, "SIGMA_PRIOR_AMP_CTF") == 0) + { + token = strtok(NULL, " "); + param_device.sigmaPrioramp = atof(token); + cout << "Gaussian Width in Prior of defocus parameter: " + << param_device.sigmaPriordefo << "\n"; + } + else if (strcmp(token, "PRIOR_AMP_CTF_CENTER") == 0) + { + token = strtok(NULL, " "); + param_device.Priorampcent = atof(token); + cout << "Gaussian Center in Prior of defocus parameter: " + << param_device.Priordefcent << "\n"; + } + else if (strcmp(token, "IGNORE_POINTSOUT") == 0) + { + ignorepointsout = true; + cout << "Ignoring model points outside the map\n"; + } + else if (strcmp(token, "PRINT_ROTATED_MODELS") == 0) // Number of Euler + // angle tripplets in + // non uniform Euler + // angle sampling + { + printrotmod = true; + cout << "PRINTING out rotatted models (best for debugging)\n"; } + } input.close(); //************** Checks/Controlls for INPUT - if( not ( yesPixSi ) ){ cout << "**** INPUT MISSING: Please provide PIXEL_SIZE\n" ; exit (1);}; - if( not ( yesNumPix ) ){ cout << "**** INPUT MISSING: Please provide NUMBER_PIXELS \n" ; exit (1);}; - if(!notuniformangles){ - if(!doquater){ - if( not ( yesGPal) ) { cout << "**** INPUT MISSING: Please provide GRIDPOINTS_ALPHA \n" ; exit (1);}; - if( not ( yesGPbe )) { 
cout << "**** INPUT MISSING: Please provide GRIDPOINTS_BETA \n" ; exit (1);}; - }else if (!yesquatgrid){ - cout << "**** INPUT MISSING: Please provide GRIDPOINTS_QUATERNION \n" ; exit (1); + if (not(yesPixSi)) + { + cout << "**** INPUT MISSING: Please provide PIXEL_SIZE\n"; + exit(1); + }; + if (not(yesNumPix)) + { + + cout << "**** INPUT MISSING: Please provide NUMBER_PIXELS \n"; + exit(1); + }; + if (!notuniformangles) + { + if (!doquater) + { + if (not(yesGPal)) + { + cout << "**** INPUT MISSING: Please provide GRIDPOINTS_ALPHA \n"; + exit(1); + }; + if (not(yesGPbe)) + { + cout << "**** INPUT MISSING: Please provide GRIDPOINTS_BETA \n"; + exit(1); + }; + } + else if (!yesquatgrid) + { + cout << "**** INPUT MISSING: Please provide GRIDPOINTS_QUATERNION \n"; + exit(1); } } - if( not ( yesMDC ) ) { cout << "**** INPUT MISSING: Please provide GRID Displacement CENTER \n" ; exit (1);}; - if( param_device.writeCC && param_device.CCdisplace < 1 ){ cout << "**** INPUT MISSING: Please provide CROSSCOR_DISPLACE \n" ; exit (1);}; - if( param_device.writeCC) {if(!param_device.CCwithBayes ){ cout << "Remark:: Not Using Bayesian method to store Cross-Correlation.\n Only Printing out Maximum\n";} - if(param_device.flipped){ cout << "Remark:: Micrographs are Flipped = Particles are white\n";} else { cout << "Remark:: Micrographs are NOT Flipped = Particles are dark\n";} - if(param_device.writeAngles){cout << "Calculate Cross-cor and write prob angles are mutualy exclusive options\n"; exit(1);} - } - + if (not(yesMDC)) + { + cout << "**** INPUT MISSING: Please provide GRID Displacement CENTER \n"; + exit(1); + }; - cout << "To verify input of Priors:\n" ; + cout << "To verify input of Priors:\n"; cout << "Sigma Prior B-Env: " << param_device.sigmaPriorbctf << "\n"; cout << "Sigma Prior Defocus: " << param_device.sigmaPriordefo << "\n"; - cout << "Center Prior Defocus: " <<param_device.Priordefcent <<"\n"; + cout << "Center Prior Defocus: " << param_device.Priordefcent << "\n"; 
// PSF or CTF Checks and asigments - if(usepsf){ - if( not ( yesPSFpha ) ){ cout << "**** INPUT MISSING: Please provide Grid PSF PHASE \n" ; exit (1);}; - if( not ( yesPSFenv ) ){ cout << "**** INPUT MISSING: Please provide Grid PSF ENVELOPE \n" ; exit (1);}; - if( not ( yesAMP ) ){ cout << "**** INPUT MISSING: Please provide Grid PSF AMPLITUD \n" ; exit (1);}; - } else { - //cout << "**Note:: Calculation using CTF values (not PSF). If this is not correct then key word: USE_PSF missing in inputfile**\n"; - if( not ( yesBFact ) ){ cout << "**** INPUT MISSING: Please provide Grid CTF B-ENV \n" ; exit (1);}; - if( not ( yesDefocus ) ){ cout << "**** INPUT MISSING: Please provide Grid CTF Defocus \n" ; exit (1);}; - if( not ( yesAMP ) ){ cout << "**** INPUT MISSING: Please provide Grid CTF AMPLITUD \n" ; exit (1);}; - // Asigning values of phase according to defocus - startGridCTF_phase= startDefocus * M_PI * 2.f * 10000 * elecwavel ; - endGridCTF_phase= endDefocus * M_PI * 2.f * 10000 * elecwavel ; - //Asigning values of envelope according to b-envelope (not b-factor) - startGridEnvelop = startBfactor ;// 2.f; - endGridEnvelop = endBfactor ; // / 2.f; - param_device.Priordefcent *= M_PI * 2.f * 10000 * elecwavel ; - param_device.sigmaPriordefo *= M_PI * 2.f * 10000 * elecwavel ; + if (usepsf) + { + if (not(yesPSFpha)) + { + cout << "**** INPUT MISSING: Please provide Grid PSF PHASE \n"; + exit(1); + }; + if (not(yesPSFenv)) + { + cout << "**** INPUT MISSING: Please provide Grid PSF ENVELOPE \n"; + exit(1); + }; + if (not(yesAMP)) + { + cout << "**** INPUT MISSING: Please provide Grid PSF AMPLITUD \n"; + exit(1); + }; + } + else + { + // cout << "**Note:: Calculation using CTF values (not PSF). 
If this is not + // correct then key word: USE_PSF missing in inputfile**\n"; + if (not(yesBFact)) + { + cout << "**** INPUT MISSING: Please provide Grid CTF B-ENV \n"; + exit(1); + }; + if (not(yesDefocus)) + { + cout << "**** INPUT MISSING: Please provide Grid CTF Defocus \n"; + exit(1); + }; + if (not(yesAMP)) + { + cout << "**** INPUT MISSING: Please provide Grid CTF AMPLITUD \n"; + exit(1); + }; + // Asigning values of phase according to defocus + startGridCTF_phase = startDefocus * M_PI * 2.f * 10000 * elecwavel; + endGridCTF_phase = endDefocus * M_PI * 2.f * 10000 * elecwavel; + // Asigning values of envelope according to b-envelope (not b-factor) + startGridEnvelop = startBfactor; // 2.f; + endGridEnvelop = endBfactor; // / 2.f; + param_device.Priordefcent *= M_PI * 2.f * 10000 * elecwavel; + param_device.sigmaPriordefo *= M_PI * 2.f * 10000 * elecwavel; } - if(elecwavel==0.019688)cout << "Using default electron wave length: 0.019688 (A) of 300kV microscope\n"; + if (elecwavel == 0.019688) + cout << "Using default electron wave length: 0.019688 (A) of 300kV " + "microscope\n"; param_device.NumberFFTPixels1D = param_device.NumberPixels / 2 + 1; FFTMapSize = param_device.NumberPixels * param_device.NumberFFTPixels1D; - if(writeCTF && !usepsf){ - cout << "Writing CTF is only valid when integrating over the PSF\n"; exit(1); + nTotParallelMaps = CUDA_FFTS_AT_ONCE; + + if (writeCTF && !usepsf) + { + cout << "Writing CTF is only valid when integrating over the PSF\n"; + exit(1); } cout << " +++++++++++++++++++++++++++++++++++++++++ \n"; - return(0); + return (0); } -int bioem_param::forprintBest(const char* fileinput) +int bioem_param::forprintBest(const char *fileinput) { // ************************************************************************************** - // **********Alternative parameter routine for only printing out a map ****************** + // **********Alternative parameter routine for only printing out a map + // ****************** ifstream 
input(fileinput); - withnoise=false; - showrotatemod=false; - - param_device.flipped=false; - param_device.debugterm=false; - param_device.writeCC=false; - param_device.CCwithBayes=true; - writeCTF=false; - elecwavel=0.019866; - ignoreCCoff=false; - doquater=false; - nocentermass=false; - printrotmod=false; - readquatlist=false; - doaaradius=true; - shiftX=0; - shiftY=0; - + withnoise = false; + showrotatemod = false; + + writeCTF = false; + elecwavel = 0.019866; + doquater = false; + nocentermass = false; + printrotmod = false; + readquatlist = false; + doaaradius = true; + shiftX = 0; + shiftY = 0; + stnoise = 1; //**** Different keywords! For printing MAP ************ if (!input.good()) - { - cout << "Failed to open Best Parameter file: " << fileinput << "\n"; - exit(1); - } + { + cout << "Failed to open Best Parameter file: " << fileinput << "\n"; + exit(1); + } delete[] angles; - angles = new myfloat3_t[ 1 ] ; //Only best orientation + angles = new myfloat3_t[1]; // Only best orientation char line[512] = {0}; char saveline[512]; - bool ctfparam=false; + bool ctfparam = false; - usepsf=false; + usepsf = false; cout << "\n +++++++++++++++++++++++++++++++++++++++++ \n"; cout << "\n ONLY READING BEST PARAMETERS \n"; cout << "\n FOR PRINTING MAXIMIZED MAP \n"; cout << " +++++++++++++++++++++++++++++++++++++++++ \n"; while (!input.eof()) - { - input.getline(line, 512); - strcpy(saveline, line); - char *token = strtok(line, " "); - - if (token == NULL || line[0] == '#' || strlen(token) == 0) - { - // comment or blank line - } - else if (strcmp(token, "PIXEL_SIZE") == 0) - { - token = strtok(NULL, " "); - pixelSize = atof(token); - if (pixelSize < 0 ) { cout << "*** Error: Negative pixelSize "; exit(1);} - cout << "Pixel Sixe " << pixelSize << "\n"; - } - else if (strcmp(token, "NUMBER_PIXELS") == 0) - { - token = strtok(NULL, " "); - param_device.NumberPixels = int(atoi(token)); - if (param_device.NumberPixels < 0 ) { cout << "*** Error: Negative Number of Pixels "; 
exit(1);} - cout << "Number of Pixels " << param_device.NumberPixels << "\n"; - } - else if (strcmp(token, "BEST_ALPHA") == 0) - { - token = strtok(NULL, " "); - angles[0].pos[0] = atof(token); - cout << "Best Alpha " << angles[0].pos[0] << "\n"; - } - else if (strcmp(token, "BEST_BETA") == 0) - { - token = strtok(NULL, " "); - angles[0].pos[1] = atof(token); - cout << "Best beta " << angles[0].pos[1] << "\n"; - } - else if (strcmp(token, "BEST_GAMMA") == 0) - { - token = strtok(NULL, " "); - angles[0].pos[2] = atof(token); - cout << "Best Gamma " << angles[0].pos[2] << "\n"; - } - else if (strcmp(token, "USE_QUATERNIONS") == 0) - { - cout << "Orientations with Quaternions. \n"; - doquater= true; - } - else if (strcmp(token, "BEST_Q1") == 0) - { - token = strtok(NULL, " "); - angles[0].pos[0] = atof(token); - cout << "Best q1 " << angles[0].pos[0] << "\n"; - } - else if (strcmp(token, "BEST_Q2") == 0) - { - token = strtok(NULL, " "); - angles[0].pos[1] = atof(token); - cout << "Best q2 " << angles[0].pos[1] << "\n"; - } - else if (strcmp(token, "BEST_Q3") == 0) - { - token = strtok(NULL, " "); - angles[0].pos[2] = atof(token); - cout << "Best Q3 " << angles[0].pos[2] << "\n"; - } - else if (strcmp(token, "BEST_Q4") == 0) - { - token = strtok(NULL, " "); - angles[0].quat4= atof(token); - cout << "Best Q3 " << angles[0].quat4 << "\n"; - } + { + input.getline(line, 512); + strcpy(saveline, line); + char *token = strtok(line, " "); - else if (strcmp(token, "USE_PSF") == 0) - { - usepsf=true; - cout << "IMPORTANT: Using Point Spread Function. Thus, all parameters are in Real Space. 
\n"; - } - else if (strcmp(token, "BEST_PSF_ENVELOPE") == 0) - { - token = strtok(NULL, " "); - startGridEnvelop = atof(token); - if (startGridEnvelop < 0 ) { cout << "*** Error: Negative START_ENVELOPE "; exit(1);} - cout << "Best Envelope PSF " << startGridEnvelop << "\n"; - } - else if (strcmp(token,"BEST_PSF_PHASE")==0) - { - token = strtok(NULL," "); - startGridCTF_phase=atof(token); - cout << "Best Phase PSF " << startGridCTF_phase << "\n"; - } - else if (strcmp(token,"BEST_PSF_AMP")==0) - { - token = strtok(NULL," "); - startGridCTF_amp=atof(token); - if(startGridCTF_amp <0){cout << "Error Negative Amplitud\n";exit(1);} - cout << "Best Amplitud PSF " << startGridCTF_amp << "\n"; - } - else if (strcmp(token, "BEST_CTF_B_ENV") == 0) - { - token = strtok(NULL, " "); - startGridEnvelop = atof(token);// / 2.f; - if (startGridEnvelop < 0 ) { cout << "*** Error: Negative START B-Env "; exit(1);} - cout << "Best B- Env " << startGridEnvelop << "\n"; - ctfparam=true; - } - else if (strcmp(token,"BEST_CTF_DEFOCUS")==0) - { - token = strtok(NULL," "); - startGridCTF_phase=atof(token)* M_PI * 2.f * 10000 * elecwavel; - cout << "Best Defocus " << startGridCTF_phase << "\n"; - ctfparam=true; - } - else if (strcmp(token,"BEST_CTF_AMP")==0) - { - token = strtok(NULL," "); - startGridCTF_amp=atof(token); - if(startGridCTF_amp <0){cout << "Error Negative Amplitud\n";exit(1);} - cout << "Best Amplitud " << startGridCTF_amp << "\n"; - ctfparam=true; - } - else if (strcmp(token, "BEST_DX") == 0) - { - token = strtok(NULL, " "); - ddx = atoi(token); - cout << "Best dx " << ddx << "\n"; - } - else if (strcmp(token, "BEST_DY") == 0) - { - token = strtok(NULL, " "); - ddy = atoi(token); - cout << "Best dy " << ddy << "\n"; - } - else if (strcmp(token, "BEST_NORM") == 0) - { - token = strtok(NULL, " "); - bestnorm= atof(token); - cout << "Best norm " << bestnorm << "\n"; - } - else if (strcmp(token, "BEST_OFFSET") == 0) - { - token = strtok(NULL, " "); - bestoff = atof(token); - cout 
<< "Best offset " << bestoff << "\n"; - } - else if (strcmp(token, "WITHNOISE") == 0) - { - token = strtok(NULL, " "); - stnoise = atof(token); - withnoise=true; - cout << "Including noise with standard deviation " << stnoise << "\n"; - } - else if (strcmp(token, "NO_PROJECT_RADIUS") == 0) //If projecting CA with amino-acid radius - { - doaaradius = false; - cout << "Not projecting corresponding radius \n"; - } - else if (strcmp(token, "PRINT_ROTATED_MODELS") == 0)//Number of Euler angle tripplets in non uniform Euler angle sampling - { - printrotmod=true; - cout << "PRINTING out rotatted models (best for debugging)\n"; - } - else if (strcmp(token, "SHIFT_X") == 0) - { - token = strtok(NULL, " "); - shiftX=atoi(token); - cout << "Shifting initial model X by "<< shiftX << "\n" ; - } - else if (strcmp(token, "SHIFT_Y") == 0) - { - token = strtok(NULL, " "); - shiftY=atoi(token); - cout << "Shifting initial model Y by "<< shiftY << "\n" ; - } + if (token == NULL || line[0] == '#' || strlen(token) == 0) + { + // comment or blank line + } + else if (strcmp(token, "PIXEL_SIZE") == 0) + { + token = strtok(NULL, " "); + pixelSize = atof(token); + if (pixelSize < 0) + { + cout << "*** Error: Negative pixelSize "; + exit(1); + } + cout << "Pixel Sixe " << pixelSize << "\n"; + } + else if (strcmp(token, "NUMBER_PIXELS") == 0) + { + token = strtok(NULL, " "); + param_device.NumberPixels = int(atoi(token)); + if (param_device.NumberPixels < 0) + { + cout << "*** Error: Negative Number of Pixels "; + exit(1); + } + cout << "Number of Pixels " << param_device.NumberPixels << "\n"; + } + else if (strcmp(token, "BEST_ALPHA") == 0) + { + token = strtok(NULL, " "); + angles[0].pos[0] = atof(token); + cout << "Best Alpha " << angles[0].pos[0] << "\n"; + } + else if (strcmp(token, "BEST_BETA") == 0) + { + token = strtok(NULL, " "); + angles[0].pos[1] = atof(token); + cout << "Best beta " << angles[0].pos[1] << "\n"; + } + else if (strcmp(token, "BEST_GAMMA") == 0) + { + token = 
strtok(NULL, " "); + angles[0].pos[2] = atof(token); + cout << "Best Gamma " << angles[0].pos[2] << "\n"; + } + else if (strcmp(token, "USE_QUATERNIONS") == 0) + { + cout << "Orientations with Quaternions. \n"; + doquater = true; + } + else if (strcmp(token, "BEST_Q1") == 0) + { + token = strtok(NULL, " "); + angles[0].pos[0] = atof(token); + cout << "Best q1 " << angles[0].pos[0] << "\n"; + } + else if (strcmp(token, "BEST_Q2") == 0) + { + token = strtok(NULL, " "); + angles[0].pos[1] = atof(token); + cout << "Best q2 " << angles[0].pos[1] << "\n"; + } + else if (strcmp(token, "BEST_Q3") == 0) + { + token = strtok(NULL, " "); + angles[0].pos[2] = atof(token); + cout << "Best Q3 " << angles[0].pos[2] << "\n"; + } + else if (strcmp(token, "BEST_Q4") == 0) + { + token = strtok(NULL, " "); + angles[0].quat4 = atof(token); + cout << "Best Q3 " << angles[0].quat4 << "\n"; + } + else if (strcmp(token, "USE_PSF") == 0) + { + usepsf = true; + cout << "IMPORTANT: Using Point Spread Function. Thus, all parameters " + "are in Real Space. 
\n"; + } + else if (strcmp(token, "BEST_PSF_ENVELOPE") == 0) + { + token = strtok(NULL, " "); + startGridEnvelop = atof(token); + if (startGridEnvelop < 0) + { + cout << "*** Error: Negative START_ENVELOPE "; + exit(1); + } + cout << "Best Envelope PSF " << startGridEnvelop << "\n"; + } + else if (strcmp(token, "BEST_PSF_PHASE") == 0) + { + token = strtok(NULL, " "); + startGridCTF_phase = atof(token); + cout << "Best Phase PSF " << startGridCTF_phase << "\n"; + } + else if (strcmp(token, "BEST_PSF_AMP") == 0) + { + token = strtok(NULL, " "); + startGridCTF_amp = atof(token); + if (startGridCTF_amp < 0) + { + cout << "Error Negative Amplitud\n"; + exit(1); + } + cout << "Best Amplitud PSF " << startGridCTF_amp << "\n"; + } + else if (strcmp(token, "BEST_CTF_B_ENV") == 0) + { + token = strtok(NULL, " "); + startGridEnvelop = atof(token); // / 2.f; + if (startGridEnvelop < 0) + { + cout << "*** Error: Negative START B-Env "; + exit(1); + } + cout << "Best B- Env " << startGridEnvelop << "\n"; + ctfparam = true; + } + else if (strcmp(token, "BEST_CTF_DEFOCUS") == 0) + { + token = strtok(NULL, " "); + startGridCTF_phase = atof(token) * M_PI * 2.f * 10000 * elecwavel; + cout << "Best Defocus " << startGridCTF_phase << "\n"; + ctfparam = true; + } + else if (strcmp(token, "BEST_CTF_AMP") == 0) + { + token = strtok(NULL, " "); + startGridCTF_amp = atof(token); + if (startGridCTF_amp < 0) + { + cout << "Error Negative Amplitud\n"; + exit(1); + } + cout << "Best Amplitud " << startGridCTF_amp << "\n"; + ctfparam = true; } + else if (strcmp(token, "BEST_DX") == 0) + { + token = strtok(NULL, " "); + ddx = atoi(token); + cout << "Best dx " << ddx << "\n"; + } + else if (strcmp(token, "BEST_DY") == 0) + { + token = strtok(NULL, " "); + ddy = atoi(token); + cout << "Best dy " << ddy << "\n"; + } + else if (strcmp(token, "BEST_NORM") == 0) + { + token = strtok(NULL, " "); + bestnorm = atof(token); + cout << "Best norm " << bestnorm << "\n"; + } + else if (strcmp(token, 
"BEST_OFFSET") == 0) + { + token = strtok(NULL, " "); + bestoff = atof(token); + cout << "Best offset " << bestoff << "\n"; + } + else if (strcmp(token, "WITHNOISE") == 0) + { + token = strtok(NULL, " "); + stnoise = atof(token); + withnoise = true; + cout << "Including noise with standard deviation " << stnoise << "\n"; + } + else if (strcmp(token, "NO_PROJECT_RADIUS") == + 0) // If projecting CA with amino-acid radius + { + doaaradius = false; + cout << "Not projecting corresponding radius \n"; + } + else if (strcmp(token, "PRINT_ROTATED_MODELS") == 0) // Number of Euler + // angle tripplets in + // non uniform Euler + // angle sampling + { + printrotmod = true; + cout << "PRINTING out rotatted models (best for debugging)\n"; + } + else if (strcmp(token, "SHIFT_X") == 0) + { + token = strtok(NULL, " "); + shiftX = atoi(token); + cout << "Shifting initial model X by " << shiftX << "\n"; + } + else if (strcmp(token, "SHIFT_Y") == 0) + { + token = strtok(NULL, " "); + shiftY = atoi(token); + cout << "Shifting initial model Y by " << shiftY << "\n"; + } + } - if(doquater){ - if(angles[0].quat4*angles[0].quat4>1){cout << " Problem with quaternion "<< angles[0].quat4 << "\n";exit(1);} - if(angles[0].pos[0]*angles[0].pos[0]>1){cout << " Problem with quaternion "<< angles[0].pos[0] << "\n";exit(1);} - if(angles[0].pos[1]*angles[0].pos[1]>1){cout << " Problem with quaternion "<< angles[0].pos[1] << "\n";exit(1);} - if(angles[0].pos[2]*angles[0].pos[2]>1){cout << " Problem with quaternion "<< angles[0].pos[2] << "\n";exit(1);} - } + if (doquater) + { + if (angles[0].quat4 * angles[0].quat4 > 1) + { + cout << " Problem with quaternion " << angles[0].quat4 << "\n"; + exit(1); + } + if (angles[0].pos[0] * angles[0].pos[0] > 1) + { + cout << " Problem with quaternion " << angles[0].pos[0] << "\n"; + exit(1); + } + if (angles[0].pos[1] * angles[0].pos[1] > 1) + { + cout << " Problem with quaternion " << angles[0].pos[1] << "\n"; + exit(1); + } + if (angles[0].pos[2] * 
angles[0].pos[2] > 1) + { + cout << " Problem with quaternion " << angles[0].pos[2] << "\n"; + exit(1); + } + } input.close(); - if(usepsf && ctfparam){ - cout << "Inconsitent Input: Using both PSF and CTF ?\n"; exit(1); + if (usepsf && ctfparam) + { + cout << "Inconsitent Input: Using both PSF and CTF ?\n"; + exit(1); } - //Automatic definitions - numberGridPointsCTF_amp = 1 ; + // Automatic definitions + numberGridPointsCTF_amp = 1; gridCTF_amp = startGridCTF_amp; numberGridPointsCTF_phase = 1; gridCTF_phase = startGridCTF_phase; - numberGridPointsEnvelop = 1 ; + numberGridPointsEnvelop = 1; gridEnvelop = startGridEnvelop; - doquater=false; + // doquater = false; param_device.NumberFFTPixels1D = param_device.NumberPixels / 2 + 1; FFTMapSize = param_device.NumberPixels * param_device.NumberFFTPixels1D; - return 0; + nTotParallelMaps = CUDA_FFTS_AT_ONCE; + return 0; } void bioem_param::PrepareFFTs() { //********** PREPARING THE PLANS FOR THE FFTS ****************** - if (mpi_rank == 0) cout << "Preparing FFTs\n"; + if (mpi_rank == 0) + cout << "Preparing FFTs\n"; releaseFFTPlans(); mycomplex_t *tmp_map, *tmp_map2; - tmp_map = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels); - tmp_map2 = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels); + tmp_map = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * + param_device.NumberPixels * + param_device.NumberPixels); + tmp_map2 = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * + param_device.NumberPixels * + param_device.NumberPixels); Alignment = 64; - fft_plan_c2c_forward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_FORWARD, FFTW_MEASURE | FFTW_DESTROY_INPUT); - fft_plan_c2c_backward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_BACKWARD, FFTW_MEASURE | FFTW_DESTROY_INPUT); - fft_plan_r2c_forward = 
myfftw_plan_dft_r2c_2d(param_device.NumberPixels, param_device.NumberPixels, (myfloat_t*) tmp_map, tmp_map2, FFTW_MEASURE | FFTW_DESTROY_INPUT); - fft_plan_c2r_backward = myfftw_plan_dft_c2r_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, (myfloat_t*) tmp_map2, FFTW_MEASURE | FFTW_DESTROY_INPUT); - - if (fft_plan_c2c_forward == 0 || fft_plan_c2c_backward == 0 || fft_plan_r2c_forward == 0 || fft_plan_c2r_backward == 0) - { - cout << "Error planing FFTs\n"; - exit(1); - } + fft_plan_c2c_forward = myfftw_plan_dft_2d( + param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, + FFTW_FORWARD, FFTW_MEASURE | FFTW_DESTROY_INPUT); + fft_plan_c2c_backward = myfftw_plan_dft_2d( + param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, + FFTW_BACKWARD, FFTW_MEASURE | FFTW_DESTROY_INPUT); + fft_plan_r2c_forward = myfftw_plan_dft_r2c_2d( + param_device.NumberPixels, param_device.NumberPixels, + (myfloat_t *) tmp_map, tmp_map2, FFTW_MEASURE | FFTW_DESTROY_INPUT); + fft_plan_c2r_backward = myfftw_plan_dft_c2r_2d( + param_device.NumberPixels, param_device.NumberPixels, tmp_map, + (myfloat_t *) tmp_map2, FFTW_MEASURE | FFTW_DESTROY_INPUT); + + if (fft_plan_c2c_forward == 0 || fft_plan_c2c_backward == 0 || + fft_plan_r2c_forward == 0 || fft_plan_c2r_backward == 0) + { + cout << "Error planing FFTs\n"; + exit(1); + } myfftw_free(tmp_map); myfftw_free(tmp_map2); const int count = omp_get_max_threads(); - fft_scratch_complex = new mycomplex_t*[count]; - fft_scratch_real = new myfloat_t*[count]; + fft_scratch_complex = new mycomplex_t *[count]; + fft_scratch_real = new myfloat_t *[count]; #pragma omp parallel { #pragma omp critical { const int i = omp_get_thread_num(); - fft_scratch_complex[i] = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberFFTPixels1D); - fft_scratch_real[i] = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param_device.NumberPixels * param_device.NumberPixels); + 
fft_scratch_complex[i] = (mycomplex_t *) myfftw_malloc( + sizeof(mycomplex_t) * param_device.NumberPixels * + param_device.NumberFFTPixels1D); + fft_scratch_real[i] = (myfloat_t *) myfftw_malloc( + sizeof(myfloat_t) * param_device.NumberPixels * + param_device.NumberPixels); } } fft_plans_created = 1; } - void bioem_param::releaseFFTPlans() { if (fft_plans_created) + { + const int count = omp_get_max_threads(); + for (int i = 0; i < count; i++) { - const int count = omp_get_max_threads(); - for (int i = 0;i < count;i++) - { - myfftw_free(fft_scratch_complex[i]); - myfftw_free(fft_scratch_real[i]); - } - delete[] fft_scratch_complex; - delete[] fft_scratch_real; - - myfftw_destroy_plan(fft_plan_c2c_forward); - myfftw_destroy_plan(fft_plan_c2c_backward); - myfftw_destroy_plan(fft_plan_r2c_forward); - myfftw_destroy_plan(fft_plan_c2r_backward); - myfftw_cleanup(); + myfftw_free(fft_scratch_complex[i]); + myfftw_free(fft_scratch_real[i]); } + delete[] fft_scratch_complex; + delete[] fft_scratch_real; + + myfftw_destroy_plan(fft_plan_c2c_forward); + myfftw_destroy_plan(fft_plan_c2c_backward); + myfftw_destroy_plan(fft_plan_r2c_forward); + myfftw_destroy_plan(fft_plan_c2r_backward); + myfftw_cleanup(); + } fft_plans_created = 0; } -int bioem_param::CalculateGridsParam(const char* fileangles) //TO DO FOR QUATERNIONS +int bioem_param::CalculateGridsParam( + const char *fileangles) // TO DO FOR QUATERNIONS { // ************************************************************************************** - // **************** Routine that pre-calculates Orientation Grids********************** + // **************** Routine that pre-calculates Orientation + // Grids********************** // ************************************************************************************ - - if(!doquater){ + if (!doquater) + { //*********** With Euler angles ******************* cout << "Analysis Using Default Euler Angles\n"; - if(!notuniformangles){ + if (!notuniformangles) + { - 
if(yespriorAngles){ - cout << "Error: This option is not valid with prior for orientations\nPlease provide separate file with orientations and priors"; - exit(1); + if (yespriorAngles) + { + cout << "Error: This option is not valid with prior for " + "orientations\nPlease provide separate file with orientations " + "and priors"; + exit(1); } - + cout << "Calculating Grids in Euler Angles\n"; - + myfloat_t grid_alpha, cos_grid_beta; int n = 0; - //alpha and gamma are uniform in -PI,PI + // alpha and gamma are uniform in -PI,PI grid_alpha = 2.f * M_PI / (myfloat_t) angleGridPointsAlpha; - //cosine beta is uniform in -1,1 + // cosine beta is uniform in -1,1 cos_grid_beta = 2.f / (myfloat_t) angleGridPointsBeta; // Euler Angle Array - angles = (myfloat3_t*) mallocchk( angleGridPointsAlpha * angleGridPointsBeta * angleGridPointsAlpha * sizeof(myfloat3_t)); - - - for (int ialpha = 0; ialpha < angleGridPointsAlpha; ialpha ++) - { - for (int ibeta = 0; ibeta < angleGridPointsBeta; ibeta ++) - { - for (int igamma = 0; igamma < angleGridPointsAlpha; igamma ++) - { - angles[n].pos[0] = (myfloat_t) ialpha * grid_alpha - M_PI + grid_alpha * 0.5f; //ALPHA centered in the middle - angles[n].pos[1] = acos((myfloat_t) ibeta * cos_grid_beta - 1 + cos_grid_beta * 0.5f); //BETA centered in the middle - angles[n].pos[2] = (myfloat_t) igamma * grid_alpha - M_PI + grid_alpha * 0.5f; //GAMMA centered in the middle - angles[n].quat4 =0.0; - n++; - } - } - } - nTotGridAngles = n; - voluang= grid_alpha * grid_alpha * cos_grid_beta / (2.f * M_PI) / (2.f * M_PI) / 2.f * priorMod; + angles = + (myfloat3_t *) mallocchk(angleGridPointsAlpha * angleGridPointsBeta * + angleGridPointsAlpha * sizeof(myfloat3_t)); - } else{ + for (int ialpha = 0; ialpha < angleGridPointsAlpha; ialpha++) + { + for (int ibeta = 0; ibeta < angleGridPointsBeta; ibeta++) + { + for (int igamma = 0; igamma < angleGridPointsAlpha; igamma++) + { + angles[n].pos[0] = + (myfloat_t) ialpha * grid_alpha - M_PI + + grid_alpha * 
0.5f; // ALPHA centered in the middle + angles[n].pos[1] = + acos((myfloat_t) ibeta * cos_grid_beta - 1 + + cos_grid_beta * 0.5f); // BETA centered in the middle + angles[n].pos[2] = + (myfloat_t) igamma * grid_alpha - M_PI + + grid_alpha * 0.5f; // GAMMA centered in the middle + angles[n].quat4 = 0.0; + n++; + } + } + } + nTotGridAngles = n; + voluang = grid_alpha * grid_alpha * cos_grid_beta / (2.f * M_PI) / + (2.f * M_PI) / 2.f * priorMod; + } + else + { - //************ Reading Euler Angles From File ************************** + //************ Reading Euler Angles From File ************************** ifstream input(fileangles); if (!input.good()) - { - cout << "Euler Angle File Failed to open file " << fileangles << " " << endl ; - exit(1); - } + { + cout << "Euler Angle File Failed to open file " << fileangles << " " + << endl; + exit(1); + } char line[512] = {0}; // char saveline[512]; - int n=0; + int n = 0; // First line tels the number of rows input.getline(line, 511); - char tmpVals[36] = {0}; + char tmpVals[36] = {0}; - strncpy (tmpVals, line, 12); - sscanf (tmpVals, "%d", &NotUn_angles); + strncpy(tmpVals, line, 12); + sscanf(tmpVals, "%d", &NotUn_angles); cout << "Number of Euler angles " << NotUn_angles << "\n"; - - if(NotUn_angles<1) { - cout << "\nNot defined number of Euler angles in INPUT file:" << endl ; - // cout << "Use key word: NOT_UNIFORM_TOTAL_ANGS\n"; - exit(1); + if (NotUn_angles < 1) + { + cout << "\nNot defined number of Euler angles in INPUT file:" << endl; + // cout << "Use key word: NOT_UNIFORM_TOTAL_ANGS\n"; + exit(1); } // NotUn_angles=NotUn_angles+1; + angles = (myfloat3_t *) mallocchk(NotUn_angles * sizeof(myfloat3_t)); - angles = (myfloat3_t*) mallocchk( NotUn_angles * sizeof(myfloat3_t)); - - if(yespriorAngles){ - delete[] angprior; - angprior = new myfloat_t[NotUn_angles]; + if (yespriorAngles) + { + delete[] angprior; + angprior = new myfloat_t[NotUn_angles]; } while (!input.eof()) - { + { + + input.getline(line, 511); + + if 
(n < NotUn_angles) + { - input.getline(line, 511); + float a = 0., b = 0., g = 0., pp = 0.; - if(n< NotUn_angles){ + char tmpVals[60] = {0}; - float a=0.,b=0.,g=0.,pp=0.; - - char tmpVals[60] = {0}; + strncpy(tmpVals, line, 12); + sscanf(tmpVals, "%f", &a); - strncpy (tmpVals, line, 12); - sscanf (tmpVals, "%f", &a); + strncpy(tmpVals, line + 12, 12); + sscanf(tmpVals, "%f", &b); - strncpy (tmpVals, line + 12, 12); - sscanf (tmpVals, "%f", &b); + strncpy(tmpVals, line + 24, 12); + sscanf(tmpVals, "%f", &g); - strncpy (tmpVals, line + 24, 12); - sscanf (tmpVals, "%f", &g); + if (yespriorAngles) + { + strncpy(tmpVals, line + 36, 12); + sscanf(tmpVals, "%f", &pp); + if (pp < 0.0000001) + cout << "Sure you're input is correct? Very small prior.\n"; + angprior[n] = (myfloat_t) pp; + } - if(yespriorAngles){ - strncpy (tmpVals, line + 36, 12); - sscanf (tmpVals, "%f", &pp); - if(pp <0.0000001)cout << "Sure you're input is correct? Very small prior.\n"; - angprior[n] = (myfloat_t) pp; - } - - angles[n].pos[0] = (myfloat_t) a; - angles[n].pos[1] = (myfloat_t) b; - angles[n].pos[2] = (myfloat_t) g; - angles[n].quat4 =0.0;//just to be sure */ + angles[n].pos[0] = (myfloat_t) a; + angles[n].pos[1] = (myfloat_t) b; + angles[n].pos[2] = (myfloat_t) g; + angles[n].quat4 = 0.0; // just to be sure */ #ifdef DEBUG -// if(yespriorAngles) -cout << "check orient: " << n << " " << " " << angles[n].pos[0] << " " << angles[n].pos[1] << " " << angles[n].pos[2] << " prior:\n ";// << angprior[n]<< "\n"; + // if(yespriorAngles) + cout << "check orient: " << n << " " + << " " << angles[n].pos[0] << " " << angles[n].pos[1] << " " + << angles[n].pos[2] << " prior:\n "; // << angprior[n]<< "\n"; #endif - } - n++; - if(NotUn_angles+1 < n) { - cout << "Not properly defined total Euler angles " << n << " instead of " << NotUn_angles << "\n"; - exit(1); - } - } + } + n++; + if (NotUn_angles + 1 < n) + { + cout << "Not properly defined total Euler angles " << n + << " instead of " << NotUn_angles << 
"\n"; + exit(1); + } + } nTotGridAngles = NotUn_angles; - voluang= 1./ (myfloat_t) NotUn_angles * priorMod; + voluang = 1. / (myfloat_t) NotUn_angles * priorMod; input.close(); } - - } else { + } + else + { //************** Analysis with Quaternions - if(!notuniformangles){ + if (!notuniformangles) + { //************* Grid of Quaternions ******************* cout << "Calculating Grids in Quaterions\n "; - if(yespriorAngles){ - cout << "This option is not valid with prior for orientations\n It is necessary to provide a separate file with the angles and priors"; - exit(1); + if (yespriorAngles) + { + cout << "This option is not valid with prior for orientations\n It is " + "necessary to provide a separate file with the angles and " + "priors"; + exit(1); } - if (GridPointsQuatern < 0 ) { cout << "*** Missing Gridpoints Quaternions \n after QUATERNIONS (int)\n (int)=Number of gridpoins per dimension"; exit(1);} + if (GridPointsQuatern < 0) + { + cout << "*** Missing Gridpoints Quaternions \n after QUATERNIONS " + "(int)\n (int)=Number of gridpoins per dimension"; + exit(1); + } - myfloat_t dgridq,q1,q2,q3; - int n=0; + myfloat_t dgridq, q1, q2, q3; + int n = 0; - dgridq=2.f/(myfloat_t) (GridPointsQuatern +1); + dgridq = 2.f / (myfloat_t)(GridPointsQuatern + 1); // loop to calculate the number ofpoints in the quaternion shpere rad < 1 - for (int ialpha = 0; ialpha < GridPointsQuatern + 1 ; ialpha ++) - { - q1=(myfloat_t) ialpha * dgridq -1.f + 0.5 * dgridq; - for (int ibeta = 0; ibeta < GridPointsQuatern + 1 ; ibeta ++) - { - q2=(myfloat_t) ibeta * dgridq -1.f + 0.5 * dgridq; - for (int igamma = 0; igamma < GridPointsQuatern + 1; igamma ++) - { - q3= (myfloat_t) igamma * dgridq -1.f + 0.5 * dgridq; - if(q1*q1+q2*q2+q3*q3 <= 1.f)n=n+2; - - } - } - } - - //allocating angles + for (int ialpha = 0; ialpha < GridPointsQuatern + 1; ialpha++) + { + q1 = (myfloat_t) ialpha * dgridq - 1.f + 0.5 * dgridq; + for (int ibeta = 0; ibeta < GridPointsQuatern + 1; ibeta++) + { + q2 = 
(myfloat_t) ibeta * dgridq - 1.f + 0.5 * dgridq; + for (int igamma = 0; igamma < GridPointsQuatern + 1; igamma++) + { + q3 = (myfloat_t) igamma * dgridq - 1.f + 0.5 * dgridq; + if (q1 * q1 + q2 * q2 + q3 * q3 <= 1.f) + n = n + 2; + } + } + } + + // allocating angles nTotGridAngles = n; - angles = (myfloat3_t*) mallocchk( nTotGridAngles * sizeof(myfloat3_t)); - - voluang= dgridq * dgridq * dgridq * priorMod; + angles = (myfloat3_t *) mallocchk(nTotGridAngles * sizeof(myfloat3_t)); + + voluang = dgridq * dgridq * dgridq * priorMod; - n=0; + n = 0; // assigning values - for (int ialpha = 0; ialpha < GridPointsQuatern + 1; ialpha ++) - { - q1=(myfloat_t) ialpha * dgridq -1.f + 0.5 * dgridq; - for (int ibeta = 0; ibeta < GridPointsQuatern + 1; ibeta ++) - { - q2=(myfloat_t) ibeta * dgridq -1.f + 0.5 * dgridq; - for (int igamma = 0; igamma < GridPointsQuatern + 1 ; igamma ++) - { - q3= (myfloat_t) igamma * dgridq -1.f + 0.5 * dgridq; - if(q1*q1+q2*q2+q3*q3 <= 1.f){ - - angles[n].pos[0] = q1; - angles[n].pos[1] = q2; - angles[n].pos[2] = q3; - angles[n].quat4=sqrt(1.f-q1*q1-q2*q2-q3*q3); - n++; - //Adding the negative - angles[n].pos[0] = q1; - angles[n].pos[1] = q2; - angles[n].pos[2] = q3; - angles[n].quat4=-sqrt(1.f-q1*q1-q2*q2-q3*q3); - n++; - } - } - } - } - - } else{ + for (int ialpha = 0; ialpha < GridPointsQuatern + 1; ialpha++) + { + q1 = (myfloat_t) ialpha * dgridq - 1.f + 0.5 * dgridq; + for (int ibeta = 0; ibeta < GridPointsQuatern + 1; ibeta++) + { + q2 = (myfloat_t) ibeta * dgridq - 1.f + 0.5 * dgridq; + for (int igamma = 0; igamma < GridPointsQuatern + 1; igamma++) + { + q3 = (myfloat_t) igamma * dgridq - 1.f + 0.5 * dgridq; + if (q1 * q1 + q2 * q2 + q3 * q3 <= 1.f) + { + + angles[n].pos[0] = q1; + angles[n].pos[1] = q2; + angles[n].pos[2] = q3; + angles[n].quat4 = sqrt(1.f - q1 * q1 - q2 * q2 - q3 * q3); + n++; + // Adding the negative + angles[n].pos[0] = q1; + angles[n].pos[1] = q2; + angles[n].pos[2] = q3; + angles[n].quat4 = -sqrt(1.f - q1 * q1 - q2 * 
q2 - q3 * q3); + n++; + } + } + } + } + } + else + { //******** Reading Quaternions From a File *************************** ifstream input(fileangles); if (!input.good()) - { - cout << "Problem with Quaterion List file " << fileangles << " " << endl ; - exit(1); - } + { + cout << "Problem with Quaterion List file " << fileangles << " " + << endl; + exit(1); + } char line[512] = {0}; - int n=0; + int n = 0; // First line tels the number of rows input.getline(line, 511); int ntotquat; - char tmpVals[60] = {0}; + char tmpVals[60] = {0}; - strncpy (tmpVals, line, 12); - sscanf (tmpVals, "%d", &ntotquat); - if(ntotquat <1){ - cout << "Invalid Number of quaternions " << ntotquat << "\n"; exit(1); - }else{ - cout << "Number of quaternions " << ntotquat << "\n"; + strncpy(tmpVals, line, 12); + sscanf(tmpVals, "%d", &ntotquat); + if (ntotquat < 1) + { + cout << "Invalid Number of quaternions " << ntotquat << "\n"; + exit(1); + } + else + { + cout << "Number of quaternions " << ntotquat << "\n"; } - angles = (myfloat3_t*) mallocchk( ntotquat * sizeof(myfloat3_t)); + angles = (myfloat3_t *) mallocchk(ntotquat * sizeof(myfloat3_t)); // delete[] angles; // angles = new myfloat3_t[ ntotquat] ; - if(yespriorAngles){ - delete[] angprior; - angprior = new myfloat_t[ ntotquat ]; - } + if (yespriorAngles) + { + delete[] angprior; + angprior = new myfloat_t[ntotquat]; + } while (!input.eof()) - { - input.getline(line, 511); - if(n< ntotquat){ - myfloat_t q1,q2,q3,q4,pp; - - q1=-99999; q2=-99999;q3=-99999;q4=-99999; - char tmpVals[60] = {0}; - - strncpy (tmpVals, line, 12); - sscanf (tmpVals, "%f", &q1); - - strncpy (tmpVals, line + 12, 12); - sscanf (tmpVals, "%f", &q2); - - strncpy (tmpVals, line + 24, 12); - sscanf (tmpVals, "%f", &q3); - - strncpy (tmpVals, line + 36, 12); - sscanf (tmpVals, "%f", &q4); - - angles[n].pos[0] = q1; - angles[n].pos[1] = q2; - angles[n].pos[2] = q3; - angles[n].quat4 = q4; - - if(q1<-1 || q1 > 1){ cout << "Error reading quaterions from list. 
Value out of range " << q1 << " row " << n << "\n"; exit(1);}; - if(q2<-1 || q2 > 1){ cout << "Error reading quaterions from list. Value out of range " << q2 << " row " << n << "\n"; exit(1);}; - if(q3<-1 || q3 > 1){ cout << "Error reading quaterions from list. Value out of range " << q3 << " row " << n << "\n"; exit(1);}; - if(q4<-1 || q4 > 1){ cout << "Error reading quaterions from list. Value out of range " << q4 << " row " << n << "\n"; exit(1);}; - - - if(yespriorAngles){ - strncpy (tmpVals, line + 48, 12); - sscanf (tmpVals, "%f", &pp); - if(pp <0.0000001)cout << "Sure you're input is correct? Very small prior.\n"; - angprior[n] = pp;} + { + input.getline(line, 511); + if (n < ntotquat) + { + float q1, q2, q3, q4, pp; + + q1 = -99999.; + q2 = -99999.; + q3 = -99999.; + q4 = -99999.; + char tmpVals[60] = {0}; + + strncpy(tmpVals, line, 12); + sscanf(tmpVals, "%f", &q1); + + strncpy(tmpVals, line + 12, 12); + sscanf(tmpVals, "%f", &q2); + + strncpy(tmpVals, line + 24, 12); + sscanf(tmpVals, "%f", &q3); + + strncpy(tmpVals, line + 36, 12); + sscanf(tmpVals, "%f", &q4); + + angles[n].pos[0] = q1; + angles[n].pos[1] = q2; + angles[n].pos[2] = q3; + angles[n].quat4 = q4; + + if (q1 < -1 || q1 > 1) + { + cout << "Error reading quaterions from list. Value out of range " + << q1 << " row " << n << "\n"; + exit(1); + }; + if (q2 < -1 || q2 > 1) + { + cout << "Error reading quaterions from list. Value out of range " + << q2 << " row " << n << "\n"; + exit(1); + }; + if (q3 < -1 || q3 > 1) + { + cout << "Error reading quaterions from list. Value out of range " + << q3 << " row " << n << "\n"; + exit(1); + }; + if (q4 < -1 || q4 > 1) + { + cout << "Error reading quaterions from list. Value out of range " + << q4 << " row " << n << "\n"; + exit(1); + }; + + if (yespriorAngles) + { + strncpy(tmpVals, line + 48, 12); + sscanf(tmpVals, "%f", &pp); + if (pp < 0.0000001) + cout << "Sure you're input is correct? 
Very small prior.\n"; + angprior[n] = pp; + } #ifdef DEBUG - // if(yespriorAngles) - cout << "check orient: " << n << " " << angles[n].pos[0] << " " << angles[n].pos[1] << " " << angles[n].pos[2] << " prior: " << angles[n].quat4 << "\n"; + // if(yespriorAngles) + cout << "check orient: " << n << " " << angles[n].pos[0] << " " + << angles[n].pos[1] << " " << angles[n].pos[2] + << " prior: " << angles[n].quat4 << "\n"; #endif - - } - n++; - if(ntotquat+1 < n) { - cout << "More quaternions than expected in header " << n << " instead of " << NotUn_angles << "\n"; - exit(1); - } - } + } + n++; + if (ntotquat + 1 < n) + { + cout << "More quaternions than expected in header " << n + << " instead of " << NotUn_angles << "\n"; + exit(1); + } + } nTotGridAngles = ntotquat; - voluang= 1./ (myfloat_t) ntotquat * priorMod; + voluang = 1. / (myfloat_t) ntotquat * priorMod; - input.close(); + input.close(); } - - cout << "Analysis with Quaternions. Total number of quaternions " << nTotGridAngles << "\n"; + cout << "Analysis with Quaternions. 
Total number of quaternions " + << nTotGridAngles << "\n"; } - - return(0); - + return (0); } int bioem_param::CalculateRefCTF() { // ************************************************************************************** - // ********** Routine that pre-calculates Kernels for Convolution ********************** + // ********** Routine that pre-calculates Kernels for Convolution + // ********************** // ************************************************************************************ myfloat_t amp, env, phase, ctf, radsq; - myfloat_t* localCTF; - mycomplex_t* localout; + myfloat_t *localCTF; + mycomplex_t *localout; int nctfmax = param_device.NumberPixels / 2; int n = 0; - localCTF = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param_device.NumberPixels * param_device.NumberPixels); - localout = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberFFTPixels1D); + localCTF = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * + param_device.NumberPixels * + param_device.NumberPixels); + localout = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * + param_device.NumberPixels * + param_device.NumberFFTPixels1D); - nTotCTFs = numberGridPointsCTF_amp * numberGridPointsCTF_phase * numberGridPointsEnvelop; + nTotCTFs = numberGridPointsCTF_amp * numberGridPointsCTF_phase * + numberGridPointsEnvelop; delete[] refCTF; refCTF = new mycomplex_t[getRefCtfCount()]; delete[] CtfParam; @@ -1136,202 +1463,265 @@ int bioem_param::CalculateRefCTF() myfloat_t normctf; - - gridCTF_amp = (endGridCTF_amp - startGridCTF_amp) / (myfloat_t) numberGridPointsCTF_amp; - gridCTF_phase = (endGridCTF_phase - startGridCTF_phase) / (myfloat_t) numberGridPointsCTF_phase; - gridEnvelop = (endGridEnvelop - startGridEnvelop) / (myfloat_t) numberGridPointsEnvelop; - + gridCTF_amp = + (endGridCTF_amp - startGridCTF_amp) / (myfloat_t) numberGridPointsCTF_amp; + gridCTF_phase = (endGridCTF_phase - startGridCTF_phase) / + (myfloat_t) 
numberGridPointsCTF_phase; + gridEnvelop = + (endGridEnvelop - startGridEnvelop) / (myfloat_t) numberGridPointsEnvelop; - //if only one grid point for PSF kernel: - if( (myfloat_t) numberGridPointsCTF_amp == 1 ) { - gridCTF_amp = startGridCTF_amp;} - else if ( (endGridCTF_amp - startGridCTF_amp) < 0. ){ - cout << "Error: Interval of amplitude in CTF/PSF Negative"; exit(1); + // if only one grid point for PSF kernel: + if ((myfloat_t) numberGridPointsCTF_amp == 1) + { + gridCTF_amp = startGridCTF_amp; } - if( (myfloat_t) numberGridPointsCTF_phase == 1 ) { + else if ((endGridCTF_amp - startGridCTF_amp) < 0.) + { + cout << "Error: Interval of amplitude in CTF/PSF Negative"; + exit(1); + } + if ((myfloat_t) numberGridPointsCTF_phase == 1) + { gridCTF_phase = startGridCTF_phase; - }else if ( (endGridCTF_phase - startGridCTF_phase) < 0.){ - cout << "Error: Interval of PHASE in CTF/PSF is Negative"; exit(1); } - if( (myfloat_t) numberGridPointsEnvelop == 1 ) { + else if ((endGridCTF_phase - startGridCTF_phase) < 0.) + { + cout << "Error: Interval of PHASE in CTF/PSF is Negative"; + exit(1); + } + if ((myfloat_t) numberGridPointsEnvelop == 1) + { gridEnvelop = startGridEnvelop; - } else if ( (endGridEnvelop - startGridEnvelop) < 0.) { - cout << "Error: Interval of Envelope in CTF/PSF is Negative"; exit(1); + } + else if ((endGridEnvelop - startGridEnvelop) < 0.) + { + cout << "Error: Interval of Envelope in CTF/PSF is Negative"; + exit(1); } - - //More checks with input parameters + // More checks with input parameters // Envelope should not have a standard deviation greater than Npix/2 - if(sqrt(1./( (myfloat_t) numberGridPointsEnvelop * gridEnvelop + startGridEnvelop))> float(param_device.NumberPixels)/2.0 && usepsf) { - cout << "MAX Standard deviation of envelope is larger than Allowed KERNEL Length\n"; + if (sqrt(1. 
/ ((myfloat_t) numberGridPointsEnvelop * gridEnvelop + + startGridEnvelop)) > float(param_device.NumberPixels) / 2.0 && + usepsf) + { + cout << "MAX Standard deviation of envelope is larger than Allowed KERNEL " + "Length\n"; exit(1); } // Envelop param should be positive - if(!printModel && (startGridCTF_amp < 0 || endGridCTF_amp > 1)){ + if (!printModel && (startGridCTF_amp < 0 || endGridCTF_amp > 1)) + { cout << "Error: PSF Amplitud should be between 0 and 1\n"; cout << "start: " << startGridCTF_amp << "End: " << endGridCTF_amp << "\n"; exit(1); } - if(!printModel && endGridCTF_amp < startGridCTF_amp){ - cout << "Error: values of amplitud starting is larger than ending points\n" ; + if (!printModel && endGridCTF_amp < startGridCTF_amp) + { + cout << "Error: values of amplitud starting is larger than ending points\n"; cout << "start: " << startGridCTF_amp << " End: " << endGridCTF_amp << "\n"; exit(1); } + for (int iamp = 0; iamp < numberGridPointsCTF_amp; + iamp++) // Loop over amplitud + { + amp = (myfloat_t) iamp * gridCTF_amp + startGridCTF_amp; - for (int iamp = 0; iamp < numberGridPointsCTF_amp ; iamp++) //Loop over amplitud - { - amp = (myfloat_t) iamp * gridCTF_amp + startGridCTF_amp; - - for (int iphase = 0; iphase < numberGridPointsCTF_phase ; iphase++)//Loop over phase - { - phase = (myfloat_t) iphase * gridCTF_phase + startGridCTF_phase; - - for (int ienv = 0; ienv < numberGridPointsEnvelop ; ienv++)//Loop over envelope - { - env = (myfloat_t) ienv * gridEnvelop + startGridEnvelop; - - memset(localCTF, 0, param_device.NumberPixels * param_device.NumberPixels * sizeof(myfloat_t)); - - normctf = 0.0; - - // cout <<"values " << amp << " " << phase << " " << env <<"\n"; - //Complex CTF - mycomplex_t* curRef = &refCTF[n * FFTMapSize]; - - // Initialzing everything to zero just to be sure - for(int i = 0; i < param_device.NumberPixels * param_device.NumberFFTPixels1D; i++ ){ - curRef[i][0] =0.f; - curRef[i][1] =0.f; - } - - for(int i = 0; i < 
param_device.NumberPixels; i++) - { - for(int j = 0; j < param_device.NumberPixels; j++) - { - localCTF[i * param_device.NumberPixels + j]=0.f; - } - } - - if(usepsf){ - normctf=0.0; - - for(int i = 0; i < param_device.NumberPixels; i++) - { - for(int j = 0; j < param_device.NumberPixels; j++) - { - int ri=0,rj=0; - - //Calculating the distance from the periodic center at 0,0 - - if(i<nctfmax+1){ ri=i; }else{ ri=param_device.NumberPixels-i;}; - if(j<nctfmax+1){ rj=j; }else{ rj=param_device.NumberPixels-j;}; - radsq = (myfloat_t) ((ri) * (ri) + (rj) *(rj)) * pixelSize * pixelSize; - - ctf = exp(-radsq * env / 2.0) * (- amp * cos(radsq * phase / 2.0) - sqrt((1 - amp * amp)) * sin(radsq * phase / 2.0)) ; - - localCTF[i * param_device.NumberPixels + j] = (myfloat_t) ctf; - - normctf += localCTF[i * param_device.NumberPixels + j]; + for (int iphase = 0; iphase < numberGridPointsCTF_phase; + iphase++) // Loop over phase + { + phase = (myfloat_t) iphase * gridCTF_phase + startGridCTF_phase; - // cout << "TT " << i << " " << j << " " << localCTF[i * param_device.NumberPixels + j] << "\n"; - } - } + for (int ienv = 0; ienv < numberGridPointsEnvelop; + ienv++) // Loop over envelope + { + env = (myfloat_t) ienv * gridEnvelop + startGridEnvelop; - //Normalization - for(int i = 0; i < param_device.NumberPixels; i++) - { - for(int j = 0; j < param_device.NumberPixels; j++) - { - localCTF[i * param_device.NumberPixels + j]= localCTF[i * param_device.NumberPixels + j]/normctf; - } - } + memset(localCTF, 0, param_device.NumberPixels * + param_device.NumberPixels * sizeof(myfloat_t)); - - - //Calling FFT_Forward - myfftw_execute_dft_r2c(fft_plan_r2c_forward, localCTF, localout); + normctf = 0.0; - // Saving the Reference PSFs + // cout <<"values " << amp << " " << phase << " " << env + //<<"\n"; + // Complex CTF + mycomplex_t *curRef = &refCTF[n * FFTMapSize]; - for(int i = 0; i < param_device.NumberPixels * param_device.NumberFFTPixels1D; i++ ) - { - curRef[i][0] = localout[i][0]; 
- curRef[i][1] = localout[i][1]; - // cout << "PSFFOU " << i << " " << curRef[i][0] << " " << curRef[i][1] << " " << param_device.NumberFFTPixels1D << " " << FFTMapSize <<"\n"; - } + // Initialzing everything to zero just to be sure + for (int i = 0; + i < param_device.NumberPixels * param_device.NumberFFTPixels1D; + i++) + { + curRef[i][0] = 0.f; + curRef[i][1] = 0.f; + } - }else{ + for (int i = 0; i < param_device.NumberPixels; i++) + { + for (int j = 0; j < param_device.NumberPixels; j++) + { + localCTF[i * param_device.NumberPixels + j] = 0.f; + } + } - //*******CTF************* - normctf = 0.0; + if (usepsf) + { + normctf = 0.0; + + for (int i = 0; i < param_device.NumberPixels; i++) + { + for (int j = 0; j < param_device.NumberPixels; j++) + { + int ri = 0, rj = 0; + + // Calculating the distance from the periodic center at 0,0 + + if (i < nctfmax + 1) + { + ri = i; + } + else + { + ri = param_device.NumberPixels - i; + }; + if (j < nctfmax + 1) + { + rj = j; + } + else + { + rj = param_device.NumberPixels - j; + }; + radsq = (myfloat_t)((ri) * (ri) + (rj) * (rj)) * pixelSize * + pixelSize; + + ctf = exp(-radsq * env / 2.0) * + (-amp * cos(radsq * phase / 2.0) - + sqrt((1 - amp * amp)) * sin(radsq * phase / 2.0)); + + localCTF[i * param_device.NumberPixels + j] = (myfloat_t) ctf; + + normctf += localCTF[i * param_device.NumberPixels + j]; + + // cout << "TT " << i << " " << j << " " << localCTF[i + //* param_device.NumberPixels + j] << "\n"; + } + } + + // Normalization + for (int i = 0; i < param_device.NumberPixels; i++) + { + for (int j = 0; j < param_device.NumberPixels; j++) + { + localCTF[i * param_device.NumberPixels + j] = + localCTF[i * param_device.NumberPixels + j] / normctf; + } + } + + // Calling FFT_Forward + myfftw_execute_dft_r2c(fft_plan_r2c_forward, localCTF, localout); + + // Saving the Reference PSFs + + for (int i = 0; + i < param_device.NumberPixels * param_device.NumberFFTPixels1D; + i++) + { + curRef[i][0] = localout[i][0]; + 
curRef[i][1] = localout[i][1]; + // cout << "PSFFOU " << i << " " << curRef[i][0] << " " << + // curRef[i][1] << " " << param_device.NumberFFTPixels1D << " " << + // FFTMapSize <<"\n"; + } + } + else + { - if(amp <0.0000000001){ - cout << "Problem with CTF normalization AMP less than threshold < 10^-10 \n"; - exit(1); - } + //*******CTF************* + normctf = 0.0; + + if (amp < 0.0000000001) + { + cout << "Problem with CTF normalization AMP less than threshold < " + "10^-10 \n"; + exit(1); + } + + // Directly calculating CTF IN FOURIER SPACE + for (int i = 0; i < param_device.NumberFFTPixels1D; i++) + { + for (int j = 0; j < param_device.NumberFFTPixels1D; j++) + { + radsq = (myfloat_t)(i * i + j * j) / param_device.NumberPixels / + param_device.NumberPixels / pixelSize / pixelSize; + ctf = exp(-env * radsq / 2.) * + (-amp * cos(phase * radsq / 2.) - + sqrt((1 - amp * amp)) * sin(phase * radsq / 2.)); + if (i == 0 && j == 0) + normctf = + (myfloat_t) ctf; // component 0 0 should be the norm in 1d + curRef[i * param_device.NumberFFTPixels1D + j][0] = ctf / normctf; + curRef[i * param_device.NumberFFTPixels1D + j][1] = 0; + // On symmetric side + curRef[(param_device.NumberPixels - i - 1) * + param_device.NumberFFTPixels1D + + j][0] = ctf / normctf; + curRef[(param_device.NumberPixels - i - 1) * + param_device.NumberFFTPixels1D + + j][1] = 0; + } + } + + // for(int i = 0; i < param_device.NumberPixels * + // param_device.NumberFFTPixels1D; i++ )curRef[i][0]/= normctf; + } - //Directly calculating CTF IN FOURIER SPACE - for(int i = 0; i < param_device.NumberFFTPixels1D ; i++ ) - { - for(int j = 0; j < param_device.NumberFFTPixels1D; j++ ) - { - radsq = (myfloat_t) (i * i + j * j) / param_device.NumberPixels / param_device.NumberPixels / pixelSize / pixelSize ; - ctf = exp(- env * radsq / 2.) * ( -amp * cos( phase * radsq / 2.) 
- sqrt((1 - amp * amp)) * sin( phase * radsq / 2.)); - if( i==0 && j==0 ) normctf = (myfloat_t) ctf; // component 0 0 should be the norm in 1d - curRef[i * param_device.NumberFFTPixels1D + j ][0] = ctf / normctf; - curRef[i * param_device.NumberFFTPixels1D + j ][1] = 0; - //On symmetric side - curRef[ (param_device.NumberPixels - i - 1) * param_device.NumberFFTPixels1D + j ][0] = ctf / normctf; - curRef[ (param_device.NumberPixels - i - 1) * param_device.NumberFFTPixels1D + j ][1] = 0; - } - } - - - // for(int i = 0; i < param_device.NumberPixels * param_device.NumberFFTPixels1D; i++ )curRef[i][0]/= normctf; - } - - - CtfParam[n].pos[0] = amp; - CtfParam[n].pos[1] = phase; - CtfParam[n].pos[2] = env; - n++; - //exit(1); - } - } + CtfParam[n].pos[0] = amp; + CtfParam[n].pos[1] = phase; + CtfParam[n].pos[2] = env; + n++; + // exit(1); + } } - + } myfftw_free(localCTF); myfftw_free(localout); if (nTotCTFs != n) - { - cout << "Internal error during CTF preparation\n"; - exit(1); - } - + { + cout << "Internal error during CTF preparation\n"; + exit(1); + } // ********** Calculating normalized volumen element ********* - if(!printModel){ - // All priors (uniform or not) normalized to 1 - // The volume is the grid-spacing of the parameter / normalization - // the order is angles, displacement, ctf amplitud (all uniform) then env b & phase (non uniform) the sqrt(2) cancel out (see SI) - param_device.volu = voluang * - (myfloat_t) param_device.GridSpaceCenter * pixelSize * (myfloat_t) param_device.GridSpaceCenter * pixelSize / ((2.f * (myfloat_t) param_device.maxDisplaceCenter+1.)) / (2.f * (myfloat_t) (param_device.maxDisplaceCenter + 1.)) - / (myfloat_t) numberGridPointsCTF_amp - * gridEnvelop * gridCTF_phase / M_PI / param_device.sigmaPriorbctf / param_device.sigmaPriordefo ; + if (!printModel) + { + // All priors (uniform or not) normalized to 1 + // The volume is the grid-spacing of the parameter / normalization + // the order is angles, displacement, ctf amplitud (all 
uniform) then env b + // & phase (non uniform) the sqrt(2) cancel out (see SI) + param_device.volu = + voluang * (myfloat_t) param_device.GridSpaceCenter * pixelSize * + (myfloat_t) param_device.GridSpaceCenter * pixelSize / + ((2.f * (myfloat_t) param_device.maxDisplaceCenter + 1.)) / + (2.f * (myfloat_t)(param_device.maxDisplaceCenter + 1.)) / + (myfloat_t) numberGridPointsCTF_amp * gridEnvelop * gridCTF_phase / + M_PI / param_device.sigmaPriorbctf / param_device.sigmaPriordefo; // cout << "VOLU " << param_device.volu << " " << gridCTF_amp << "\n"; // *** Number of total pixels*** - param_device.Ntotpi = (myfloat_t) (param_device.NumberPixels * param_device.NumberPixels); - param_device.NtotDist = (2 * (int) (param_device.maxDisplaceCenter / param_device.GridSpaceCenter) + 1 ) * (2 * (int) (param_device.maxDisplaceCenter / param_device.GridSpaceCenter) + 1); - + param_device.Ntotpi = + (myfloat_t)(param_device.NumberPixels * param_device.NumberPixels); + param_device.NxDisp = 2 * (int) (param_device.maxDisplaceCenter / + param_device.GridSpaceCenter) + + 1; + param_device.NtotDisp = param_device.NxDisp * param_device.NxDisp; } - nTotCC = (int) ((myfloat_t) param_device.NumberPixels / (myfloat_t) param_device.CCdisplace + 1) * (int) ((myfloat_t) param_device.NumberPixels / (myfloat_t) param_device.CCdisplace + 1); - return(0); + return (0); } bioem_param::~bioem_param() @@ -1345,11 +1735,15 @@ bioem_param::~bioem_param() numberGridPointsCTF_phase = 0; param_device.maxDisplaceCenter = 0; numberGridPointsDisplaceCenter = 0; - if (refCTF) delete[] refCTF; - if (CtfParam) delete[] CtfParam; - if (angles) free(angles); - if(angprior) delete[] angprior; - refCTF= NULL; + if (refCTF) + delete[] refCTF; + if (CtfParam) + delete[] CtfParam; + if (angles) + free(angles); + if (angprior) + delete[] angprior; + refCTF = NULL; CtfParam = NULL; angles = NULL; angprior = NULL; diff --git a/timer.cpp b/timer.cpp index 
c02c0a0763ee38c870dfb996da964b747550f219..d58ff05340b774e67deee765d6a03bf6de315117 100644 --- a/timer.cpp +++ b/timer.cpp @@ -1,15 +1,29 @@ +/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + < BioEM software for Bayesian inference of Electron Microscopy images> + Copyright (C) 2017 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp, + Luka Stanisic, Volker Lindenstruth and Gerhard Hummer. + Max Planck Institute of Biophysics, Frankfurt, Germany. + Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, + Germany. + Max Planck Computing and Data Facility, Garching, Germany. + + Released under the GNU Public License, v3. + See license statement for terms of distribution. + + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ + #include "timer.h" #ifdef _WIN32 -#include <windows.h> #include <winbase.h> +#include <windows.h> #else #include <time.h> #endif HighResTimer::HighResTimer() { - ElapsedTime = 0; - running = 0; + ElapsedTime = 0; + running = 0; } HighResTimer::~HighResTimer() {} @@ -17,76 +31,75 @@ HighResTimer::~HighResTimer() {} void HighResTimer::Start() { #ifdef _WIN32 - __int64 istart; - QueryPerformanceCounter((LARGE_INTEGER*)&istart); - StartTime = (double) istart; + __int64 istart; + QueryPerformanceCounter((LARGE_INTEGER *) &istart); + StartTime = (double) istart; #else - timespec tv; - clock_gettime(CLOCK_REALTIME, &tv); - StartTime = (double) tv.tv_sec * 1.0E9 + (double) tv.tv_nsec; + timespec tv; + clock_gettime(CLOCK_REALTIME, &tv); + StartTime = (double) tv.tv_sec * 1.0E9 + (double) tv.tv_nsec; #endif - running = 1; + running = 1; } void HighResTimer::ResetStart() { - ElapsedTime = 0; - Start(); + ElapsedTime = 0; + Start(); } void HighResTimer::Stop() { - if (running == 0) return; - running = 0; - double EndTime = 0; + if (running == 0) + return; + running = 0; + double EndTime = 0; #ifdef _WIN32 - __int64 iend; - QueryPerformanceCounter((LARGE_INTEGER*) 
&iend); - EndTime = (double) iend; + __int64 iend; + QueryPerformanceCounter((LARGE_INTEGER *) &iend); + EndTime = (double) iend; #else - timespec tv; - clock_gettime(CLOCK_REALTIME, &tv); - EndTime = (double) tv.tv_sec * 1.0E9 + (double) tv.tv_nsec; + timespec tv; + clock_gettime(CLOCK_REALTIME, &tv); + EndTime = (double) tv.tv_sec * 1.0E9 + (double) tv.tv_nsec; #endif - ElapsedTime += EndTime - StartTime; + ElapsedTime += EndTime - StartTime; } void HighResTimer::Reset() { - ElapsedTime = 0; - StartTime = 0; - running = 0; + ElapsedTime = 0; + StartTime = 0; + running = 0; } -double HighResTimer::GetElapsedTime() -{ - return ElapsedTime / Frequency; -} +double HighResTimer::GetElapsedTime() { return ElapsedTime / Frequency; } double HighResTimer::GetCurrentElapsedTime() { - if (running == 0) return(GetElapsedTime()); - double CurrentTime = 0; + if (running == 0) + return (GetElapsedTime()); + double CurrentTime = 0; #ifdef _WIN32 - __int64 iend; - QueryPerformanceCounter((LARGE_INTEGER*) &iend); - CurrentTime = (double) iend; + __int64 iend; + QueryPerformanceCounter((LARGE_INTEGER *) &iend); + CurrentTime = (double) iend; #else - timespec tv; - clock_gettime(CLOCK_REALTIME, &tv); - CurrentTime = (double) tv.tv_sec * 1.0E9 + (double) tv.tv_nsec; + timespec tv; + clock_gettime(CLOCK_REALTIME, &tv); + CurrentTime = (double) tv.tv_sec * 1.0E9 + (double) tv.tv_nsec; #endif - return((CurrentTime - StartTime + ElapsedTime) / Frequency); + return ((CurrentTime - StartTime + ElapsedTime) / Frequency); } double HighResTimer::GetFrequency() { #ifdef _WIN32 - __int64 ifreq; - QueryPerformanceFrequency((LARGE_INTEGER*)&ifreq); - return((double) ifreq); + __int64 ifreq; + QueryPerformanceFrequency((LARGE_INTEGER *) &ifreq); + return ((double) ifreq); #else - return(1.0E9); + return (1.0E9); #endif } @@ -114,9 +127,10 @@ void TimeStat::InitTimeStat(int nlogs) void TimeStat::EmptyTimeStat() { - if (tl == NULL) return; + if (tl == NULL) + return; - delete [ ] tl; + delete[] tl; 
tl = NULL; time = 0.; } @@ -127,22 +141,25 @@ void TimeStat::ComputeTimeStat() vector<double> diff; for (int i = 0; i < total_logs; i++) - { - tl[i].sum = std::accumulate(tl[i].vec.begin(), tl[i].vec.end(), 0.0); - mean = tl[i].sum / tl[i].vec.size(); - - diff.resize(tl[i].vec.size()); - std::transform(tl[i].vec.begin(), tl[i].vec.end(), diff.begin(), std::bind2nd(std::minus<double>(), mean)); - sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); - tl[i].stdev = std::sqrt(sq_sum / tl[i].vec.size()); - } + { + tl[i].sum = std::accumulate(tl[i].vec.begin(), tl[i].vec.end(), 0.0); + mean = tl[i].sum / tl[i].vec.size(); + + diff.resize(tl[i].vec.size()); + std::transform(tl[i].vec.begin(), tl[i].vec.end(), diff.begin(), + std::bind2nd(std::minus<double>(), mean)); + sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); + tl[i].stdev = std::sqrt(sq_sum / tl[i].vec.size()); + } } void TimeStat::PrintTimeStat(int mpi_rank) { ComputeTimeStat(); for (int i = 0; i < total_logs; i++) - { - printf("SUMMARY -> %s: Total %f sec; Mean %f sec; Std.Dev. %f (rank %d)\n", tl[i].name.c_str(), tl[i].sum, tl[i].sum / tl[i].vec.size(), tl[i].stdev, mpi_rank); - } + { + printf("SUMMARY -> %s: Total %f sec; Mean %f sec; Std.Dev. %f (rank %d)\n", + tl[i].name.c_str(), tl[i].sum, tl[i].sum / tl[i].vec.size(), + tl[i].stdev, mpi_rank); + } }