bioem.cpp 41 KB
Newer Older
Pilar Cossio's avatar
License    
Pilar Cossio committed
1
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2
3
4
   < BioEM software for Bayesian inference of Electron Microscopy images>
   Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
   Max Planck Institute of Biophysics, Frankfurt, Germany.
5

6
   See license statement for terms of distribution.
Pilar Cossio's avatar
License    
Pilar Cossio committed
7
8
9

   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

10
11
#include <mpi.h>

12
13
14
15
16
#define MPI_CHK(expr)							\
  if (expr != MPI_SUCCESS)						\
    {									\
      fprintf(stderr, "Error in MPI function %s: %d\n", __FILE__, __LINE__); \
    }
17

18
19
20
21
22
23
24
25
26
#include <fstream>
#include <boost/program_options.hpp>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <cmath>
27

28
#ifdef WITH_OPENMP
29
#include <omp.h>
30
#endif
31
32
33
34
35
36
37
38
39

#include <fftw3.h>
#include <math.h>
#include "cmodules/timer.h"

#include "param.h"
#include "bioem.h"
#include "model.h"
#include "map.h"
40
#include "MersenneTwister.h"
41

42
43
44
45
46
47
#ifdef BIOEM_USE_NVTX
#include "nvToolsExt.h"

const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
const int num_colors = sizeof(colors)/sizeof(colors[0]);

48
49
50
51
52
53
54
55
56
57
58
59
#define cuda_custom_timeslot(name,cid) {		\
    int color_id = cid;					\
    color_id = color_id%num_colors;			\
    nvtxEventAttributes_t eventAttrib = {0};		\
    eventAttrib.version = NVTX_VERSION;			\
    eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;	\
    eventAttrib.colorType = NVTX_COLOR_ARGB;		\
    eventAttrib.color = colors[color_id];		\
    eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;	\
    eventAttrib.message.ascii = name;			\
    nvtxRangePushEx(&eventAttrib);			\
  }
60
61
62
63
64
#define cuda_custom_timeslot_end nvtxRangePop();
#else
#define cuda_custom_timeslot(name,cid)
#define cuda_custom_timeslot_end
#endif
65

66
67
68
69
70
71
72
73
74
75
76
#include "bioem_algorithm.h"

using namespace boost;
namespace po = boost::program_options;

using namespace std;

// A helper function of Boost
template<class T>
ostream& operator<<(ostream& os, const vector<T>& v)
{
77
78
  copy(v.begin(), v.end(), ostream_iterator<T>(os, " "));
  return os;
79
80
81
82
}

bioem::bioem()
{
83
84
85
  FFTAlgo = getenv("FFTALGO") == NULL ? 1 : atoi(getenv("FFTALGO"));
  DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 2 : atoi(getenv("BIOEM_DEBUG_OUTPUT"));
  nProjectionsAtOnce = getenv("BIOEM_PROJECTIONS_AT_ONCE") == NULL ? 1 : atoi(getenv("BIOEM_PROJECTIONS_AT_ONCE"));
86
87
88
89
90
91
92
93
}

bioem::~bioem()
{
}

int bioem::configure(int ac, char* av[])
{
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
  // **************************************************************************************
  // **** Configuration Routine using boost for extracting parameters, models and maps ****
  // **************************************************************************************
  // ****** And Precalculating necessary grids, map crosscorrelations and kernels  ********
  // *************************************************************************************

  HighResTimer timer;

  if (mpi_rank == 0)
    {
      // *** Inizialzing default variables ***
      std::string infile, modelfile, mapfile,Inputanglefile,Inputbestmap;
      Model.readPDB = false;
      param.param_device.writeAngles = false;
      param.dumpMap = false;
      param.loadMap = false;
      RefMap.readMRC = false;
      RefMap.readMultMRC = false;
      param.notuniformangles=false;

      // *************************************************************************************
      cout << " ++++++++++++ FROM COMMAND LINE +++++++++++\n\n";
      // *************************************************************************************

      // ********************* Command line reading input with BOOST ************************

      try {
	po::options_description desc("Command line inputs");
	desc.add_options()
	  ("Modelfile", po::value< std::string>() , "(Mandatory) Name of model file")
	  ("Particlesfile", po::value< std::string>(), "if BioEM (Mandatory) Name of paricles file")
	  ("Inputfile", po::value<std::string>(), "if BioEM (Mandatory) Name of input parameter file") 
	  ("PrintBestCalMap", po::value< std::string>(), "(Optional) Only print best calculated map (file nec.). NO BioEM (!)")
	  ("ReadEulerAngles", po::value< std::string>(), "(Optional) Read Euler angle list instead of uniform grid (file nec.)")
	  ("ReadPDB", "(Optional) If reading model file in PDB format")
	  ("ReadMRC", "(Optional) If reading particle file in MRC format")
	  ("ReadMultipleMRC", "(Optional) If reading Multiple MRCs")
	  ("DumpMaps", "(Optional) Dump maps after they were red from maps file")
	  ("LoadMapDump", "(Optional) Read Maps from dump instead of maps file")
	  ("help", "(Optional) Produce help message")
	  ;


	po::positional_options_description p;
	p.add("Inputfile", -1);
	p.add("Modelfile", -1);
	p.add("Particlesfile", -1);
	p.add("ReadPDB", -1);
	p.add("ReadMRC", -1);
	p.add("ReadMultipleMRC", -1);
	p.add("ReadEulerAngles",-1);
	p.add("PrintBestCalMap",-1);
	p.add("DumpMaps", -1);
	p.add("LoadMapDump", -1);

	po::variables_map vm;
	po::store(po::command_line_parser(ac, av).
		  options(desc).positional(p).run(), vm);
	po::notify(vm);

	if((ac < 4)) {
	  std::cout << desc << std::endl;
	  return 1;
	}
	if (vm.count("help")) {
	  cout << "Usage: options_description [options]\n";
	  cout << desc;
	  return 1;
	}
163

164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
	if (vm.count("Inputfile"))
	  {
	    cout << "Input file is: ";
	    cout << vm["Inputfile"].as< std::string >() << "\n";
	    infile = vm["Inputfile"].as< std::string >();
	  }
	if (vm.count("Modelfile"))
	  {
	    cout << "Model file is: "
		 << vm["Modelfile"].as<  std::string  >() << "\n";
	    modelfile = vm["Modelfile"].as<  std::string  >();
	  }
	if (vm.count("ReadPDB"))
	  {
	    cout << "Reading model file in PDB format.\n";
	    Model.readPDB = true;
	  }
	if (vm.count("ReadEulerAngles"))
	  {
	    cout << "Reading Euler Angles from file: "
		 << vm["ReadEulerAngles"].as<  std::string  >() << "\n";
	    cout << "Note: Format should be alpha (12.6f) beta (12.6f) gamma (12.6f)\n";
	    Inputanglefile = vm["ReadEulerAngles"].as<  std::string  >();
	    param.notuniformangles=true;
	  }
	if (vm.count("PrintBestCalMap"))
	  {
	    cout << "Reading Euler Angles from file: "
		 << vm["PrintBestCalMap"].as<  std::string  >() << "\n";
	    Inputbestmap = vm["PrintBestCalMap"].as<  std::string  >();
	    param.printModel=true;
	  }

	if (vm.count("ReadMRC"))
	  {
	    cout << "Reading particle file in MRC format.\n";
	    RefMap.readMRC=true;
	  }

	if (vm.count("ReadMultipleMRC"))
	  {
	    cout << "Reading Multiple MRCs.\n";
	    RefMap.readMultMRC=true;
	  }

	if (vm.count("DumpMaps"))
	  {
	    cout << "Dumping Maps after reading from file.\n";
	    param.dumpMap = true;
	  }

	if (vm.count("LoadMapDump"))
	  {
	    cout << "Loading Map dump.\n";
	    param.loadMap = true;
	  }

	if (vm.count("Particlesfile"))
	  {
	    cout << "Paricle file is: "
		 << vm["Particlesfile"].as< std::string >() << "\n";
	    mapfile = vm["Particlesfile"].as< std::string >();
	  }
      }
      catch(std::exception& e)
	{
	  cout << e.what() << "\n";
	  return 1;
	}
David Rohr's avatar
David Rohr committed
233

234
235
      //check for consitency in multiple MRCs
      if(RefMap.readMultMRC && not(RefMap.readMRC))
David Rohr's avatar
David Rohr committed
236
	{
237
238
239
	  cout << "For Multiple MRCs command --ReadMRC is necesary too";
	  exit(1);
	}
David Rohr's avatar
David Rohr committed
240

241
242
243
244
      if(!Model.readPDB){
	cout << "Note: Reading model in simple text format (not PDB)\n";
	cout << "----  x   y   z  radius  density ------- \n";
      } 
David Rohr's avatar
David Rohr committed
245

246
247
248
249
250
      if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart();
      // ********************* Reading Parameter Input ***************************
      if(!param.printModel){
	// Standard definition for BioEM
	param.readParameters(infile.c_str(),Inputanglefile.c_str());
David Rohr's avatar
David Rohr committed
251

252
253
	// ********************* Reading Particle Maps Input **********************
	RefMap.readRefMaps(param, mapfile.c_str());
David Rohr's avatar
David Rohr committed
254

255

256
257
      } else{
	// Reading parameters for only writting down Best projection
258

259
260
	param.forprintBest(Inputbestmap.c_str());
      }	
261

262
263
      // ********************* Reading Model Input ******************************
      Model.readModel(modelfile.c_str());
David Rohr's avatar
David Rohr committed
264

265
      cout << "Remark: look at file COORDREAD to confirm that the Model coordinates are correct\n";
David Rohr's avatar
David Rohr committed
266

267
268
      if (DebugOutput >= 2 && mpi_rank == 0) printf("Reading Input Data %f\n", timer.GetCurrentElapsedTime());
    }
269

270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
#ifdef WITH_MPI
  if (mpi_size > 1)
    {
      if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart();
      MPI_Bcast(&param, sizeof(param), MPI_BYTE, 0, MPI_COMM_WORLD);
      //refCtf, CtfParam, angles automatically filled by precalculare function below

      MPI_Bcast(&Model, sizeof(Model), MPI_BYTE, 0, MPI_COMM_WORLD);
      if (mpi_rank != 0) Model.points = (bioem_model::bioem_model_point*) mallocchk(sizeof(bioem_model::bioem_model_point) * Model.nPointsModel);
      MPI_Bcast(Model.points, sizeof(bioem_model::bioem_model_point) * Model.nPointsModel, MPI_BYTE, 0, MPI_COMM_WORLD);

      MPI_Bcast(&RefMap, sizeof(RefMap), MPI_BYTE, 0, MPI_COMM_WORLD);
      if (mpi_rank != 0) RefMap.maps = (myfloat_t*) mallocchk(RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap);
      MPI_Bcast(RefMap.maps, RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap, MPI_BYTE, 0, MPI_COMM_WORLD);
      if (DebugOutput >= 2 && mpi_rank == 0) printf("MPI Broadcast of Input Data %f\n", timer.GetCurrentElapsedTime());
    }
#endif
287

288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
  // ****************** Precalculating Necessary Stuff *********************

  if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart();
  param.PrepareFFTs();
  if (DebugOutput >= 2 && mpi_rank == 0)
    {
      printf("Time Prepare FFTs %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  precalculate();

  if (getenv("BIOEM_DEBUG_BREAK"))
    {
      const int cut = atoi(getenv("BIOEM_DEBUG_BREAK"));
      if (param.nTotGridAngles > cut) param.nTotGridAngles = cut;
      if (param.nTotCTFs > cut) param.nTotCTFs = cut;
    }

  if (DebugOutput >= 2 && mpi_rank == 0)
    {
      printf("Time Precalculate %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  if(!param.printModel)pProb.init(RefMap.ntotRefMap, param.nTotGridAngles, param.nTotCC, *this);

  if (DebugOutput >= 2 && mpi_rank == 0)
    {
      printf("Time Init Probabilities %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  deviceInit();
  if (DebugOutput >= 2 && mpi_rank == 0) printf("Time Device Init %f\n", timer.GetCurrentElapsedTime());

  return(0);
322
323
}

324
325
void bioem::cleanup()
{
326
327
328
  //Deleting allocated pointers
  free_device_host(pProb.ptr);
  RefMap.freePointers();
329
330
}

331
332
int bioem::precalculate()
{
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
  // **************************************************************************************
  // **Precalculating Routine of Orientation grids, Map crosscorrelations and CTF Kernels**
  // **************************************************************************************
  HighResTimer timer;

  // Generating Grids of orientations 
  param.CalculateGridsParam();                

  if (DebugOutput >= 3)
    {
      printf("\tTime Precalculate Grids Param: %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  // Precalculating CTF Kernels stored in class Param
  param.CalculateRefCTF();

  if (DebugOutput >= 3)
    {
      printf("\tTime Precalculate CTFs: %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  //Precalculate Maps
  if(!param.printModel) RefMap.precalculate(param, *this);
  if (DebugOutput >= 3) printf("\tTime Precalculate Maps: %f\n", timer.GetCurrentElapsedTime());

  return(0);
359
360
361
362
}

int bioem::run()
{
David Rohr's avatar
David Rohr committed
363

364
365
366
  // **************************************************************************************
  // ********** Secondary routine for printing out the only best projection ***************
  // **************************************************************************************
David Rohr's avatar
David Rohr committed
367

368
  if(mpi_rank == 0 && param.printModel){ //Only works for 1 MPI process (not parallelized)
369

370
371
372
373
374
    cout << "\nAnalysis for printing best projection::: \n \n" ; 
    mycomplex_t* proj_mapsFFT;
    myfloat_t* conv_map = NULL;
    mycomplex_t* conv_mapFFT;
    myfloat_t sumCONV, sumsquareCONV;
375

376
377
378
    proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
    conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
    conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
379

380
    cout << "...... Calculating Projection .......................\n " ;
381

382
    createProjection(0, proj_mapsFFT);
383

384
    cout << "...... Calculating Convolution .......................\n " ;
David Rohr's avatar
David Rohr committed
385

386
    createConvolutedProjectionMap(0, 0, proj_mapsFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
387

388
  }
389

390
391
392
  // **************************************************************************************
  // **** Main BioEM routine, projects, convolutes and compares with Map using OpenMP ****
  // **************************************************************************************
393

394
395
  // **** If we want to control the number of threads -> omp_set_num_threads(XX); ******
  // ****************** Declarying class of Probability Pointer  *************************
David Rohr's avatar
David Rohr committed
396

397
398
399
400
401
  if (mpi_rank == 0) printf("\tInitializing Probabilities\n");
  // Inizialzing Probabilites to zero and constant to -Infinity
  for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++)
    {
      bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
David Rohr's avatar
David Rohr committed
402

403
404
405
      pProbMap.Total = 0.0;
      pProbMap.Constoadd = -9999999;
      if (param.param_device.writeAngles)
406
	{
407
408
409
410
411
412
413
414
415
416
417
418
419
	  for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient ++)
	    {
	      bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

	      pProbAngle.forAngles = 0.0;
	      pProbAngle.ConstAngle = -99999999;
	    }
	}
      if (param.param_device.writeCC)
	{      int  cc=0;
	  for (int cent_x = 0; cent_x < param.param_device.NumberPixels -param.param_device.CCdisplace ; cent_x = cent_x + param.param_device.CCdisplace)
	    {
	      for (int cent_y = 0; cent_y < param.param_device.NumberPixels - param.param_device.CCdisplace ; cent_y = cent_y + param.param_device.CCdisplace)
420
		{
421
422
423
424
		  bioem_Probability_cc& pProbCC = pProb.getProbCC(iRefMap, cc);
		  pProbCC.forCC = 0.0;
 
		  cc++;
425
		}
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
	    }
	}                 
    }
  // **************************************************************************************
  deviceStartRun();

  // ******************************** MAIN CYCLE ******************************************

  mycomplex_t* proj_mapsFFT;
  myfloat_t* conv_map = NULL;
  mycomplex_t* conv_mapFFT;
  myfloat_t sumCONV, sumsquareCONV;

  //allocating fftw_complex vector
  const int ProjMapSize = (param.FFTMapSize + 64) & ~63;	//Make sure this is properly aligned for fftw..., Actually this should be ensureb by using FFTMapSize, but it is not due to a bug in CUFFT which cannot handle padding properly
  proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * ProjMapSize * nProjectionsAtOnce);
  conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
  if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
             

  HighResTimer timer, timer2;

  if (DebugOutput >= 1 && mpi_rank == 0) printf("\tMain Loop GridAngles %d, CTFs %d, RefMaps %d, Shifts (%d/%d)², Pixels %d², OMP Threads %d, MPI Ranks %d\n", param.nTotGridAngles, param.nTotCTFs, RefMap.ntotRefMap, 2 * param.param_device.maxDisplaceCenter + param.param_device.GridSpaceCenter, param.param_device.GridSpaceCenter, param.param_device.NumberPixels, omp_get_max_threads(), mpi_size);

  const int iOrientStart = (int) ((long long int) mpi_rank * param.nTotGridAngles / mpi_size);
  int iOrientEnd = (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size);
  if (iOrientEnd > param.nTotGridAngles) iOrientEnd = param.nTotGridAngles;

  for (int iOrientAtOnce = iOrientStart; iOrientAtOnce < iOrientEnd; iOrientAtOnce += nProjectionsAtOnce)
    {
      // ***************************************************************************************
      // ***** Creating Projection for given orientation and transforming to Fourier space *****
      if (DebugOutput >= 1) timer2.ResetStart();
      if (DebugOutput >= 2) timer.ResetStart();
      int iTmpEnd = std::min(iOrientEnd, iOrientAtOnce + nProjectionsAtOnce);
#pragma omp parallel for
      for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
	{
	  createProjection(iOrient, &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]);
	}
      if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrientAtOnce, timer.GetCurrentElapsedTime(), mpi_rank);
467

468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
      for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
	{
	  mycomplex_t* proj_mapFFT = &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize];
	  // ***************************************************************************************
	  // ***** **** Internal Loop over convolutions **** *****
	  for (int iConv = 0; iConv < param.nTotCTFs; iConv++)
	    {
	      // *** Calculating convolutions of projection map and crosscorrelations ***

	      if (DebugOutput >= 2) timer.ResetStart();
	      createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
	      if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank);

	      // ***************************************************************************************
	      // *** Comparing each calculated convoluted map with all experimental maps ***
	      if (DebugOutput >= 2) timer.ResetStart();
	      compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);

	      if (DebugOutput >= 2)
487
		{
488
489
490
491
492
493
494
495
496
		  const double compTime = timer.GetCurrentElapsedTime();
		  const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
		  const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
		    (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / compTime;
		  const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
		    (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime;
		  const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime;

		  printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
497
		}
498
499
500
501
502
503
	    }
	  if (DebugOutput >= 1)
	    {
	      printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank);
	      timer2.ResetStart();
	    }
504
	}
505
506
507
508
509
    }
  //deallocating fftw_complex vector
  myfftw_free(proj_mapsFFT);
  myfftw_free(conv_mapFFT);
  if (!FFTAlgo) myfftw_free(conv_map);
David Rohr's avatar
David Rohr committed
510

511
  deviceFinishRun();
512

513
  // ************* Writing Out Probabilities ***************
514

515
  // *** Angular Probability ***
David Rohr's avatar
David Rohr committed
516

David Rohr's avatar
David Rohr committed
517
#ifdef WITH_MPI
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
  if (mpi_size > 1)
    {
      if (DebugOutput >= 1 && mpi_rank == 0) timer.ResetStart();
      //Reduce Constant and summarize probabilities
      {
	myfloat_t* tmp1 = new myfloat_t[RefMap.ntotRefMap];
	myfloat_t* tmp2 = new myfloat_t[RefMap.ntotRefMap];
	myfloat_t* tmp3 = new myfloat_t[RefMap.ntotRefMap];
	for (int i = 0;i < RefMap.ntotRefMap;i++)
	  {
	    tmp1[i] = pProb.getProbMap(i).Constoadd;
	  }
	MPI_Allreduce(tmp1, tmp2, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
	for (int i = 0;i < RefMap.ntotRefMap;i++)
	  {
	    bioem_Probability_map& pProbMap = pProb.getProbMap(i);
	    tmp1[i] = pProbMap.Total * exp(pProbMap.Constoadd - tmp2[i]);
	  }
	MPI_Reduce(tmp1, tmp3, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);

	//Find MaxProb
	MPI_Status mpistatus;
David Rohr's avatar
David Rohr committed
540
	{
541
542
543
544
545
546
547
548
549
550
551
	  int* tmpi1 = new int[RefMap.ntotRefMap];
	  int* tmpi2 = new int[RefMap.ntotRefMap];
	  for (int i = 0;i < RefMap.ntotRefMap;i++)
	    {
	      bioem_Probability_map& pProbMap = pProb.getProbMap(i);
	      tmpi1[i] = tmp2[i] <= pProbMap.Constoadd ? mpi_rank : -1;
	    }
	  MPI_Allreduce(tmpi1, tmpi2, RefMap.ntotRefMap, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
	  for (int i = 0;i < RefMap.ntotRefMap;i++)
	    {
	      if (tmpi2[i] == -1)
David Rohr's avatar
David Rohr committed
552
		{
553
		  if (mpi_rank == 0) printf("Error: Could not find highest probability\n");
David Rohr's avatar
David Rohr committed
554
		}
555
	      else if (tmpi2[i] != 0) //Skip if rank 0 already has highest probability
David Rohr's avatar
David Rohr committed
556
		{
557
558
559
560
561
562
563
564
		  if (mpi_rank == 0)
		    {
		      MPI_Recv(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, tmpi2[i], i, MPI_COMM_WORLD, &mpistatus);
		    }
		  else if (mpi_rank == tmpi2[i])
		    {
		      MPI_Send(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, 0, i, MPI_COMM_WORLD);
		    }
David Rohr's avatar
David Rohr committed
565
		}
566
567
568
	    }
	  delete[] tmpi1;
	  delete[] tmpi2;
569
	}
570

David Rohr's avatar
David Rohr committed
571
	if (mpi_rank == 0)
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
	  {
	    for (int i = 0;i < RefMap.ntotRefMap;i++)
	      {
		bioem_Probability_map& pProbMap = pProb.getProbMap(i);
		pProbMap.Total = tmp3[i];
		pProbMap.Constoadd = tmp2[i];
	      }
	  }

	delete[] tmp1;
	delete[] tmp2;
	delete[] tmp3;
	if (DebugOutput >= 1 && mpi_rank == 0 && mpi_size > 1) printf("Time MPI Reduction: %f\n", timer.GetCurrentElapsedTime());
      }

      //Angle Reduction and Probability summation for individual angles
      if (param.param_device.writeAngles)
589
	{
590
591
592
593
594
595
596
597
598
599
600
601
	  const int count = RefMap.ntotRefMap * param.nTotGridAngles;
	  myfloat_t* tmp1 = new myfloat_t[count];
	  myfloat_t* tmp2 = new myfloat_t[count];
	  myfloat_t* tmp3 = new myfloat_t[count];
	  for (int i = 0;i < RefMap.ntotRefMap;i++)
	    {
	      tmp1[i] = pProb.getProbMap(i).Constoadd;
	    }
	  MPI_Allreduce(tmp1, tmp2, count, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
	  for (int i = 0;i < RefMap.ntotRefMap;i++)
	    {
	      for (int j = 0;j < param.nTotGridAngles;j++)
David Rohr's avatar
David Rohr committed
602
		{
603
604
		  bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j);
		  tmp1[i * param.nTotGridAngles + j] = pProbAngle.forAngles * exp(pProbAngle.ConstAngle - tmp2[i * param.nTotGridAngles + j]);
David Rohr's avatar
David Rohr committed
605
		}
606
607
608
609
610
	    }
	  MPI_Reduce(tmp1, tmp3, count, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
	  if (mpi_rank == 0)
	    {
	      for (int i = 0;i < RefMap.ntotRefMap;i++)
611
		{
612
613
614
615
616
617
		  for (int j = 0;j < param.nTotGridAngles;j++)
		    {
		      bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j);
		      pProbAngle.forAngles = tmp3[i * param.nTotGridAngles + j];
		      pProbAngle.ConstAngle = tmp2[i * param.nTotGridAngles + j];
		    }
618
		}
619
620
621
622
623
624
625
	    }
	  delete[] tmp1;
	  delete[] tmp2;
	  delete[] tmp3;
	}
    }
#endif
David Rohr's avatar
David Rohr committed
626

627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
  if (mpi_rank == 0)
    {
      ofstream angProbfile;
      if(param.param_device.writeAngles)
	{
	  angProbfile.open ("ANG_PROB");
	}
      ofstream ccProbfile;
      if(param.param_device.writeCC)
	{
	  ccProbfile.open ("CROSS_CORRELATION");
	}

      ofstream outputProbFile;
      outputProbFile.open ("Output_Probabilities");
      for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++)
	{
	  // **** Total Probability ***
	  bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);

	  outputProbFile << "RefMap " << iRefMap << " Probability  "  << log(pProbMap.Total) + pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " Constant " << pProbMap.Constoadd  << "\n";

	  outputProbFile << "RefMap " << iRefMap << " Maximizing Param: ";

	  // *** Param that maximize probability****
	  outputProbFile << (pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + log(param.param_device.volu)) << " ";
	  outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[0] << " ";
	  outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[1] << " ";
	  outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[2] << " ";
	  outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[0] << " ";
	  outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[1] << " ";
	  outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[2] << " ";
	  outputProbFile << pProbMap.max.max_prob_cent_x << " ";
	  outputProbFile << pProbMap.max.max_prob_cent_y << " " ;
	  outputProbFile << pProbMap.max.max_prob_norm << " " ; // param.param_device.Ntotpi / param.param_device.Ntotpi << " " ;
	  outputProbFile << pProbMap.max.max_prob_mu ;
	  outputProbFile << "\n";

	  // *** For individual files*** //angProbfile.open ("ANG_PROB_"iRefMap);

	  if(param.param_device.writeAngles)
	    {
	      for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient++)
David Rohr's avatar
David Rohr committed
670
		{
671
		  bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
672

673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
		  angProbfile << " " << iRefMap << " " << param.angles[iOrient].pos[0] << " " << param.angles[iOrient].pos[1] << " " << param.angles[iOrient].pos[2] << " " << log(pProbAngle.forAngles) + pProbAngle.ConstAngle + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " " << log(param.param_device.volu) << "\n";
		}
	    }
	  if(param.param_device.writeCC)
	    {
	      int  cc=0;
	      int halfPix;
	      int rx=0;
	      int ry=0;
	      myfloat_t localcc[ (param.param_device.NumberPixels+1) * (param.param_device.NumberPixels+1) ];

	      halfPix = param.param_device.NumberPixels / 2 ;
	      // Ordering the centers of the Cross Correlation

	      for (int rx = param.param_device.CCdisplace; rx < param.param_device.NumberPixels -param.param_device.CCdisplace ; rx = rx + param.param_device.CCdisplace)
David Rohr's avatar
David Rohr committed
688
		{
689
690
691
692
		  for (int ry = param.param_device.CCdisplace; ry < param.param_device.NumberPixels - param.param_device.CCdisplace ; ry = ry + param.param_device.CCdisplace)
		    {
		      localcc[ rx * param.param_device.NumberPixels + ry ] = 0.0;
		    }
David Rohr's avatar
David Rohr committed
693
		}
694

695
	      for (int cent_x = 0; cent_x < param.param_device.NumberPixels -param.param_device.CCdisplace ; cent_x = cent_x + param.param_device.CCdisplace)
696
		{
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
		  for (int cent_y = 0; cent_y < param.param_device.NumberPixels - param.param_device.CCdisplace ; cent_y = cent_y + param.param_device.CCdisplace)
		    {
		      //localcc[ rx * param.param_device.NumberPixels + ry ] = 0.0;
		      bioem_Probability_cc& pProbCC = pProb.getProbCC(iRefMap, cc);
		      if(cent_x < halfPix && cent_y < halfPix){
			//	ccProbfile << " " << iRefMap << " " << (myfloat_t) halfPix  - cent_x << " " << halfPix - cent_y << " " << pProbCC.forCC <<"\n";
			rx = halfPix  - cent_x;
			ry = halfPix  - cent_y;}
		      if(cent_x >= halfPix && cent_y < halfPix){
			//      ccProbfile << " " << iRefMap << " " << (myfloat_t) 3 * halfPix  - cent_x << " " << halfPix - cent_y << " " << pProbCC.forCC <<"\n"; 
			rx = 3 * halfPix  - cent_x;
			ry = halfPix  - cent_y;}
		      if(cent_x < halfPix && cent_y >= halfPix){
			//      ccProbfile << " " << iRefMap << " " << (myfloat_t) halfPix  - cent_x << " " << 3 * halfPix - cent_y << " " << pProbCC.forCC <<"\n";
			rx = halfPix  - cent_x;
			ry = 3 * halfPix  - cent_y;}
		      if(cent_x >= halfPix && cent_y >= halfPix){
			//        ccProbfile << " " << iRefMap << " " << 3* halfPix  - cent_x << " " << 3 * halfPix - cent_y << " " << pProbCC.forCC <<"\n";
			rx = 3 * halfPix  - cent_x;
			ry = 3 * halfPix  - cent_y;}
		      //						cout << " TT " << cent_x << " " << rx << " " << cent_y << " " << ry << " " <<  pProbCC.forCC << "\n";
		      localcc[ rx * param.param_device.NumberPixels + ry ] = pProbCC.forCC;
		      cc++;
		    }
		  //              ccProbfile << "\n";
722
		}
723
724
725
726
727
728
729
730
731
732
733
	      for (int rx = param.param_device.CCdisplace; rx < param.param_device.NumberPixels -param.param_device.CCdisplace ; rx = rx + param.param_device.CCdisplace)
		{
		  for (int ry = param.param_device.CCdisplace; ry < param.param_device.NumberPixels - param.param_device.CCdisplace ; ry = ry + param.param_device.CCdisplace)
		    {
		      ccProbfile << rx << " " << ry << " " << localcc[ rx * param.param_device.NumberPixels + ry ] << "\n" ; 
		    }
		  ccProbfile << "\n";
		}			
  
	    }
	}
734

735
736
737
738
739
740
741
742
      if(param.param_device.writeAngles)
	{
	  angProbfile.close();
	}

      if(param.param_device.writeCC)
	{
	  ccProbfile.close();
743
	}
744

745
746
747
748
      outputProbFile.close();
    }

  return(0);
749
750
}

751
int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap)
752
{
753
754
755
756
757
758
759
  //***************************************************************************************
  //***** BioEM routine for comparing reference maps to convoluted maps *****
  if (FFTAlgo)
    {
      //With FFT Algorithm
#pragma omp parallel for schedule(dynamic, 1)
      for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
760
	{
761
762
	  const int num = omp_get_thread_num();
	  calculateCCFFT(iRefMap, iOrient, iConv, sumC, sumsquareC, localmultFFT, param.fft_scratch_complex[num], param.fft_scratch_real[num]);
763
	}
764
765
766
767
768
769
    }
  else
    {
      //Without FFT Algorithm
#pragma omp parallel for schedule(dynamic, 1)
      for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
770
	{
771
	  compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, conv_map, pProb, param.param_device, RefMap);
772
	}
773
774
    }
  return(0);
775
776
}

777
inline void bioem::calculateCCFFT(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT, mycomplex_t* localCCT, myfloat_t* lCC)
778
{
779
780
  //***************************************************************************************
  //***** Calculating cross correlation in FFTALGOrithm *****
Pilar Cossio's avatar
Pilar Cossio committed
781

782
783
784
785
786
787
  const mycomplex_t* RefMapFFT = &RefMap.RefMapsFFT[iRefMap * param.FFTMapSize];
  for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D; i++)
    {
      localCCT[i][0] = localConvFFT[i][0] * RefMapFFT[i][0] + localConvFFT[i][1] * RefMapFFT[i][1];
      localCCT[i][1] = localConvFFT[i][1] * RefMapFFT[i][0] - localConvFFT[i][0] * RefMapFFT[i][1];
    }
788

789
  myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, localCCT, lCC);
790

791
  doRefMapFFT(iRefMap, iOrient, iConv, lCC, sumC, sumsquareC, pProb, param.param_device, RefMap);
Pilar Cossio's avatar
Pilar Cossio committed
792
793

#ifdef PILAR_DEBUG
794
795
796
797
798
799
800
801
802
803
804
  if (param.param_device.writeCC)
    {      int  cc=0;
      for (int cent_x = 0; cent_x < param.param_device.NumberPixels -param.param_device.CCdisplace ; cent_x = cent_x + param.param_device.CCdisplace)
	{
	  for (int cent_y = 0; cent_y < param.param_device.NumberPixels - param.param_device.CCdisplace ; cent_y = cent_y + param.param_device.CCdisplace)
	    {
	      cout << "CHECKCC " << " " << cent_x << " " << cent_y <<" " << lCC[cent_x * param.param_device.NumberPixels + cent_y] / (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels ) << "\n";
	      cc++;
	    }
	}
    }
Pilar Cossio's avatar
Pilar Cossio committed
805
806
#endif

807
}
808

809
int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
810
{
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
  // **************************************************************************************
  // ****  BioEM Create Projection routine in Euler angle predefined grid******************
  // ********************* and turns projection into Fourier space ************************
  // **************************************************************************************

  cuda_custom_timeslot("Projection", 0);

  myfloat3_t RotatedPointsModel[Model.nPointsModel];
  myfloat_t rotmat[3][3];
  myfloat_t alpha, gam, beta;
  myfloat_t* localproj;

  localproj = param.fft_scratch_real[omp_get_thread_num()];
  memset(localproj, 0, param.param_device.NumberPixels * param.param_device.NumberPixels * sizeof(*localproj));

  alpha = param.angles[iMap].pos[0];
  beta = param.angles[iMap].pos[1];
  gam = param.angles[iMap].pos[2];

  // **** To see how things are going: cout << "Id " << omp_get_thread_num() <<  " Angs: " << alpha << " " << beta << " " << gam << "\n"; ***

  // ********** Creat Rotation with pre-defiend grid of orientations**********
  rotmat[0][0] = cos(gam) * cos(alpha) - cos(beta) * sin(alpha) * sin(gam);
  rotmat[0][1] = cos(gam) * sin(alpha) + cos(beta) * cos(alpha) * sin(gam);
  rotmat[0][2] = sin(gam) * sin(beta);
  rotmat[1][0] = -sin(gam) * cos(alpha) - cos(beta) * sin(alpha) * cos(gam);
  rotmat[1][1] = -sin(gam) * sin(alpha) + cos(beta) * cos(alpha) * cos(gam);
  rotmat[1][2] = cos(gam) * sin(beta);
  rotmat[2][0] = sin(beta) * sin(alpha);
  rotmat[2][1] = -sin(beta) * cos(alpha);
  rotmat[2][2] = cos(beta);

  for(int n = 0; n < Model.nPointsModel; n++)
    {
      RotatedPointsModel[n].pos[0] = 0.0;
      RotatedPointsModel[n].pos[1] = 0.0;
      RotatedPointsModel[n].pos[2] = 0.0;
    }
  for(int n = 0; n < Model.nPointsModel; n++)
    {
      for(int k = 0; k < 3; k++)
852
	{
853
854
855
856
	  for(int j = 0; j < 3; j++)
	    {
	      RotatedPointsModel[n].pos[k] += rotmat[k][j] * Model.points[n].point.pos[j];
	    }
857
	}
858
    }
859

860
  int i, j;
861

862
863
864
865
866
  //********** Projection with radius ***************
  if(param.doaaradius)
    {
      int irad;
      myfloat_t dist, rad2;
Pilar Cossio's avatar
Pilar Cossio committed
867

868
869
870
871
872
873
874
875
876
877
878
879
880
      for(int n = 0; n < Model.nPointsModel; n++)
	{
	  //Getting Centers of Sphere
	  i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
	  j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
	  //Getting the radius
	  irad=int( Model.points[n].radius / param.pixelSize );
	  rad2= Model.points[n].radius * Model.points[n].radius;

	  //Projecting over the radius
	  for(int ii= i - irad; ii < i + irad; ii++)
	    {	
	      for(int jj = j - irad; jj < j + irad; jj++)
881
		{
882
883
884
885
886
887
888
		  dist= ( (myfloat_t) (ii-i)*(ii-i)+(jj-j)*(jj-j) ) *  param.pixelSize ;
		  if( dist < rad2 )
		    {
		      localproj[ii * param.param_device.NumberPixels + jj] += param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density
			//							/ Model.NormDen * 3 / (4 * M_PI * rad2 *  Model.points[n].radius); 	
			* 3 / (4 * M_PI * rad2 *  Model.points[n].radius); 
		    }
Pilar Cossio's avatar
Pilar Cossio committed
889
		}
890
	    }
Pilar Cossio's avatar
Pilar Cossio committed
891
	}
892
893
894
895
896
    }
  else 
    {
      // ************ Simple Projection over the Z axis********************
      for(int n = 0; n < Model.nPointsModel; n++)
897
	{
898
899
900
	  //Getting pixel that represents coordinates & shifting the start at to Numpix/2,Numpix/2 )
	  i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
	  j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
901

902
903
904
905
906
	  if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels)
	    {
	      if (DebugOutput >= 3) cout << "Model Point out of map: " << i << ", " << j << "\n";
	      continue;
	    }
907

908
	  localproj[i * param.param_device.NumberPixels + j] += Model.points[n].density;//Model.NormDen;
909
	}
910
    }
911

912
  // **** Output Just to check****
913
#ifdef PILAR_DEBUG
914
915
916
917
918
919
920
921
  if(iMap == 10)
    {
      ofstream myexamplemap;
      ofstream myexampleRot;
      myexamplemap.open ("MAP_i10");
      myexampleRot.open ("Rot_i10");
      myexamplemap << "ANGLES " << alpha << " " << beta << " " << gam << "\n";
      for(int k = 0; k < param.param_device.NumberPixels; k++)
922
	{
923
	  for(int j = 0; j < param.param_device.NumberPixels; j++) myexamplemap << "\nMAP " << k << " " << j << " " << localproj[k * param.param_device.NumberPixels + j];
924
	}
925
926
927
928
929
      myexamplemap << " \n";
      for(int n = 0; n < Model.nPointsModel; n++)myexampleRot << "\nCOOR " << RotatedPointsModel[n].pos[0] << " " << RotatedPointsModel[n].pos[1] << " " << RotatedPointsModel[n].pos[2];
      myexamplemap.close();
      myexampleRot.close();
    }
930
#endif
931

932
933
934
  // ***** Converting projection to Fourier Space for Convolution later with kernel****
  // ********** Omp Critical is necessary with FFTW*******
  myfftw_execute_dft_r2c(param.fft_plan_r2c_forward, localproj, mapFFT);
935

936
  cuda_custom_timeslot_end;
937

938
  return(0);
939
940
}

941
int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj, myfloat_t* Mapconv, mycomplex_t* localmultFFT, myfloat_t& sumC, myfloat_t& sumsquareC)
942
{
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
  // **************************************************************************************
  // ****  BioEM Create Convoluted Projection Map routine, multiplies in Fourier **********
  // **************** calculated Projection with convoluted precalculated Kernel***********
  // *************** and Backtransforming it to real Space ********************************
  // **************************************************************************************


  cuda_custom_timeslot("Convolution", 1);

  mycomplex_t* tmp = param.fft_scratch_complex[omp_get_thread_num()];

  // **** Multiplying FFTmap with corresponding kernel ****
  const mycomplex_t* refCTF = &param.refCTF[iConv * param.FFTMapSize];
  for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D; i++)
    {
      localmultFFT[i][0] = ( lproj[i][0] * refCTF[i][0] + lproj[i][1] * refCTF[i][1] ) ;
      localmultFFT[i][1] = ( lproj[i][1] * refCTF[i][0] - lproj[i][0] * refCTF[i][1] ) ;
      // cout << "GG " << i << " " << j << " " << refCTF[i][0] << " " << refCTF[i][1] <<" " <<lproj[i][0] <<" " <<lproj[i][1] << "\n";
    }

  // *** Calculating Cross-correlations of cal-convoluted map with its self *****
  sumC = localmultFFT[0][0];

  sumsquareC = 0;
  if (FFTAlgo)
    {
      int jloopend = param.param_device.NumberFFTPixels1D;
      if ((param.param_device.NumberPixels & 1) == 0) jloopend--;
      for(int i = 0; i < param.param_device.NumberPixels; i++)
	{
	  for (int j = 1;j < jloopend;j++)
	    {
	      int k = i * param.param_device.NumberFFTPixels1D + j;
	      sumsquareC += (localmultFFT[k][0] * localmultFFT[k][0] + localmultFFT[k][1] * localmultFFT[k][1]) * 2;
	    }
	  int k = i * param.param_device.NumberFFTPixels1D;
	  sumsquareC += localmultFFT[k][0] * localmultFFT[k][0] + localmultFFT[k][1] * localmultFFT[k][1];
	  if ((param.param_device.NumberPixels & 1) == 0)
	    {
	      k += param.param_device.NumberFFTPixels1D - 1;
	      sumsquareC += localmultFFT[k][0] * localmultFFT[k][0] + localmultFFT[k][1] * localmultFFT[k][1];
	    }
	}
986

987
988
989
990
991
992
993
      myfloat_t norm2 = (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels);
      sumsquareC = sumsquareC / norm2;
    }
  else
    {
      //FFTW_C2R will destroy the input array, so we have to work on a copy here
      memcpy(tmp, localmultFFT, sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
994

995
996
      // **** Bringing convoluted Map to real Space ****
      myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, tmp, Mapconv);
Pilar Cossio's avatar
Pilar Cossio committed
997

998
      for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberPixels; i++)
999
	{
1000
	  sumsquareC += Mapconv[i] * Mapconv[i];
1001
1002
	}

1003
1004
1005
1006
      myfloat_t norm2 = (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels);
      myfloat_t norm4 = norm2 * norm2;
      sumsquareC = sumsquareC / norm4;
    }
David Rohr's avatar
David Rohr committed
1007

1008
1009
1010
  // **************************************************************************************
  // *********** Routine for printing out the best projetion ******************************
  // **************************************************************************************
David Rohr's avatar
David Rohr committed
1011

1012
1013
1014
  if (mpi_rank == 0 && param.printModel)
    {
      MTRand mtr;
1015

1016
      memcpy(tmp, localmultFFT, sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
1017

1018
1019
      // **** Bringing convoluted Map to real Space ****
      myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, tmp, Mapconv);
1020

1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
      myfloat_t norm2 = (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels);

      ofstream myexamplemap;
      myexamplemap.open ("BESTMAP");
      for(int k = 0; k < param.param_device.NumberPixels; k++)
	{
	  for(int j = 0; j < param.param_device.NumberPixels; j++) {
	    if(!param.withnoise){
	      myexamplemap << "\nMAP " << k+param.ddx << " " << j+param.ddy << " " <<  Mapconv[k * param.param_device.NumberPixels + j] / norm2 *param.bestnorm +param.bestoff ; 
	    } else {
	      myexamplemap << "\nMAP " << k+param.ddx << " " << j+param.ddy << " " <<  Mapconv[k * param.param_device.NumberPixels + j] / norm2 *param.bestnorm +param.bestoff+ mtr.randNorm(0.0,param.stnoise);
	    }
	  }
	  myexamplemap << " \n";
1035
	}
1036
1037
1038
1039
      myexamplemap.close();

      cout << "\n\nBest map printed in file: BESTMAP with gnuplot format in columns 2, 3 and 4. \n\n\n";
      exit(1);
David Rohr's avatar
David Rohr committed
1040

1041
    }
1042

1043
1044
1045
  cuda_custom_timeslot_end;

  return(0);
1046
1047
}

1048
int bioem::calcross_cor(myfloat_t* localmap, myfloat_t& sum, myfloat_t& sumsquare)
1049
{
1050
  // *********************** Routine to calculate Cross correlations***********************
1051

1052
1053
1054
1055
1056
  sum = 0.0;
  sumsquare = 0.0;
  for (int i = 0; i < param.param_device.NumberPixels; i++)
    {
      for (int j = 0; j < param.param_device.NumberPixels; j++)
1057
	{
1058
1059
1060
1061
	  // Calculate Sum of pixels
	  sum += localmap[i * param.param_device.NumberPixels + j];
	  // Calculate Sum of pixels squared
	  sumsquare += localmap[i * param.param_device.NumberPixels + j] * localmap[i * param.param_device.NumberPixels + j];
1062
	}
1063
1064
    }
  return(0);
1065
1066
1067
1068
}

int bioem::deviceInit()
{
1069
  return(0);
1070
1071
1072
1073
}

int bioem::deviceStartRun()
{
1074
  return(0);
1075
1076
1077
1078
}

int bioem::deviceFinishRun()
{
1079
  return(0);
1080
}
1081
1082
1083

void* bioem::malloc_device_host(size_t size)
{
1084
  return(mallocchk(size));
1085
1086
1087
1088
}

void bioem::free_device_host(void* ptr)
{
1089
  free(ptr);
1090
}