bioem.cpp 42.3 KB
Newer Older
Pilar Cossio's avatar
License    
Pilar Cossio committed
1
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2
3
4
   < BioEM software for Bayesian inference of Electron Microscopy images>
   Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
   Max Planck Institute of Biophysics, Frankfurt, Germany.
5

6
   See license statement for terms of distribution.
Pilar Cossio's avatar
License    
Pilar Cossio committed
7
8
9

   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

10
11
#include <mpi.h>

12
13
14
15
16
#define MPI_CHK(expr)							\
  if (expr != MPI_SUCCESS)						\
    {									\
      fprintf(stderr, "Error in MPI function %s: %d\n", __FILE__, __LINE__); \
    }
17

18
19
20
21
22
23
24
25
26
#include <fstream>
#include <boost/program_options.hpp>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <cmath>
27

28
#ifdef WITH_OPENMP
29
#include <omp.h>
30
#endif
31
32
33
34
35
36
37
38
39

#include <fftw3.h>
#include <math.h>
#include "cmodules/timer.h"

#include "param.h"
#include "bioem.h"
#include "model.h"
#include "map.h"
40
#include "MersenneTwister.h"
41

42
43
44
45
46
47
#ifdef BIOEM_USE_NVTX
#include "nvToolsExt.h"

const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
const int num_colors = sizeof(colors)/sizeof(colors[0]);

48
49
50
51
52
53
54
55
56
57
58
59
#define cuda_custom_timeslot(name,cid) {		\
    int color_id = cid;					\
    color_id = color_id%num_colors;			\
    nvtxEventAttributes_t eventAttrib = {0};		\
    eventAttrib.version = NVTX_VERSION;			\
    eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;	\
    eventAttrib.colorType = NVTX_COLOR_ARGB;		\
    eventAttrib.color = colors[color_id];		\
    eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;	\
    eventAttrib.message.ascii = name;			\
    nvtxRangePushEx(&eventAttrib);			\
  }
60
61
62
63
64
#define cuda_custom_timeslot_end nvtxRangePop();
#else
#define cuda_custom_timeslot(name,cid)
#define cuda_custom_timeslot_end
#endif
65

66
67
68
69
70
71
72
73
74
75
76
#include "bioem_algorithm.h"

using namespace boost;
namespace po = boost::program_options;

using namespace std;

// A helper function of Boost
template<class T>
ostream& operator<<(ostream& os, const vector<T>& v)
{
77
78
  copy(v.begin(), v.end(), ostream_iterator<T>(os, " "));
  return os;
79
80
81
82
}

bioem::bioem()
{
83
84
85
  FFTAlgo = getenv("FFTALGO") == NULL ? 1 : atoi(getenv("FFTALGO"));
  DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 2 : atoi(getenv("BIOEM_DEBUG_OUTPUT"));
  nProjectionsAtOnce = getenv("BIOEM_PROJECTIONS_AT_ONCE") == NULL ? 1 : atoi(getenv("BIOEM_PROJECTIONS_AT_ONCE"));
86
87
88
89
90
91
92
93
}

bioem::~bioem()
{
}

int bioem::configure(int ac, char* av[])
{
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
  // **************************************************************************************
  // **** Configuration Routine using boost for extracting parameters, models and maps ****
  // **************************************************************************************
  // ****** And Precalculating necessary grids, map crosscorrelations and kernels  ********
  // *************************************************************************************

  HighResTimer timer;

  if (mpi_rank == 0)
    {
      // *** Inizialzing default variables ***
      std::string infile, modelfile, mapfile,Inputanglefile,Inputbestmap;
      Model.readPDB = false;
      param.param_device.writeAngles = false;
      param.dumpMap = false;
      param.loadMap = false;
      RefMap.readMRC = false;
      RefMap.readMultMRC = false;
      param.notuniformangles=false;

      // *************************************************************************************
      cout << " ++++++++++++ FROM COMMAND LINE +++++++++++\n\n";
      // *************************************************************************************

      // ********************* Command line reading input with BOOST ************************

      try {
	po::options_description desc("Command line inputs");
	desc.add_options()
	  ("Modelfile", po::value< std::string>() , "(Mandatory) Name of model file")
	  ("Particlesfile", po::value< std::string>(), "if BioEM (Mandatory) Name of paricles file")
	  ("Inputfile", po::value<std::string>(), "if BioEM (Mandatory) Name of input parameter file") 
	  ("PrintBestCalMap", po::value< std::string>(), "(Optional) Only print best calculated map (file nec.). NO BioEM (!)")
	  ("ReadEulerAngles", po::value< std::string>(), "(Optional) Read Euler angle list instead of uniform grid (file nec.)")
	  ("ReadPDB", "(Optional) If reading model file in PDB format")
	  ("ReadMRC", "(Optional) If reading particle file in MRC format")
	  ("ReadMultipleMRC", "(Optional) If reading Multiple MRCs")
	  ("DumpMaps", "(Optional) Dump maps after they were red from maps file")
	  ("LoadMapDump", "(Optional) Read Maps from dump instead of maps file")
	  ("help", "(Optional) Produce help message")
	  ;


	po::positional_options_description p;
	p.add("Inputfile", -1);
	p.add("Modelfile", -1);
	p.add("Particlesfile", -1);
	p.add("ReadPDB", -1);
	p.add("ReadMRC", -1);
	p.add("ReadMultipleMRC", -1);
	p.add("ReadEulerAngles",-1);
	p.add("PrintBestCalMap",-1);
	p.add("DumpMaps", -1);
	p.add("LoadMapDump", -1);

	po::variables_map vm;
	po::store(po::command_line_parser(ac, av).
		  options(desc).positional(p).run(), vm);
	po::notify(vm);

	if((ac < 4)) {
	  std::cout << desc << std::endl;
	  return 1;
	}
	if (vm.count("help")) {
	  cout << "Usage: options_description [options]\n";
	  cout << desc;
	  return 1;
	}
163

164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
	if (vm.count("Inputfile"))
	  {
	    cout << "Input file is: ";
	    cout << vm["Inputfile"].as< std::string >() << "\n";
	    infile = vm["Inputfile"].as< std::string >();
	  }
	if (vm.count("Modelfile"))
	  {
	    cout << "Model file is: "
		 << vm["Modelfile"].as<  std::string  >() << "\n";
	    modelfile = vm["Modelfile"].as<  std::string  >();
	  }
	if (vm.count("ReadPDB"))
	  {
	    cout << "Reading model file in PDB format.\n";
	    Model.readPDB = true;
	  }
	if (vm.count("ReadEulerAngles"))
	  {
	    cout << "Reading Euler Angles from file: "
		 << vm["ReadEulerAngles"].as<  std::string  >() << "\n";
	    cout << "Note: Format should be alpha (12.6f) beta (12.6f) gamma (12.6f)\n";
	    Inputanglefile = vm["ReadEulerAngles"].as<  std::string  >();
	    param.notuniformangles=true;
	  }
	if (vm.count("PrintBestCalMap"))
	  {
	    cout << "Reading Euler Angles from file: "
		 << vm["PrintBestCalMap"].as<  std::string  >() << "\n";
	    Inputbestmap = vm["PrintBestCalMap"].as<  std::string  >();
	    param.printModel=true;
	  }

	if (vm.count("ReadMRC"))
	  {
	    cout << "Reading particle file in MRC format.\n";
	    RefMap.readMRC=true;
	  }

	if (vm.count("ReadMultipleMRC"))
	  {
	    cout << "Reading Multiple MRCs.\n";
	    RefMap.readMultMRC=true;
	  }

	if (vm.count("DumpMaps"))
	  {
	    cout << "Dumping Maps after reading from file.\n";
	    param.dumpMap = true;
	  }

	if (vm.count("LoadMapDump"))
	  {
	    cout << "Loading Map dump.\n";
	    param.loadMap = true;
	  }

	if (vm.count("Particlesfile"))
	  {
	    cout << "Paricle file is: "
		 << vm["Particlesfile"].as< std::string >() << "\n";
	    mapfile = vm["Particlesfile"].as< std::string >();
	  }
      }
      catch(std::exception& e)
	{
	  cout << e.what() << "\n";
	  return 1;
	}
David Rohr's avatar
David Rohr committed
233

234
235
      //check for consitency in multiple MRCs
      if(RefMap.readMultMRC && not(RefMap.readMRC))
David Rohr's avatar
David Rohr committed
236
	{
237
238
239
	  cout << "For Multiple MRCs command --ReadMRC is necesary too";
	  exit(1);
	}
David Rohr's avatar
David Rohr committed
240

241
242
243
244
      if(!Model.readPDB){
	cout << "Note: Reading model in simple text format (not PDB)\n";
	cout << "----  x   y   z  radius  density ------- \n";
      } 
David Rohr's avatar
David Rohr committed
245

246
247
248
249
250
      if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart();
      // ********************* Reading Parameter Input ***************************
      if(!param.printModel){
	// Standard definition for BioEM
	param.readParameters(infile.c_str(),Inputanglefile.c_str());
David Rohr's avatar
David Rohr committed
251

252
253
	// ********************* Reading Particle Maps Input **********************
	RefMap.readRefMaps(param, mapfile.c_str());
David Rohr's avatar
David Rohr committed
254

255

256
257
      } else{
	// Reading parameters for only writting down Best projection
258

259
260
	param.forprintBest(Inputbestmap.c_str());
      }	
261

262
263
      // ********************* Reading Model Input ******************************
      Model.readModel(modelfile.c_str());
David Rohr's avatar
David Rohr committed
264

265
      cout << "Remark: look at file COORDREAD to confirm that the Model coordinates are correct\n";
David Rohr's avatar
David Rohr committed
266

267
268
      if (DebugOutput >= 2 && mpi_rank == 0) printf("Reading Input Data %f\n", timer.GetCurrentElapsedTime());
    }
269

270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
#ifdef WITH_MPI
  if (mpi_size > 1)
    {
      if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart();
      MPI_Bcast(&param, sizeof(param), MPI_BYTE, 0, MPI_COMM_WORLD);
      //refCtf, CtfParam, angles automatically filled by precalculare function below

      MPI_Bcast(&Model, sizeof(Model), MPI_BYTE, 0, MPI_COMM_WORLD);
      if (mpi_rank != 0) Model.points = (bioem_model::bioem_model_point*) mallocchk(sizeof(bioem_model::bioem_model_point) * Model.nPointsModel);
      MPI_Bcast(Model.points, sizeof(bioem_model::bioem_model_point) * Model.nPointsModel, MPI_BYTE, 0, MPI_COMM_WORLD);

      MPI_Bcast(&RefMap, sizeof(RefMap), MPI_BYTE, 0, MPI_COMM_WORLD);
      if (mpi_rank != 0) RefMap.maps = (myfloat_t*) mallocchk(RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap);
      MPI_Bcast(RefMap.maps, RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap, MPI_BYTE, 0, MPI_COMM_WORLD);
      if (DebugOutput >= 2 && mpi_rank == 0) printf("MPI Broadcast of Input Data %f\n", timer.GetCurrentElapsedTime());
    }
#endif
287

288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
  // ****************** Precalculating Necessary Stuff *********************

  if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart();
  param.PrepareFFTs();
  if (DebugOutput >= 2 && mpi_rank == 0)
    {
      printf("Time Prepare FFTs %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  precalculate();

  if (getenv("BIOEM_DEBUG_BREAK"))
    {
      const int cut = atoi(getenv("BIOEM_DEBUG_BREAK"));
      if (param.nTotGridAngles > cut) param.nTotGridAngles = cut;
      if (param.nTotCTFs > cut) param.nTotCTFs = cut;
    }

  if (DebugOutput >= 2 && mpi_rank == 0)
    {
      printf("Time Precalculate %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  if(!param.printModel)pProb.init(RefMap.ntotRefMap, param.nTotGridAngles, param.nTotCC, *this);

  if (DebugOutput >= 2 && mpi_rank == 0)
    {
      printf("Time Init Probabilities %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  deviceInit();
  if (DebugOutput >= 2 && mpi_rank == 0) printf("Time Device Init %f\n", timer.GetCurrentElapsedTime());

  return(0);
322
323
}

324
325
void bioem::cleanup()
{
326
327
328
  //Deleting allocated pointers
  free_device_host(pProb.ptr);
  RefMap.freePointers();
329
330
}

331
332
int bioem::precalculate()
{
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
  // **************************************************************************************
  // **Precalculating Routine of Orientation grids, Map crosscorrelations and CTF Kernels**
  // **************************************************************************************
  HighResTimer timer;

  // Generating Grids of orientations 
  param.CalculateGridsParam();                

  if (DebugOutput >= 3)
    {
      printf("\tTime Precalculate Grids Param: %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  // Precalculating CTF Kernels stored in class Param
  param.CalculateRefCTF();

  if (DebugOutput >= 3)
    {
      printf("\tTime Precalculate CTFs: %f\n", timer.GetCurrentElapsedTime());
      timer.ResetStart();
    }
  //Precalculate Maps
  if(!param.printModel) RefMap.precalculate(param, *this);
  if (DebugOutput >= 3) printf("\tTime Precalculate Maps: %f\n", timer.GetCurrentElapsedTime());

  return(0);
359
360
361
362
}

int bioem::run()
{
David Rohr's avatar
David Rohr committed
363

364
365
366
  // **************************************************************************************
  // ********** Secondary routine for printing out the only best projection ***************
  // **************************************************************************************
David Rohr's avatar
David Rohr committed
367

368
  if(mpi_rank == 0 && param.printModel){ //Only works for 1 MPI process (not parallelized)
369

370
371
372
373
374
    cout << "\nAnalysis for printing best projection::: \n \n" ; 
    mycomplex_t* proj_mapsFFT;
    myfloat_t* conv_map = NULL;
    mycomplex_t* conv_mapFFT;
    myfloat_t sumCONV, sumsquareCONV;
375

376
377
378
    proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
    conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
    conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
379

380
    cout << "...... Calculating Projection .......................\n " ;
381

382
    createProjection(0, proj_mapsFFT);
383

384
    cout << "...... Calculating Convolution .......................\n " ;
David Rohr's avatar
David Rohr committed
385

386
    createConvolutedProjectionMap(0, 0, proj_mapsFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
387

388
  }
389

390
391
392
  // **************************************************************************************
  // **** Main BioEM routine, projects, convolutes and compares with Map using OpenMP ****
  // **************************************************************************************
393

394
395
  // **** If we want to control the number of threads -> omp_set_num_threads(XX); ******
  // ****************** Declarying class of Probability Pointer  *************************
David Rohr's avatar
David Rohr committed
396

397
398
399
400
401
  if (mpi_rank == 0) printf("\tInitializing Probabilities\n");
  // Inizialzing Probabilites to zero and constant to -Infinity
  for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++)
    {
      bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
David Rohr's avatar
David Rohr committed
402

403
404
405
      pProbMap.Total = 0.0;
      pProbMap.Constoadd = -9999999;
      if (param.param_device.writeAngles)
406
	{
407
408
409
410
411
412
413
414
415
416
417
418
419
	  for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient ++)
	    {
	      bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

	      pProbAngle.forAngles = 0.0;
	      pProbAngle.ConstAngle = -99999999;
	    }
	}
      if (param.param_device.writeCC)
	{      int  cc=0;
	  for (int cent_x = 0; cent_x < param.param_device.NumberPixels -param.param_device.CCdisplace ; cent_x = cent_x + param.param_device.CCdisplace)
	    {
	      for (int cent_y = 0; cent_y < param.param_device.NumberPixels - param.param_device.CCdisplace ; cent_y = cent_y + param.param_device.CCdisplace)
420
		{
421
422
423
424
		  bioem_Probability_cc& pProbCC = pProb.getProbCC(iRefMap, cc);
		  pProbCC.forCC = 0.0;
 
		  cc++;
425
		}
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
	    }
	}                 
    }
  // **************************************************************************************
  deviceStartRun();

  // ******************************** MAIN CYCLE ******************************************

  mycomplex_t* proj_mapsFFT;
  myfloat_t* conv_map = NULL;
  mycomplex_t* conv_mapFFT;
  myfloat_t sumCONV, sumsquareCONV;

  //allocating fftw_complex vector
  const int ProjMapSize = (param.FFTMapSize + 64) & ~63;	//Make sure this is properly aligned for fftw..., Actually this should be ensureb by using FFTMapSize, but it is not due to a bug in CUFFT which cannot handle padding properly
  proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * ProjMapSize * nProjectionsAtOnce);
  conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
  if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
             

  HighResTimer timer, timer2;

  if (DebugOutput >= 1 && mpi_rank == 0) printf("\tMain Loop GridAngles %d, CTFs %d, RefMaps %d, Shifts (%d/%d)², Pixels %d², OMP Threads %d, MPI Ranks %d\n", param.nTotGridAngles, param.nTotCTFs, RefMap.ntotRefMap, 2 * param.param_device.maxDisplaceCenter + param.param_device.GridSpaceCenter, param.param_device.GridSpaceCenter, param.param_device.NumberPixels, omp_get_max_threads(), mpi_size);

  const int iOrientStart = (int) ((long long int) mpi_rank * param.nTotGridAngles / mpi_size);
  int iOrientEnd = (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size);
  if (iOrientEnd > param.nTotGridAngles) iOrientEnd = param.nTotGridAngles;

  for (int iOrientAtOnce = iOrientStart; iOrientAtOnce < iOrientEnd; iOrientAtOnce += nProjectionsAtOnce)
    {
      // ***************************************************************************************
      // ***** Creating Projection for given orientation and transforming to Fourier space *****
      if (DebugOutput >= 1) timer2.ResetStart();
      if (DebugOutput >= 2) timer.ResetStart();
      int iTmpEnd = std::min(iOrientEnd, iOrientAtOnce + nProjectionsAtOnce);
#pragma omp parallel for
      for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
	{
	  createProjection(iOrient, &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]);
	}
      if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrientAtOnce, timer.GetCurrentElapsedTime(), mpi_rank);
467

468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
      for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
	{
	  mycomplex_t* proj_mapFFT = &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize];
	  // ***************************************************************************************
	  // ***** **** Internal Loop over convolutions **** *****
	  for (int iConv = 0; iConv < param.nTotCTFs; iConv++)
	    {
	      // *** Calculating convolutions of projection map and crosscorrelations ***

	      if (DebugOutput >= 2) timer.ResetStart();
	      createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
	      if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank);

	      // ***************************************************************************************
	      // *** Comparing each calculated convoluted map with all experimental maps ***
	      if (DebugOutput >= 2) timer.ResetStart();
	      compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);

	      if (DebugOutput >= 2)
487
		{
488
489
490
491
492
493
494
495
496
		  const double compTime = timer.GetCurrentElapsedTime();
		  const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
		  const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
		    (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / compTime;
		  const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
		    (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime;
		  const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime;

		  printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
497
		}
498
499
500
501
502
503
	    }
	  if (DebugOutput >= 1)
	    {
	      printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank);
	      timer2.ResetStart();
	    }
504
	}
505
506
507
508
509
    }
  //deallocating fftw_complex vector
  myfftw_free(proj_mapsFFT);
  myfftw_free(conv_mapFFT);
  if (!FFTAlgo) myfftw_free(conv_map);
David Rohr's avatar
David Rohr committed
510

511
  deviceFinishRun();
512

513
  // ************* Writing Out Probabilities ***************
514

515
  // *** Angular Probability ***
David Rohr's avatar
David Rohr committed
516

David Rohr's avatar
David Rohr committed
517
#ifdef WITH_MPI
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
  if (mpi_size > 1)
    {
      if (DebugOutput >= 1 && mpi_rank == 0) timer.ResetStart();
      //Reduce Constant and summarize probabilities
      {
	myfloat_t* tmp1 = new myfloat_t[RefMap.ntotRefMap];
	myfloat_t* tmp2 = new myfloat_t[RefMap.ntotRefMap];
	myfloat_t* tmp3 = new myfloat_t[RefMap.ntotRefMap];
	for (int i = 0;i < RefMap.ntotRefMap;i++)
	  {
	    tmp1[i] = pProb.getProbMap(i).Constoadd;
	  }
	MPI_Allreduce(tmp1, tmp2, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
	for (int i = 0;i < RefMap.ntotRefMap;i++)
	  {
	    bioem_Probability_map& pProbMap = pProb.getProbMap(i);
	    tmp1[i] = pProbMap.Total * exp(pProbMap.Constoadd - tmp2[i]);
	  }
	MPI_Reduce(tmp1, tmp3, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);

	//Find MaxProb
	MPI_Status mpistatus;
David Rohr's avatar
David Rohr committed
540
	{
541
542
543
544
545
546
547
548
549
550
551
	  int* tmpi1 = new int[RefMap.ntotRefMap];
	  int* tmpi2 = new int[RefMap.ntotRefMap];
	  for (int i = 0;i < RefMap.ntotRefMap;i++)
	    {
	      bioem_Probability_map& pProbMap = pProb.getProbMap(i);
	      tmpi1[i] = tmp2[i] <= pProbMap.Constoadd ? mpi_rank : -1;
	    }
	  MPI_Allreduce(tmpi1, tmpi2, RefMap.ntotRefMap, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
	  for (int i = 0;i < RefMap.ntotRefMap;i++)
	    {
	      if (tmpi2[i] == -1)
David Rohr's avatar
David Rohr committed
552
		{
553
		  if (mpi_rank == 0) printf("Error: Could not find highest probability\n");
David Rohr's avatar
David Rohr committed
554
		}
555
	      else if (tmpi2[i] != 0) //Skip if rank 0 already has highest probability
David Rohr's avatar
David Rohr committed
556
		{
557
558
559
560
561
562
563
564
		  if (mpi_rank == 0)
		    {
		      MPI_Recv(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, tmpi2[i], i, MPI_COMM_WORLD, &mpistatus);
		    }
		  else if (mpi_rank == tmpi2[i])
		    {
		      MPI_Send(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, 0, i, MPI_COMM_WORLD);
		    }
David Rohr's avatar
David Rohr committed
565
		}
566
567
568
	    }
	  delete[] tmpi1;
	  delete[] tmpi2;
569
	}
570

David Rohr's avatar
David Rohr committed
571
	if (mpi_rank == 0)
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
	  {
	    for (int i = 0;i < RefMap.ntotRefMap;i++)
	      {
		bioem_Probability_map& pProbMap = pProb.getProbMap(i);
		pProbMap.Total = tmp3[i];
		pProbMap.Constoadd = tmp2[i];
	      }
	  }

	delete[] tmp1;
	delete[] tmp2;
	delete[] tmp3;
	if (DebugOutput >= 1 && mpi_rank == 0 && mpi_size > 1) printf("Time MPI Reduction: %f\n", timer.GetCurrentElapsedTime());
      }

      //Angle Reduction and Probability summation for individual angles
      if (param.param_device.writeAngles)
589
	{
590
591
592
593
594
595
596
597
598
599
600
601
	  const int count = RefMap.ntotRefMap * param.nTotGridAngles;
	  myfloat_t* tmp1 = new myfloat_t[count];
	  myfloat_t* tmp2 = new myfloat_t[count];
	  myfloat_t* tmp3 = new myfloat_t[count];
	  for (int i = 0;i < RefMap.ntotRefMap;i++)
	    {
	      tmp1[i] = pProb.getProbMap(i).Constoadd;
	    }
	  MPI_Allreduce(tmp1, tmp2, count, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
	  for (int i = 0;i < RefMap.ntotRefMap;i++)
	    {
	      for (int j = 0;j < param.nTotGridAngles;j++)
David Rohr's avatar
David Rohr committed
602
		{
603
604
		  bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j);
		  tmp1[i * param.nTotGridAngles + j] = pProbAngle.forAngles * exp(pProbAngle.ConstAngle - tmp2[i * param.nTotGridAngles + j]);
David Rohr's avatar
David Rohr committed
605
		}
606
607
608
609
610
	    }
	  MPI_Reduce(tmp1, tmp3, count, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
	  if (mpi_rank == 0)
	    {
	      for (int i = 0;i < RefMap.ntotRefMap;i++)
611
		{
612
613
614
615
616
617
		  for (int j = 0;j < param.nTotGridAngles;j++)
		    {
		      bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j);
		      pProbAngle.forAngles = tmp3[i * param.nTotGridAngles + j];
		      pProbAngle.ConstAngle = tmp2[i * param.nTotGridAngles + j];
		    }
618
		}
619
620
621
622
623
624
625
	    }
	  delete[] tmp1;
	  delete[] tmp2;
	  delete[] tmp3;
	}
    }
#endif
David Rohr's avatar
David Rohr committed
626

627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
  if (mpi_rank == 0)
    {
      ofstream angProbfile;
      if(param.param_device.writeAngles)
	{
	  angProbfile.open ("ANG_PROB");
	}
      ofstream ccProbfile;
      if(param.param_device.writeCC)
	{
	  ccProbfile.open ("CROSS_CORRELATION");
	}

      ofstream outputProbFile;
      outputProbFile.open ("Output_Probabilities");
642
643
644
645
646
647
        outputProbFile <<"************************* HEADER:: NOTATION *******************************************\n";   
 	outputProbFile << " RefMap:  MapNumber ; LogProb: natural logarithm of posterior Probability ; Constant: Numerical Const. for adding Probabilities \n";
        outputProbFile << " RefMap:  MapNumber ; Maximizing Param: MaxLogProb - alpha - beta - gamma - PSF amp - PSF phase - PSF envelope - center x - center y - normalization - offsett \n";
        if(param.writeCTF) outputProbFile << " RefMap:  MapNumber ; CTFMaxParm: defocus - b-factor (ref. Penzeck 2010)\n";
        outputProbFile <<"************************* HEADER:: NOTATION *******************************************\n\n";

648
649
650
651
652
      for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++)
	{
	  // **** Total Probability ***
	  bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);

653
	  outputProbFile << "RefMap: " << iRefMap << " LogProb:  "  << log(pProbMap.Total) + pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " Constant: " << pProbMap.Constoadd  << "\n";
654

655
	  outputProbFile << "RefMap: " << iRefMap << " Maximizing Param: ";
656
657
658

	  // *** Param that maximize probability****
	  outputProbFile << (pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + log(param.param_device.volu)) << " ";
659
660
661
662
663
664
665
666
667
668
	  outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[0] << " [rad] ";
	  outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[1] << " [rad] ";
	  outputProbFile << param.angles[pProbMap.max.max_prob_orient].pos[2] << " [rad] ";
	  outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[0] << " [ ] ";
	  outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[1] << " [1./A²] ";
	  outputProbFile << param.CtfParam[pProbMap.max.max_prob_conv].pos[2] << " [1./A²] ";
	  outputProbFile << pProbMap.max.max_prob_cent_x << " [pix] ";
	  outputProbFile << pProbMap.max.max_prob_cent_y << " [pix] " ;
	  outputProbFile << pProbMap.max.max_prob_norm << " [ ] " ; 
	  outputProbFile << pProbMap.max.max_prob_mu << " [ ] ";
669
670
	  outputProbFile << "\n";

671
672
673
674
675
676
677
678

	  if(param.writeCTF){
          myfloat_t denomi;
          denomi = param.CtfParam[pProbMap.max.max_prob_conv].pos[1] * param.CtfParam[pProbMap.max.max_prob_conv].pos[1] + param.CtfParam[pProbMap.max.max_prob_conv].pos[2] * param.CtfParam[pProbMap.max.max_prob_conv].pos[2];
	  outputProbFile << "RefMap: " << iRefMap << " CTFMaxParam: ";
	  outputProbFile <<  2*M_PI*param.CtfParam[pProbMap.max.max_prob_conv].pos[1]/denomi/param.elecwavel << " [micro-m]; ";
          outputProbFile << "2*(" << sqrt(4*M_PI*param.CtfParam[pProbMap.max.max_prob_conv].pos[2]/denomi) << ")² [1./A²] \n";
	}
679
680
681
682
683
	  // *** For individual files*** //angProbfile.open ("ANG_PROB_"iRefMap);

	  if(param.param_device.writeAngles)
	    {
	      for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient++)
David Rohr's avatar
David Rohr committed
684
		{
685
		  bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
686

687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
		  angProbfile << " " << iRefMap << " " << param.angles[iOrient].pos[0] << " " << param.angles[iOrient].pos[1] << " " << param.angles[iOrient].pos[2] << " " << log(pProbAngle.forAngles) + pProbAngle.ConstAngle + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " " << log(param.param_device.volu) << "\n";
		}
	    }
	  if(param.param_device.writeCC)
	    {
	      int  cc=0;
	      int halfPix;
	      int rx=0;
	      int ry=0;
	      myfloat_t localcc[ (param.param_device.NumberPixels+1) * (param.param_device.NumberPixels+1) ];

	      halfPix = param.param_device.NumberPixels / 2 ;
	      // Ordering the centers of the Cross Correlation

	      for (int rx = param.param_device.CCdisplace; rx < param.param_device.NumberPixels -param.param_device.CCdisplace ; rx = rx + param.param_device.CCdisplace)
David Rohr's avatar
David Rohr committed
702
		{
703
704
705
706
		  for (int ry = param.param_device.CCdisplace; ry < param.param_device.NumberPixels - param.param_device.CCdisplace ; ry = ry + param.param_device.CCdisplace)
		    {
		      localcc[ rx * param.param_device.NumberPixels + ry ] = 0.0;
		    }
David Rohr's avatar
David Rohr committed
707
		}
708

709
	      for (int cent_x = 0; cent_x < param.param_device.NumberPixels -param.param_device.CCdisplace ; cent_x = cent_x + param.param_device.CCdisplace)
710
		{
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
		  for (int cent_y = 0; cent_y < param.param_device.NumberPixels - param.param_device.CCdisplace ; cent_y = cent_y + param.param_device.CCdisplace)
		    {
		      //localcc[ rx * param.param_device.NumberPixels + ry ] = 0.0;
		      bioem_Probability_cc& pProbCC = pProb.getProbCC(iRefMap, cc);
		      if(cent_x < halfPix && cent_y < halfPix){
			//	ccProbfile << " " << iRefMap << " " << (myfloat_t) halfPix  - cent_x << " " << halfPix - cent_y << " " << pProbCC.forCC <<"\n";
			rx = halfPix  - cent_x;
			ry = halfPix  - cent_y;}
		      if(cent_x >= halfPix && cent_y < halfPix){
			//      ccProbfile << " " << iRefMap << " " << (myfloat_t) 3 * halfPix  - cent_x << " " << halfPix - cent_y << " " << pProbCC.forCC <<"\n"; 
			rx = 3 * halfPix  - cent_x;
			ry = halfPix  - cent_y;}
		      if(cent_x < halfPix && cent_y >= halfPix){
			//      ccProbfile << " " << iRefMap << " " << (myfloat_t) halfPix  - cent_x << " " << 3 * halfPix - cent_y << " " << pProbCC.forCC <<"\n";
			rx = halfPix  - cent_x;
			ry = 3 * halfPix  - cent_y;}
		      if(cent_x >= halfPix && cent_y >= halfPix){
			//        ccProbfile << " " << iRefMap << " " << 3* halfPix  - cent_x << " " << 3 * halfPix - cent_y << " " << pProbCC.forCC <<"\n";
			rx = 3 * halfPix  - cent_x;
			ry = 3 * halfPix  - cent_y;}
		      //						cout << " TT " << cent_x << " " << rx << " " << cent_y << " " << ry << " " <<  pProbCC.forCC << "\n";
		      localcc[ rx * param.param_device.NumberPixels + ry ] = pProbCC.forCC;
		      cc++;
		    }
		  //              ccProbfile << "\n";
736
		}
737
738
739
740
741
742
743
744
745
746
747
	      for (int rx = param.param_device.CCdisplace; rx < param.param_device.NumberPixels -param.param_device.CCdisplace ; rx = rx + param.param_device.CCdisplace)
		{
		  for (int ry = param.param_device.CCdisplace; ry < param.param_device.NumberPixels - param.param_device.CCdisplace ; ry = ry + param.param_device.CCdisplace)
		    {
		      ccProbfile << rx << " " << ry << " " << localcc[ rx * param.param_device.NumberPixels + ry ] << "\n" ; 
		    }
		  ccProbfile << "\n";
		}			
  
	    }
	}
748

749
750
751
752
753
754
755
756
      if(param.param_device.writeAngles)
	{
	  angProbfile.close();
	}

      if(param.param_device.writeCC)
	{
	  ccProbfile.close();
757
	}
758

759
760
761
762
      outputProbFile.close();
    }

  return(0);
763
764
}

765
int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap)
766
{
767
768
769
770
771
772
773
  //***************************************************************************************
  //***** BioEM routine for comparing reference maps to convoluted maps *****
  if (FFTAlgo)
    {
      //With FFT Algorithm
#pragma omp parallel for schedule(dynamic, 1)
      for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
774
	{
775
776
	  const int num = omp_get_thread_num();
	  calculateCCFFT(iRefMap, iOrient, iConv, sumC, sumsquareC, localmultFFT, param.fft_scratch_complex[num], param.fft_scratch_real[num]);
777
	}
778
779
780
781
782
783
    }
  else
    {
      //Without FFT Algorithm
#pragma omp parallel for schedule(dynamic, 1)
      for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
784
	{
785
	  compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, conv_map, pProb, param.param_device, RefMap);
786
	}
787
788
    }
  return(0);
789
790
}

791
inline void bioem::calculateCCFFT(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT, mycomplex_t* localCCT, myfloat_t* lCC)
792
{
793
794
  //***************************************************************************************
  //***** Calculating cross correlation in FFTALGOrithm *****
Pilar Cossio's avatar
Pilar Cossio committed
795

796
797
798
799
800
801
  const mycomplex_t* RefMapFFT = &RefMap.RefMapsFFT[iRefMap * param.FFTMapSize];
  for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D; i++)
    {
      localCCT[i][0] = localConvFFT[i][0] * RefMapFFT[i][0] + localConvFFT[i][1] * RefMapFFT[i][1];
      localCCT[i][1] = localConvFFT[i][1] * RefMapFFT[i][0] - localConvFFT[i][0] * RefMapFFT[i][1];
    }
802

803
  myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, localCCT, lCC);
804

805
  doRefMapFFT(iRefMap, iOrient, iConv, lCC, sumC, sumsquareC, pProb, param.param_device, RefMap);
Pilar Cossio's avatar
Pilar Cossio committed
806
807

#ifdef PILAR_DEBUG
808
809
810
811
812
813
814
815
816
817
818
  if (param.param_device.writeCC)
    {      int  cc=0;
      for (int cent_x = 0; cent_x < param.param_device.NumberPixels -param.param_device.CCdisplace ; cent_x = cent_x + param.param_device.CCdisplace)
	{
	  for (int cent_y = 0; cent_y < param.param_device.NumberPixels - param.param_device.CCdisplace ; cent_y = cent_y + param.param_device.CCdisplace)
	    {
	      cout << "CHECKCC " << " " << cent_x << " " << cent_y <<" " << lCC[cent_x * param.param_device.NumberPixels + cent_y] / (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels ) << "\n";
	      cc++;
	    }
	}
    }
Pilar Cossio's avatar
Pilar Cossio committed
819
820
#endif

821
}
822

823
int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
824
{
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
  // **************************************************************************************
  // ****  BioEM Create Projection routine in Euler angle predefined grid******************
  // ********************* and turns projection into Fourier space ************************
  // **************************************************************************************

  cuda_custom_timeslot("Projection", 0);

  myfloat3_t RotatedPointsModel[Model.nPointsModel];
  myfloat_t rotmat[3][3];
  myfloat_t alpha, gam, beta;
  myfloat_t* localproj;

  localproj = param.fft_scratch_real[omp_get_thread_num()];
  memset(localproj, 0, param.param_device.NumberPixels * param.param_device.NumberPixels * sizeof(*localproj));

  alpha = param.angles[iMap].pos[0];
  beta = param.angles[iMap].pos[1];
  gam = param.angles[iMap].pos[2];

  // **** To see how things are going: cout << "Id " << omp_get_thread_num() <<  " Angs: " << alpha << " " << beta << " " << gam << "\n"; ***

  // ********** Creat Rotation with pre-defiend grid of orientations**********
  rotmat[0][0] = cos(gam) * cos(alpha) - cos(beta) * sin(alpha) * sin(gam);
  rotmat[0][1] = cos(gam) * sin(alpha) + cos(beta) * cos(alpha) * sin(gam);
  rotmat[0][2] = sin(gam) * sin(beta);
  rotmat[1][0] = -sin(gam) * cos(alpha) - cos(beta) * sin(alpha) * cos(gam);
  rotmat[1][1] = -sin(gam) * sin(alpha) + cos(beta) * cos(alpha) * cos(gam);
  rotmat[1][2] = cos(gam) * sin(beta);
  rotmat[2][0] = sin(beta) * sin(alpha);
  rotmat[2][1] = -sin(beta) * cos(alpha);
  rotmat[2][2] = cos(beta);

  for(int n = 0; n < Model.nPointsModel; n++)
    {
      RotatedPointsModel[n].pos[0] = 0.0;
      RotatedPointsModel[n].pos[1] = 0.0;
      RotatedPointsModel[n].pos[2] = 0.0;
    }
  for(int n = 0; n < Model.nPointsModel; n++)
    {
      for(int k = 0; k < 3; k++)
866
	{
867
868
869
870
	  for(int j = 0; j < 3; j++)
	    {
	      RotatedPointsModel[n].pos[k] += rotmat[k][j] * Model.points[n].point.pos[j];
	    }
871
	}
872
    }
873

874
  int i, j;
875

876
877
878
879
880
  //********** Projection with radius ***************
  if(param.doaaradius)
    {
      int irad;
      myfloat_t dist, rad2;
Pilar Cossio's avatar
Pilar Cossio committed
881

882
883
884
885
886
887
888
889
890
891
892
893
894
      for(int n = 0; n < Model.nPointsModel; n++)
	{
	  //Getting Centers of Sphere
	  i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
	  j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
	  //Getting the radius
	  irad=int( Model.points[n].radius / param.pixelSize );
	  rad2= Model.points[n].radius * Model.points[n].radius;

	  //Projecting over the radius
	  for(int ii= i - irad; ii < i + irad; ii++)
	    {	
	      for(int jj = j - irad; jj < j + irad; jj++)
895
		{
896
897
898
899
900
901
902
		  dist= ( (myfloat_t) (ii-i)*(ii-i)+(jj-j)*(jj-j) ) *  param.pixelSize ;
		  if( dist < rad2 )
		    {
		      localproj[ii * param.param_device.NumberPixels + jj] += param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density
			//							/ Model.NormDen * 3 / (4 * M_PI * rad2 *  Model.points[n].radius); 	
			* 3 / (4 * M_PI * rad2 *  Model.points[n].radius); 
		    }
Pilar Cossio's avatar
Pilar Cossio committed
903
		}
904
	    }
Pilar Cossio's avatar
Pilar Cossio committed
905
	}
906
907
908
909
910
    }
  else 
    {
      // ************ Simple Projection over the Z axis********************
      for(int n = 0; n < Model.nPointsModel; n++)
911
	{
912
913
914
	  //Getting pixel that represents coordinates & shifting the start at to Numpix/2,Numpix/2 )
	  i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
	  j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
915

916
917
918
919
920
	  if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels)
	    {
	      if (DebugOutput >= 3) cout << "Model Point out of map: " << i << ", " << j << "\n";
	      continue;
	    }
921

922
	  localproj[i * param.param_device.NumberPixels + j] += Model.points[n].density;//Model.NormDen;
923
	}
924
    }
925

926
  // **** Output Just to check****
927
#ifdef PILAR_DEBUG
928
929
930
931
932
933
934
935
  if(iMap == 10)
    {
      ofstream myexamplemap;
      ofstream myexampleRot;
      myexamplemap.open ("MAP_i10");
      myexampleRot.open ("Rot_i10");
      myexamplemap << "ANGLES " << alpha << " " << beta << " " << gam << "\n";
      for(int k = 0; k < param.param_device.NumberPixels; k++)
936
	{
937
	  for(int j = 0; j < param.param_device.NumberPixels; j++) myexamplemap << "\nMAP " << k << " " << j << " " << localproj[k * param.param_device.NumberPixels + j];
938
	}
939
940
941
942
943
      myexamplemap << " \n";
      for(int n = 0; n < Model.nPointsModel; n++)myexampleRot << "\nCOOR " << RotatedPointsModel[n].pos[0] << " " << RotatedPointsModel[n].pos[1] << " " << RotatedPointsModel[n].pos[2];
      myexamplemap.close();
      myexampleRot.close();
    }
944
#endif
945

946
947
948
  // ***** Converting projection to Fourier Space for Convolution later with kernel****
  // ********** Omp Critical is necessary with FFTW*******
  myfftw_execute_dft_r2c(param.fft_plan_r2c_forward, localproj, mapFFT);
949

950
  cuda_custom_timeslot_end;
951

952
  return(0);
953
954
}

955
int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj, myfloat_t* Mapconv, mycomplex_t* localmultFFT, myfloat_t& sumC, myfloat_t& sumsquareC)
956
{
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
  // **************************************************************************************
  // ****  BioEM Create Convoluted Projection Map routine, multiplies in Fourier **********
  // **************** calculated Projection with convoluted precalculated Kernel***********
  // *************** and Backtransforming it to real Space ********************************
  // **************************************************************************************


  cuda_custom_timeslot("Convolution", 1);

  mycomplex_t* tmp = param.fft_scratch_complex[omp_get_thread_num()];

  // **** Multiplying FFTmap with corresponding kernel ****
  const mycomplex_t* refCTF = &param.refCTF[iConv * param.FFTMapSize];
  for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D; i++)
    {
      localmultFFT[i][0] = ( lproj[i][0] * refCTF[i][0] + lproj[i][1] * refCTF[i][1] ) ;
      localmultFFT[i][1] = ( lproj[i][1] * refCTF[i][0] - lproj[i][0] * refCTF[i][1] ) ;
      // cout << "GG " << i << " " << j << " " << refCTF[i][0] << " " << refCTF[i][1] <<" " <<lproj[i][0] <<" " <<lproj[i][1] << "\n";
    }

  // *** Calculating Cross-correlations of cal-convoluted map with its self *****
  sumC = localmultFFT[0][0];

  sumsquareC = 0;
  if (FFTAlgo)
    {
      int jloopend = param.param_device.NumberFFTPixels1D;
      if ((param.param_device.NumberPixels & 1) == 0) jloopend--;
      for(int i = 0; i < param.param_device.NumberPixels; i++)
	{
	  for (int j = 1;j < jloopend;j++)
	    {
	      int k = i * param.param_device.NumberFFTPixels1D + j;
	      sumsquareC += (localmultFFT[k][0] * localmultFFT[k][0] + localmultFFT[k][1] * localmultFFT[k][1]) * 2;
	    }
	  int k = i * param.param_device.NumberFFTPixels1D;
	  sumsquareC += localmultFFT[k][0] * localmultFFT[k][0] + localmultFFT[k][1] * localmultFFT[k][1];
	  if ((param.param_device.NumberPixels & 1) == 0)
	    {
	      k += param.param_device.NumberFFTPixels1D - 1;
	      sumsquareC += localmultFFT[k][0] * localmultFFT[k][0] + localmultFFT[k][1] * localmultFFT[k][1];
	    }
	}
1000

1001
1002
1003
1004
1005
1006
1007
      myfloat_t norm2 = (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels);
      sumsquareC = sumsquareC / norm2;
    }
  else
    {
      //FFTW_C2R will destroy the input array, so we have to work on a copy here
      memcpy(tmp, localmultFFT, sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
1008

1009
1010
      // **** Bringing convoluted Map to real Space ****
      myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, tmp, Mapconv);
Pilar Cossio's avatar
Pilar Cossio committed
1011

1012
      for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberPixels; i++)
1013
	{
1014
	  sumsquareC += Mapconv[i] * Mapconv[i];
1015
1016
	}

1017
1018
1019
1020
      myfloat_t norm2 = (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels);
      myfloat_t norm4 = norm2 * norm2;
      sumsquareC = sumsquareC / norm4;
    }
David Rohr's avatar
David Rohr committed
1021

1022
1023
1024
  // **************************************************************************************
  // *********** Routine for printing out the best projetion ******************************
  // **************************************************************************************
David Rohr's avatar
David Rohr committed
1025

1026
1027
1028
  if (mpi_rank == 0 && param.printModel)
    {
      MTRand mtr;
1029

1030
      memcpy(tmp, localmultFFT, sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
1031

1032
1033
      // **** Bringing convoluted Map to real Space ****
      myfftw_execute_dft_c2r(param.fft_plan_c2r_backward, tmp, Mapconv);
1034

1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
      myfloat_t norm2 = (myfloat_t) (param.param_device.NumberPixels * param.param_device.NumberPixels);

      ofstream myexamplemap;
      myexamplemap.open ("BESTMAP");
      for(int k = 0; k < param.param_device.NumberPixels; k++)
	{
	  for(int j = 0; j < param.param_device.NumberPixels; j++) {
	    if(!param.withnoise){
	      myexamplemap << "\nMAP " << k+param.ddx << " " << j+param.ddy << " " <<  Mapconv[k * param.param_device.NumberPixels + j] / norm2 *param.bestnorm +param.bestoff ; 
	    } else {
	      myexamplemap << "\nMAP " << k+param.ddx << " " << j+param.ddy << " " <<  Mapconv[k * param.param_device.NumberPixels + j] / norm2 *param.bestnorm +param.bestoff+ mtr.randNorm(0.0,param.stnoise);
	    }
	  }
	  myexamplemap << " \n";
1049
	}
1050
1051
1052
1053
      myexamplemap.close();

      cout << "\n\nBest map printed in file: BESTMAP with gnuplot format in columns 2, 3 and 4. \n\n\n";
      exit(1);
David Rohr's avatar
David Rohr committed
1054

1055
    }
1056

1057
1058
1059
  cuda_custom_timeslot_end;

  return(0);
1060
1061
}

1062
int bioem::calcross_cor(myfloat_t* localmap, myfloat_t& sum, myfloat_t& sumsquare)
1063
{
1064
  // *********************** Routine to calculate Cross correlations***********************
1065

1066
1067
1068
1069
1070
  sum = 0.0;
  sumsquare = 0.0;
  for (int i = 0; i < param.param_device.NumberPixels; i++)
    {
      for (int j = 0; j < param.param_device.NumberPixels; j++)
1071
	{
1072
1073
1074
1075
	  // Calculate Sum of pixels
	  sum += localmap[i * param.param_device.NumberPixels + j];
	  // Calculate Sum of pixels squared
	  sumsquare += localmap[i * param.param_device.NumberPixels + j] * localmap[i * param.param_device.NumberPixels + j];
1076
	}
1077
1078
    }
  return(0);
1079
1080
1081
1082
}

int bioem::deviceInit()
{
1083
  return(0);
1084
1085
1086
1087
}

int bioem::deviceStartRun()
{
1088
  return(0);
1089
1090
1091
1092
}

int bioem::deviceFinishRun()
{
1093
  return(0);
1094
}
1095
1096
1097

void* bioem::malloc_device_host(size_t size)
{
1098
  return(mallocchk(size));
1099
1100
1101
1102
}

void bioem::free_device_host(void* ptr)
{
1103
  free(ptr);
1104
}