Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MPIBP-Hummer
BioEM
Commits
c9754f9e
Commit
c9754f9e
authored
Apr 06, 2014
by
qon
Browse files
Change all line endings in files to unix style
parent
27a1ff28
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
bioem.cpp
View file @
c9754f9e
This diff is collapsed.
Click to expand it.
bioem_cuda.cu
View file @
c9754f9e
#define BIOEM_GPUCODE
#if defined(_WIN32)
#include
<windows.h>
#endif
#include
<iostream>
using
namespace
std
;
#include
"bioem_cuda_internal.h"
#include
"bioem_algorithm.h"
//__constant__ bioem_map gConvMap;
static
inline
void
checkCudaErrors
(
cudaError_t
error
)
{
if
(
error
!=
cudaSuccess
)
{
printf
(
"CUDA Error %d / %s
\n
"
,
error
,
cudaGetErrorString
(
error
));
exit
(
1
);
}
}
bioem_cuda
::
bioem_cuda
()
{
deviceInitialized
=
0
;
GPUAlgo
=
getenv
(
"GPUALGO"
)
==
NULL
?
2
:
atoi
(
getenv
(
"GPUALGO"
));
GPUAsync
=
getenv
(
"GPUASYNC"
)
==
NULL
?
0
:
atoi
(
getenv
(
"GPUASYNC"
));
}
bioem_cuda
::~
bioem_cuda
()
{
deviceExit
();
}
__global__
void
compareRefMap_kernel
(
const
int
iOrient
,
const
int
iConv
,
const
bioem_map
*
pMap
,
bioem_Probability
*
pProb
,
const
bioem_param_device
param
,
const
bioem_RefMap_Mod
*
RefMap
,
const
int
cent_x
,
const
int
cent_y
)
{
const
int
iRefMap
=
myBlockIdxX
*
myBlockDimX
+
myThreadIdxX
;
if
(
iRefMap
<
RefMap
->
ntotRefMap
)
{
compareRefMap
<
0
>
(
iRefMap
,
iOrient
,
iConv
,
*
pMap
,
pProb
,
param
,
*
RefMap
,
cent_x
,
cent_y
);
}
}
__global__
void
compareRefMapShifted_kernel
(
const
int
iOrient
,
const
int
iConv
,
const
bioem_map
*
pMap
,
bioem_Probability
*
pProb
,
const
bioem_param_device
param
,
const
bioem_RefMap_Mod
*
RefMap
)
{
const
int
iRefMap
=
myBlockIdxX
*
myBlockDimX
+
myThreadIdxX
;
if
(
iRefMap
<
RefMap
->
ntotRefMap
)
{
compareRefMapShifted
<
1
>
(
iRefMap
,
iOrient
,
iConv
,
*
pMap
,
pProb
,
param
,
*
RefMap
);
}
}
__global__
void
compareRefMapLoopShifts_kernel
(
const
int
iOrient
,
const
int
iConv
,
const
bioem_map
*
pMap
,
bioem_Probability
*
pProb
,
const
bioem_param_device
param
,
const
bioem_RefMap
*
RefMap
,
const
int
blockoffset
,
const
int
nShifts
,
const
int
nShiftBits
)
{
const
size_t
myid
=
(
myBlockIdxX
+
blockoffset
)
*
myBlockDimX
+
myThreadIdxX
;
const
int
iRefMap
=
myid
>>
(
nShiftBits
<<
1
);
const
int
myRef
=
myThreadIdxX
>>
(
nShiftBits
<<
1
);
const
int
myShiftIdx
=
(
myid
>>
nShiftBits
)
&
(
nShifts
-
1
);
const
int
myShiftIdy
=
myid
&
(
nShifts
-
1
);
const
int
myShift
=
myid
&
(
nShifts
*
nShifts
-
1
);
const
int
cent_x
=
myShiftIdx
*
param
.
GridSpaceCenter
-
param
.
maxDisplaceCenter
;
const
int
cent_y
=
myShiftIdy
*
param
.
GridSpaceCenter
-
param
.
maxDisplaceCenter
;
compareRefMap
<
2
>
(
iRefMap
,
iOrient
,
iConv
,
*
pMap
,
pProb
,
param
,
*
RefMap
,
cent_x
,
cent_y
,
myShift
,
nShifts
*
nShifts
,
myRef
);
}
template
<
class
T
>
static
inline
T
divup
(
T
num
,
T
divider
)
{
return
((
num
+
divider
-
1
)
/
divider
);}
static
inline
bool
IsPowerOf2
(
int
x
)
{
return
((
x
>
0
)
&&
((
x
&
(
x
-
1
))
==
0
));}
#if defined(_WIN32)
static
inline
int
ilog2
(
int
value
)
{
DWORD
index
;
_BitScanReverse
(
&
index
,
value
);
return
(
value
);
}
#else
static
inline
int
ilog2
(
int
value
)
{
return
31
-
__builtin_clz
(
value
);}
#endif
int
bioem_cuda
::
compareRefMaps
(
int
iProjectionOut
,
int
iConv
,
const
bioem_map
&
conv_map
)
{
//cudaMemcpyToSymbolAsync(&gConvMap, &conv_map, sizeof(bioem_map), 0, cudaMemcpyHostToDevice, cudaStream);
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaEventSynchronize
(
cudaEvent
[
iConv
&
1
]));
}
checkCudaErrors
(
cudaMemcpyAsync
(
pConvMap_device
[
iConv
&
1
],
&
conv_map
,
sizeof
(
bioem_map
),
cudaMemcpyHostToDevice
,
cudaStream
));
if
(
GPUAlgo
==
2
)
//Loop over shifts
{
const
int
nShifts
=
2
*
param
.
param_device
.
maxDisplaceCenter
/
param
.
param_device
.
GridSpaceCenter
+
1
;
if
(
!
IsPowerOf2
(
nShifts
))
{
cout
<<
"Invalid number of displacements, no power of two
\n
"
;
exit
(
1
);
}
if
(
CUDA_THREAD_COUNT
%
(
nShifts
*
nShifts
))
{
cout
<<
"CUDA Thread count ("
<<
CUDA_THREAD_COUNT
<<
") is no multiple of number of shifts ("
<<
(
nShifts
*
nShifts
)
<<
")
\n
"
;
exit
(
1
);
}
if
(
nShifts
>
CUDA_MAX_SHIFT_REDUCE
)
{
cout
<<
"Too many displacements for CUDA reduction
\n
"
;
exit
(
1
);
}
const
int
nShiftBits
=
ilog2
(
nShifts
);
size_t
totalBlocks
=
divup
((
size_t
)
RefMap
.
ntotRefMap
*
(
size_t
)
nShifts
*
(
size_t
)
nShifts
,
(
size_t
)
CUDA_THREAD_COUNT
);
size_t
nBlocks
=
CUDA_BLOCK_COUNT
;
for
(
size_t
i
=
0
;
i
<
totalBlocks
;
i
+=
nBlocks
)
{
compareRefMapLoopShifts_kernel
<<<
min
(
nBlocks
,
totalBlocks
-
i
),
CUDA_THREAD_COUNT
,
(
CUDA_THREAD_COUNT
*
2
+
CUDA_THREAD_COUNT
/
(
nShifts
*
nShifts
)
*
4
)
*
sizeof
(
myfloat_t
),
cudaStream
>>>
(
iProjectionOut
,
iConv
,
pConvMap_device
[
iConv
&
1
],
pProb_device
,
param
.
param_device
,
pRefMap_device
,
i
,
nShifts
,
nShiftBits
);
}
}
else
if
(
GPUAlgo
==
1
)
//Split shifts in multiple kernels
{
for
(
int
cent_x
=
-
param
.
param_device
.
maxDisplaceCenter
;
cent_x
<=
param
.
param_device
.
maxDisplaceCenter
;
cent_x
=
cent_x
+
param
.
param_device
.
GridSpaceCenter
)
{
for
(
int
cent_y
=
-
param
.
param_device
.
maxDisplaceCenter
;
cent_y
<=
param
.
param_device
.
maxDisplaceCenter
;
cent_y
=
cent_y
+
param
.
param_device
.
GridSpaceCenter
)
{
compareRefMap_kernel
<<<
divup
(
RefMap
.
ntotRefMap
,
CUDA_THREAD_COUNT
),
CUDA_THREAD_COUNT
,
0
,
cudaStream
>>>
(
iProjectionOut
,
iConv
,
pConvMap_device
[
iConv
&
1
],
pProb_device
,
param
.
param_device
,
pRefMap_device_Mod
,
cent_x
,
cent_y
);
}
}
}
else
if
(
GPUAlgo
==
0
)
//All shifts in one kernel
{
compareRefMapShifted_kernel
<<<
divup
(
RefMap
.
ntotRefMap
,
CUDA_THREAD_COUNT
),
CUDA_THREAD_COUNT
,
0
,
cudaStream
>>>
(
iProjectionOut
,
iConv
,
pConvMap_device
[
iConv
&
1
],
pProb_device
,
param
.
param_device
,
pRefMap_device_Mod
);
}
else
{
cout
<<
"Invalid GPU Algorithm selected
\n
"
;
exit
(
1
);
}
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
iConv
&
1
],
cudaStream
));
}
else
{
checkCudaErrors
(
cudaStreamSynchronize
(
cudaStream
));
}
return
(
0
);
}
int
bioem_cuda
::
deviceInit
()
{
deviceExit
();
checkCudaErrors
(
cudaStreamCreate
(
&
cudaStream
));
checkCudaErrors
(
cudaMalloc
(
&
pRefMap_device
,
sizeof
(
bioem_RefMap
)));
checkCudaErrors
(
cudaMalloc
(
&
pProb_device
,
sizeof
(
bioem_Probability
)
*
RefMap
.
ntotRefMap
));
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
checkCudaErrors
(
cudaEventCreate
(
&
cudaEvent
[
i
]));
checkCudaErrors
(
cudaMalloc
(
&
pConvMap_device
[
i
],
sizeof
(
bioem_map
)));
}
pRefMap_device_Mod
=
(
bioem_RefMap_Mod
*
)
pRefMap_device
;
if
(
GPUAlgo
==
0
||
GPUAlgo
==
1
)
{
bioem_RefMap_Mod
*
RefMapGPU
=
new
bioem_RefMap_Mod
(
RefMap
);
cudaMemcpy
(
pRefMap_device_Mod
,
RefMapGPU
,
sizeof
(
bioem_RefMap
),
cudaMemcpyHostToDevice
);
delete
RefMapGPU
;
}
else
{
cudaMemcpy
(
pRefMap_device
,
&
RefMap
,
sizeof
(
bioem_RefMap
),
cudaMemcpyHostToDevice
);
}
deviceInitialized
=
1
;
return
(
0
);
}
int
bioem_cuda
::
deviceExit
()
{
if
(
deviceInitialized
==
0
)
return
(
0
);
cudaStreamDestroy
(
cudaStream
);
cudaFree
(
pRefMap_device
);
cudaFree
(
pProb_device
);
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
cudaEventDestroy
(
cudaEvent
[
i
]);
cudaFree
(
pConvMap_device
);
}
cudaThreadExit
();
deviceInitialized
=
0
;
return
(
0
);
}
int
bioem_cuda
::
deviceStartRun
()
{
cudaMemcpy
(
pProb_device
,
pProb
,
sizeof
(
bioem_Probability
)
*
RefMap
.
ntotRefMap
,
cudaMemcpyHostToDevice
);
return
(
0
);
}
int
bioem_cuda
::
deviceFinishRun
()
{
if
(
GPUAsync
)
cudaStreamSynchronize
(
cudaStream
);
cudaMemcpy
(
pProb
,
pProb_device
,
sizeof
(
bioem_Probability
)
*
RefMap
.
ntotRefMap
,
cudaMemcpyDeviceToHost
);
return
(
0
);
}
bioem
*
bioem_cuda_create
()
{
return
new
bioem_cuda
;
}
#define BIOEM_GPUCODE
#if defined(_WIN32)
#include
<windows.h>
#endif
#include
<iostream>
using
namespace
std
;
#include
"bioem_cuda_internal.h"
#include
"bioem_algorithm.h"
//__constant__ bioem_map gConvMap;
static
inline
void
checkCudaErrors
(
cudaError_t
error
)
{
if
(
error
!=
cudaSuccess
)
{
printf
(
"CUDA Error %d / %s
\n
"
,
error
,
cudaGetErrorString
(
error
));
exit
(
1
);
}
}
bioem_cuda
::
bioem_cuda
()
{
deviceInitialized
=
0
;
GPUAlgo
=
getenv
(
"GPUALGO"
)
==
NULL
?
2
:
atoi
(
getenv
(
"GPUALGO"
));
GPUAsync
=
getenv
(
"GPUASYNC"
)
==
NULL
?
0
:
atoi
(
getenv
(
"GPUASYNC"
));
}
bioem_cuda
::~
bioem_cuda
()
{
deviceExit
();
}
__global__
void
compareRefMap_kernel
(
const
int
iOrient
,
const
int
iConv
,
const
bioem_map
*
pMap
,
bioem_Probability
*
pProb
,
const
bioem_param_device
param
,
const
bioem_RefMap_Mod
*
RefMap
,
const
int
cent_x
,
const
int
cent_y
)
{
const
int
iRefMap
=
myBlockIdxX
*
myBlockDimX
+
myThreadIdxX
;
if
(
iRefMap
<
RefMap
->
ntotRefMap
)
{
compareRefMap
<
0
>
(
iRefMap
,
iOrient
,
iConv
,
*
pMap
,
pProb
,
param
,
*
RefMap
,
cent_x
,
cent_y
);
}
}
__global__
void
compareRefMapShifted_kernel
(
const
int
iOrient
,
const
int
iConv
,
const
bioem_map
*
pMap
,
bioem_Probability
*
pProb
,
const
bioem_param_device
param
,
const
bioem_RefMap_Mod
*
RefMap
)
{
const
int
iRefMap
=
myBlockIdxX
*
myBlockDimX
+
myThreadIdxX
;
if
(
iRefMap
<
RefMap
->
ntotRefMap
)
{
compareRefMapShifted
<
1
>
(
iRefMap
,
iOrient
,
iConv
,
*
pMap
,
pProb
,
param
,
*
RefMap
);
}
}
__global__
void
compareRefMapLoopShifts_kernel
(
const
int
iOrient
,
const
int
iConv
,
const
bioem_map
*
pMap
,
bioem_Probability
*
pProb
,
const
bioem_param_device
param
,
const
bioem_RefMap
*
RefMap
,
const
int
blockoffset
,
const
int
nShifts
,
const
int
nShiftBits
)
{
const
size_t
myid
=
(
myBlockIdxX
+
blockoffset
)
*
myBlockDimX
+
myThreadIdxX
;
const
int
iRefMap
=
myid
>>
(
nShiftBits
<<
1
);
const
int
myRef
=
myThreadIdxX
>>
(
nShiftBits
<<
1
);
const
int
myShiftIdx
=
(
myid
>>
nShiftBits
)
&
(
nShifts
-
1
);
const
int
myShiftIdy
=
myid
&
(
nShifts
-
1
);
const
int
myShift
=
myid
&
(
nShifts
*
nShifts
-
1
);
const
int
cent_x
=
myShiftIdx
*
param
.
GridSpaceCenter
-
param
.
maxDisplaceCenter
;
const
int
cent_y
=
myShiftIdy
*
param
.
GridSpaceCenter
-
param
.
maxDisplaceCenter
;
compareRefMap
<
2
>
(
iRefMap
,
iOrient
,
iConv
,
*
pMap
,
pProb
,
param
,
*
RefMap
,
cent_x
,
cent_y
,
myShift
,
nShifts
*
nShifts
,
myRef
);
}
template
<
class
T
>
static
inline
T
divup
(
T
num
,
T
divider
)
{
return
((
num
+
divider
-
1
)
/
divider
);}
static
inline
bool
IsPowerOf2
(
int
x
)
{
return
((
x
>
0
)
&&
((
x
&
(
x
-
1
))
==
0
));}
#if defined(_WIN32)
static
inline
int
ilog2
(
int
value
)
{
DWORD
index
;
_BitScanReverse
(
&
index
,
value
);
return
(
value
);
}
#else
static
inline
int
ilog2
(
int
value
)
{
return
31
-
__builtin_clz
(
value
);}
#endif
int
bioem_cuda
::
compareRefMaps
(
int
iProjectionOut
,
int
iConv
,
const
bioem_map
&
conv_map
)
{
//cudaMemcpyToSymbolAsync(&gConvMap, &conv_map, sizeof(bioem_map), 0, cudaMemcpyHostToDevice, cudaStream);
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaEventSynchronize
(
cudaEvent
[
iConv
&
1
]));
}
checkCudaErrors
(
cudaMemcpyAsync
(
pConvMap_device
[
iConv
&
1
],
&
conv_map
,
sizeof
(
bioem_map
),
cudaMemcpyHostToDevice
,
cudaStream
));
if
(
GPUAlgo
==
2
)
//Loop over shifts
{
const
int
nShifts
=
2
*
param
.
param_device
.
maxDisplaceCenter
/
param
.
param_device
.
GridSpaceCenter
+
1
;
if
(
!
IsPowerOf2
(
nShifts
))
{
cout
<<
"Invalid number of displacements, no power of two
\n
"
;
exit
(
1
);
}
if
(
CUDA_THREAD_COUNT
%
(
nShifts
*
nShifts
))
{
cout
<<
"CUDA Thread count ("
<<
CUDA_THREAD_COUNT
<<
") is no multiple of number of shifts ("
<<
(
nShifts
*
nShifts
)
<<
")
\n
"
;
exit
(
1
);
}
if
(
nShifts
>
CUDA_MAX_SHIFT_REDUCE
)
{
cout
<<
"Too many displacements for CUDA reduction
\n
"
;
exit
(
1
);
}
const
int
nShiftBits
=
ilog2
(
nShifts
);
size_t
totalBlocks
=
divup
((
size_t
)
RefMap
.
ntotRefMap
*
(
size_t
)
nShifts
*
(
size_t
)
nShifts
,
(
size_t
)
CUDA_THREAD_COUNT
);
size_t
nBlocks
=
CUDA_BLOCK_COUNT
;
for
(
size_t
i
=
0
;
i
<
totalBlocks
;
i
+=
nBlocks
)
{
compareRefMapLoopShifts_kernel
<<<
min
(
nBlocks
,
totalBlocks
-
i
),
CUDA_THREAD_COUNT
,
(
CUDA_THREAD_COUNT
*
2
+
CUDA_THREAD_COUNT
/
(
nShifts
*
nShifts
)
*
4
)
*
sizeof
(
myfloat_t
),
cudaStream
>>>
(
iProjectionOut
,
iConv
,
pConvMap_device
[
iConv
&
1
],
pProb_device
,
param
.
param_device
,
pRefMap_device
,
i
,
nShifts
,
nShiftBits
);
}
}
else
if
(
GPUAlgo
==
1
)
//Split shifts in multiple kernels
{
for
(
int
cent_x
=
-
param
.
param_device
.
maxDisplaceCenter
;
cent_x
<=
param
.
param_device
.
maxDisplaceCenter
;
cent_x
=
cent_x
+
param
.
param_device
.
GridSpaceCenter
)
{
for
(
int
cent_y
=
-
param
.
param_device
.
maxDisplaceCenter
;
cent_y
<=
param
.
param_device
.
maxDisplaceCenter
;
cent_y
=
cent_y
+
param
.
param_device
.
GridSpaceCenter
)
{
compareRefMap_kernel
<<<
divup
(
RefMap
.
ntotRefMap
,
CUDA_THREAD_COUNT
),
CUDA_THREAD_COUNT
,
0
,
cudaStream
>>>
(
iProjectionOut
,
iConv
,
pConvMap_device
[
iConv
&
1
],
pProb_device
,
param
.
param_device
,
pRefMap_device_Mod
,
cent_x
,
cent_y
);
}
}
}
else
if
(
GPUAlgo
==
0
)
//All shifts in one kernel
{
compareRefMapShifted_kernel
<<<
divup
(
RefMap
.
ntotRefMap
,
CUDA_THREAD_COUNT
),
CUDA_THREAD_COUNT
,
0
,
cudaStream
>>>
(
iProjectionOut
,
iConv
,
pConvMap_device
[
iConv
&
1
],
pProb_device
,
param
.
param_device
,
pRefMap_device_Mod
);
}
else
{
cout
<<
"Invalid GPU Algorithm selected
\n
"
;
exit
(
1
);
}
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
iConv
&
1
],
cudaStream
));
}
else
{
checkCudaErrors
(
cudaStreamSynchronize
(
cudaStream
));
}
return
(
0
);
}
int
bioem_cuda
::
deviceInit
()
{
deviceExit
();
checkCudaErrors
(
cudaStreamCreate
(
&
cudaStream
));
checkCudaErrors
(
cudaMalloc
(
&
pRefMap_device
,
sizeof
(
bioem_RefMap
)));
checkCudaErrors
(
cudaMalloc
(
&
pProb_device
,
sizeof
(
bioem_Probability
)
*
RefMap
.
ntotRefMap
));
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
checkCudaErrors
(
cudaEventCreate
(
&
cudaEvent
[
i
]));
checkCudaErrors
(
cudaMalloc
(
&
pConvMap_device
[
i
],
sizeof
(
bioem_map
)));
}
pRefMap_device_Mod
=
(
bioem_RefMap_Mod
*
)
pRefMap_device
;
if
(
GPUAlgo
==
0
||
GPUAlgo
==
1
)
{
bioem_RefMap_Mod
*
RefMapGPU
=
new
bioem_RefMap_Mod
(
RefMap
);
cudaMemcpy
(
pRefMap_device_Mod
,
RefMapGPU
,
sizeof
(
bioem_RefMap
),
cudaMemcpyHostToDevice
);
delete
RefMapGPU
;
}
else
{
cudaMemcpy
(
pRefMap_device
,
&
RefMap
,
sizeof
(
bioem_RefMap
),
cudaMemcpyHostToDevice
);
}
deviceInitialized
=
1
;
return
(
0
);
}
int
bioem_cuda
::
deviceExit
()
{
if
(
deviceInitialized
==
0
)
return
(
0
);
cudaStreamDestroy
(
cudaStream
);
cudaFree
(
pRefMap_device
);
cudaFree
(
pProb_device
);
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
cudaEventDestroy
(
cudaEvent
[
i
]);
cudaFree
(
pConvMap_device
);
}
cudaThreadExit
();
deviceInitialized
=
0
;
return
(
0
);
}
int
bioem_cuda
::
deviceStartRun
()
{
cudaMemcpy
(
pProb_device
,
pProb
,
sizeof
(
bioem_Probability
)
*
RefMap
.
ntotRefMap
,
cudaMemcpyHostToDevice
);
return
(
0
);
}
int
bioem_cuda
::
deviceFinishRun
()
{
if
(
GPUAsync
)
cudaStreamSynchronize
(
cudaStream
);
cudaMemcpy
(
pProb
,
pProb_device
,
sizeof
(
bioem_Probability
)
*
RefMap
.
ntotRefMap
,
cudaMemcpyDeviceToHost
);
return
(
0
);
}
bioem
*
bioem_cuda_create
()
{
return
new
bioem_cuda
;
}
include/bioem.h
View file @
c9754f9e
#ifndef BIOEM_H
#define BIOEM_H
#include
<fstream>
#include
<iostream>
#include
<complex>
#include
"defs.h"
#include
"bioem.h"
#include
"model.h"
#include
"map.h"
#include
"param.h"
class
bioem
{
public:
bioem
();
virtual
~
bioem
();
int
configure
(
int
ac
,
char
*
av
[]);
int
precalculate
();
// Is it better to pass directly the input File names?
int
dopreCalCrossCorrelation
(
int
iRefMap
,
int
iRefMapLocal
);
int
run
();
int
doProjections
(
int
iMap
);
int
createConvolutedProjectionMap
(
int
iOreint
,
int
iMap
,
mycomplex_t
*
lproj
,
bioem_map
&
Mapconv
);
virtual
int
compareRefMaps
(
int
iProjectionOut
,
int
iConv
,
const
bioem_map
&
conv_map
);
int
createProjection
(
int
iMap
,
mycomplex_t
*
map
);
int
calcross_cor
(
bioem_map
&
localmap
,
myfloat_t
&
sum
,
myfloat_t
&
sumsquare
);
bioem_Probability
*
pProb
;
protected:
virtual
int
deviceInit
();
virtual
int
deviceStartRun
();
virtual
int
deviceFinishRun
();
bioem_param
param
;
bioem_model
Model
;
bioem_RefMap
RefMap
;
int
nReferenceMaps
;
//Maps in memory at a time
int
nReferenceMapsTotal
;
//Maps in total
int
nProjectionMaps
;
//Maps in memory at a time
int
nProjectionMapsTotal
;
//Maps in total
};
#endif
#ifndef BIOEM_H
#define BIOEM_H
#include
<fstream>
#include
<iostream>
#include
<complex>
#include
"defs.h"
#include
"bioem.h"
#include
"model.h"
#include
"map.h"
#include
"param.h"
class
bioem
{
public:
bioem
();
virtual
~
bioem
();
int
configure
(
int
ac
,
char
*
av
[]);
int
precalculate
();
// Is it better to pass directly the input File names?
int
dopreCalCrossCorrelation
(
int
iRefMap
,
int
iRefMapLocal
);
int
run
();
int
doProjections
(
int
iMap
);
int
createConvolutedProjectionMap
(
int
iOreint
,
int
iMap
,
mycomplex_t
*
lproj
,
bioem_map
&
Mapconv
);
virtual
int
compareRefMaps
(
int
iProjectionOut
,
int
iConv
,
const
bioem_map
&
conv_map
);
int
createProjection
(
int
iMap
,
mycomplex_t
*
map
);
int
calcross_cor
(
bioem_map
&
localmap
,
myfloat_t
&
sum
,
myfloat_t
&
sumsquare
);
bioem_Probability
*
pProb
;
protected:
virtual
int
deviceInit
();
virtual
int
deviceStartRun
();
virtual
int
deviceFinishRun
();
bioem_param
param
;
bioem_model
Model
;
bioem_RefMap
RefMap
;
int
nReferenceMaps
;
//Maps in memory at a time
int
nReferenceMapsTotal
;
//Maps in total
int
nProjectionMaps
;
//Maps in memory at a time
int
nProjectionMapsTotal
;
//Maps in total
};
#endif
include/defs.h
View file @
c9754f9e
#ifndef BIOEM_DEFS_H
#define BIOEM_DEFS_H
typedef
float
myfloat_t
;
typedef
double
mycomplex_t
[
2
];
#define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU
#define BIOEM_MAP_SIZE_X 224
#define BIOEM_MAP_SIZE_Y 224
#define BIOEM_MODEL_SIZE 120000
#define BIOEM_MAX_MAPS 12000
#define MAX_REF_CTF 200
#define MAX_ORIENT 20000
struct
myfloat3_t
{
myfloat_t
pos
[
BIOEM_FLOAT_3_PHYSICAL_SIZE
];
};
#ifdef BIOEM_GPUCODE
#define myThreadIdxX threadIdx.x
#define myThreadIdxY threadIdx.y
#define myBlockDimX blockDim.x
#define myBlockDimY blockDim.y
#define myBlockIdxX blockIdx.x
#define myBlockIdxY blockIdx.y
#else
#define __device__
#define __host__
#define myThreadIdxX 0
#define myThreadIdxY 0
#define myBlockDimX 1
#define myBlockDimY 1
#define myBlockIdxX 0
#define myBlockIdxY 0
#endif
#define CUDA_THREAD_COUNT 256
#define CUDA_BLOCK_COUNT 1024 * 16
#define CUDA_MAX_SHIFT_REDUCE 1024
#endif
#ifndef BIOEM_DEFS_H
#define BIOEM_DEFS_H
typedef
float
myfloat_t
;
typedef
double
mycomplex_t
[
2
];
#define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU
#define BIOEM_MAP_SIZE_X 224
#define BIOEM_MAP_SIZE_Y 224
#define BIOEM_MODEL_SIZE 120000
#define BIOEM_MAX_MAPS 12000
#define MAX_REF_CTF 200
#define MAX_ORIENT 20000
struct
myfloat3_t
{
myfloat_t
pos
[
BIOEM_FLOAT_3_PHYSICAL_SIZE
];
};
#ifdef BIOEM_GPUCODE
#define myThreadIdxX threadIdx.x
#define myThreadIdxY threadIdx.y
#define myBlockDimX blockDim.x
#define myBlockDimY blockDim.y
#define myBlockIdxX blockIdx.x
#define myBlockIdxY blockIdx.y
#else
#define __device__
#define __host__
#define myThreadIdxX 0
#define myThreadIdxY 0
#define myBlockDimX 1
#define myBlockDimY 1
#define myBlockIdxX 0
#define myBlockIdxY 0
#endif
#define CUDA_THREAD_COUNT 256
#define CUDA_BLOCK_COUNT 1024 * 16
#define CUDA_MAX_SHIFT_REDUCE 1024
#endif
include/map.h
View file @
c9754f9e
#ifndef BIOEM_MAP_H
#define BIOEM_MAP_H