Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MPIBP-Hummer
BioEM
Commits
89cc32a4
Commit
89cc32a4
authored
Apr 20, 2014
by
David Rohr
Browse files
use host pinned memory for dma transfers
parent
7a018205
Changes
6
Hide whitespace changes
Inline
Side-by-side
bioem.cpp
View file @
89cc32a4
...
...
@@ -168,7 +168,7 @@ int bioem::configure(int ac, char* av[])
param
.
nTotCTFs
=
atoi
(
getenv
(
"BIOEM_DEBUG_BREAK"
));
}
pProb
.
init
(
RefMap
.
ntotRefMap
,
param
.
nTotGridAngles
);
pProb
.
init
(
RefMap
.
ntotRefMap
,
param
.
nTotGridAngles
,
*
this
);
deviceInit
();
...
...
@@ -178,7 +178,7 @@ int bioem::configure(int ac, char* av[])
void
bioem
::
cleanup
()
{
//Deleting allocated pointers
free
(
pProb
.
ptr
);
free
_device_host
(
pProb
.
ptr
);
RefMap
.
freePointers
();
}
...
...
@@ -556,3 +556,13 @@ int bioem::deviceFinishRun()
{
return
(
0
);
}
void
*
bioem
::
malloc_device_host
(
size_t
size
)
{
return
(
mallocchk
(
size
));
}
void
bioem
::
free_device_host
(
void
*
ptr
)
{
free
(
ptr
);
}
bioem_cuda.cu
View file @
89cc32a4
...
...
@@ -166,7 +166,8 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map
if
(
FFTAlgo
)
{
checkCudaErrors
(
cudaMemcpyAsync
(
&
pConvMapFFT
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
localmultFFT
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
,
cudaStream
[
0
]));
memcpy
(
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
localmultFFT
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
));
checkCudaErrors
(
cudaMemcpyAsync
(
&
pConvMapFFT
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
,
cudaStream
[
0
]));
if
(
GPUDualStream
)
{
checkCudaErrors
(
cudaEventRecord
(
cudaFFTEvent
[
0
],
cudaStream
[
0
]));
...
...
@@ -309,6 +310,7 @@ int bioem_cuda::deviceInit()
pFFTtmp2
[
1
]
=
pFFTtmp2
[
0
]
+
CUDA_FFTS_AT_ONCE
*
param
.
FFTMapSize
;
pFFTtmp
[
1
]
=
pFFTtmp
[
0
]
+
CUDA_FFTS_AT_ONCE
*
param
.
param_device
.
NumberPixels
*
param
.
param_device
.
NumberPixels
;
checkCudaErrors
(
cudaMalloc
(
&
pConvMapFFT
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
)
*
2
));
checkCudaErrors
(
cudaHostAlloc
(
&
pConvMapFFT_Host
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
)
*
2
,
0
));
checkCudaErrors
(
cudaMemcpy
(
pRefMapsFFT
,
RefMap
.
RefMapsFFT
,
RefMap
.
ntotRefMap
*
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
));
}
...
...
@@ -335,6 +337,7 @@ int bioem_cuda::deviceExit()
{
cudaFree
(
pRefMapsFFT
);
cudaFree
(
pConvMapFFT
);
cudaFreeHost
(
pConvMapFFT_Host
);
cudaFree
(
pFFTtmp
[
0
]);
cudaFree
(
pFFTtmp2
[
0
]);
}
...
...
@@ -357,7 +360,7 @@ int bioem_cuda::deviceStartRun()
{
maxRef
=
GPUWorkload
>=
100
?
RefMap
.
ntotRefMap
:
((
size_t
)
RefMap
.
ntotRefMap
*
(
size_t
)
GPUWorkload
/
100
);
cudaMemcpy
(
pProb_device
.
ptr
,
pProb
.
ptr
,
pProb
.
get_size
(
RefMap
.
ntotRefMap
,
param
.
nTotGridAngles
),
cudaMemcpyHostToDevice
);
cudaMemcpy
Async
(
pProb_device
.
ptr
,
pProb
.
ptr
,
pProb
.
get_size
(
RefMap
.
ntotRefMap
,
param
.
nTotGridAngles
),
cudaMemcpyHostToDevice
,
cudaStream
[
0
]
);
if
(
FFTAlgo
)
{
...
...
@@ -391,7 +394,7 @@ int bioem_cuda::deviceStartRun()
int
bioem_cuda
::
deviceFinishRun
()
{
if
(
GPUAsync
)
cudaStreamSynchronize
(
cudaStream
[
0
]);
cudaMemcpy
(
pProb
.
ptr
,
pProb_device
.
ptr
,
pProb
.
get_size
(
RefMap
.
ntotRefMap
,
param
.
nTotGridAngles
),
cudaMemcpyDeviceToHost
);
cudaMemcpy
Async
(
pProb
.
ptr
,
pProb_device
.
ptr
,
pProb
.
get_size
(
RefMap
.
ntotRefMap
,
param
.
nTotGridAngles
),
cudaMemcpyDeviceToHost
,
cudaStream
[
0
]
);
if
(
FFTAlgo
)
{
...
...
@@ -405,6 +408,18 @@ int bioem_cuda::deviceFinishRun()
return
(
0
);
}
void
*
bioem_cuda
::
malloc_device_host
(
size_t
size
)
{
void
*
ptr
;
checkCudaErrors
(
cudaHostAlloc
(
&
ptr
,
size
,
0
));
return
(
ptr
);
}
void
bioem_cuda
::
free_device_host
(
void
*
ptr
)
{
cudaFreeHost
(
ptr
);
}
bioem
*
bioem_cuda_create
()
{
return
new
bioem_cuda
;
...
...
include/bioem.h
View file @
89cc32a4
...
...
@@ -29,6 +29,9 @@ public:
virtual
int
compareRefMaps
(
int
iOrient
,
int
iConv
,
const
myfloat_t
*
conv_map
,
mycomplex_t
*
localmultFFT
,
myfloat_t
sumC
,
myfloat_t
sumsquareC
,
const
int
startMap
=
0
);
virtual
void
*
malloc_device_host
(
size_t
size
);
virtual
void
free_device_host
(
void
*
ptr
);
int
createProjection
(
int
iMap
,
mycomplex_t
*
map
);
int
calcross_cor
(
myfloat_t
*
localmap
,
myfloat_t
&
sum
,
myfloat_t
&
sumsquare
);
void
calculateCCFFT
(
int
iMap
,
int
iOrient
,
int
iConv
,
myfloat_t
sumC
,
myfloat_t
sumsquareC
,
mycomplex_t
*
localConvFFT
,
mycomplex_t
*
localCCT
,
myfloat_t
*
lCC
);
...
...
include/bioem_cuda_internal.h
View file @
89cc32a4
...
...
@@ -18,6 +18,8 @@ public:
virtual
~
bioem_cuda
();
virtual
int
compareRefMaps
(
int
iOrient
,
int
iConv
,
const
myfloat_t
*
conv_map
,
mycomplex_t
*
localmultFFT
,
myfloat_t
sumC
,
myfloat_t
sumsquareC
,
const
int
startMap
=
0
);
virtual
void
*
malloc_device_host
(
size_t
size
);
virtual
void
free_device_host
(
void
*
ptr
);
protected:
virtual
int
deviceInit
();
...
...
@@ -37,6 +39,7 @@ protected:
mycomplex_t
*
pRefMapsFFT
;
mycomplex_t
*
pConvMapFFT
;
mycomplex_t
*
pConvMapFFT_Host
;
mycuComplex_t
*
pFFTtmp2
[
2
];
myfloat_t
*
pFFTtmp
[
2
];
cufftHandle
plan
[
2
][
2
];
...
...
@@ -52,3 +55,4 @@ protected:
};
#endif
include/map.h
View file @
89cc32a4
...
...
@@ -103,13 +103,7 @@ public:
return
(
maps
*
(
angles
*
sizeof
(
bioem_Probability_angle
)
+
sizeof
(
bioem_Probability_map
)));
}
void
init
(
size_t
maps
,
size_t
angles
)
{
nMaps
=
maps
;
nAngles
=
angles
;
ptr
=
mallocchk
(
get_size
(
maps
,
angles
));
set_pointers
();
}
void
init
(
size_t
maps
,
size_t
angles
,
bioem
&
bio
);
void
set_pointers
()
{
...
...
map.cpp
View file @
89cc32a4
...
...
@@ -207,3 +207,11 @@ int bioem_RefMap::precalculate(bioem_param& param, bioem& bio)
return
(
0
);
}
void
bioem_Probability
::
init
(
size_t
maps
,
size_t
angles
,
bioem
&
bio
)
{
nMaps
=
maps
;
nAngles
=
angles
;
ptr
=
bio
.
malloc_device_host
(
get_size
(
maps
,
angles
));
set_pointers
();
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment