Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MPIfR-BDG
psrdada_cpp
Commits
e3edc797
Commit
e3edc797
authored
Apr 15, 2019
by
Tobias Winchen
Browse files
Alternate on / off spectra in output
parent
c722b311
Changes
3
Hide whitespace changes
Inline
Side-by-side
psrdada_cpp/effelsberg/edd/DetectorAccumulator.cuh
View file @
e3edc797
...
...
@@ -14,7 +14,7 @@ namespace kernels {
template
<
typename
T
>
__global__
void
detect_and_accumulate
(
float2
const
*
__restrict__
in
,
int8_t
*
__restrict__
out
,
int
nchans
,
int
nsamps
,
int
naccumulate
,
float
scale
,
float
offset
)
int
nchans
,
int
nsamps
,
int
naccumulate
,
float
scale
,
float
offset
,
int
stride
,
int
out_
offset
)
{
// grid stride loop over output array to keep
for
(
size_t
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
(
i
<
nsamps
*
nchans
/
naccumulate
);
i
+=
blockDim
.
x
*
gridDim
.
x
)
...
...
@@ -30,7 +30,8 @@ void detect_and_accumulate(float2 const* __restrict__ in, int8_t* __restrict__ o
double
y
=
tmp
.
y
*
tmp
.
y
;
sum
+=
x
+
y
;
}
out
[
i
]
=
(
int8_t
)
((
sum
-
offset
)
/
scale
);
size_t
toff
=
out_offset
*
nchans
+
currentOutputSpectra
*
nchans
;
out
[
toff
+
i
]
=
(
int8_t
)
((
sum
-
offset
)
/
scale
);
}
}
...
...
@@ -39,7 +40,7 @@ void detect_and_accumulate(float2 const* __restrict__ in, int8_t* __restrict__ o
template
<
typename
T
>
__global__
void
detect_and_accumulate
(
float2
const
*
__restrict__
in
,
float
*
__restrict__
out
,
int
nchans
,
int
nsamps
,
int
naccumulate
,
float
scale
,
float
offset
)
int
nchans
,
int
nsamps
,
int
naccumulate
,
float
scale
,
float
offset
,
int
stride
,
int
out_
offset
)
{
for
(
size_t
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
(
i
<
nsamps
*
nchans
/
naccumulate
);
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
...
...
@@ -54,7 +55,8 @@ void detect_and_accumulate(float2 const* __restrict__ in, float* __restrict__ ou
double
y
=
tmp
.
y
*
tmp
.
y
;
sum
+=
x
+
y
;
}
out
[
i
]
=
sum
;
size_t
toff
=
out_offset
*
nchans
+
currentOutputSpectra
*
nchans
*
stride
;
out
[
i
+
toff
]
=
sum
;
}
}
...
...
@@ -90,15 +92,17 @@ public:
}
void
detect
(
InputType
const
&
input
,
OutputType
&
output
)
// stride sets an offset of _nChans * stride to the detection in the output
// to allow multiple spectra in one output
void
detect
(
InputType
const
&
input
,
OutputType
&
output
,
int
stride
=
0
,
int
stoff
=
0
)
{
assert
(
input
.
size
()
%
(
_nchans
*
_tscrunch
)
==
0
/* Input is not a multiple of _nchans * _tscrunch*/
);
output
.
resize
(
input
.
size
()
/
_tscrunch
);
//
output.resize(input.size()/_tscrunch);
int
nsamps
=
input
.
size
()
/
_nchans
;
float2
const
*
input_ptr
=
thrust
::
raw_pointer_cast
(
input
.
data
());
T
*
output_ptr
=
thrust
::
raw_pointer_cast
(
output
.
data
());
kernels
::
detect_and_accumulate
<
T
>
<<<
1024
,
1024
,
0
,
_stream
>>>
(
input_ptr
,
output_ptr
,
_nchans
,
nsamps
,
_tscrunch
,
_scale
,
_offset
);
input_ptr
,
output_ptr
,
_nchans
,
nsamps
,
_tscrunch
,
_scale
,
_offset
,
stride
,
stoff
);
}
...
...
psrdada_cpp/effelsberg/edd/GatedSpectrometer.cuh
View file @
e3edc797
...
...
@@ -89,8 +89,7 @@ public:
private:
void
process
(
thrust
::
device_vector
<
RawVoltageType
>
const
&
digitiser_raw
,
thrust
::
device_vector
<
int64_t
>
const
&
sideChannelData
,
thrust
::
device_vector
<
IntegratedPowerType
>
&
detected_G0
,
thrust
::
device_vector
<
IntegratedPowerType
>
&
detected_G1
,
thrust
::
device_vector
<
IntegratedPowerType
>
&
detected
,
thrust
::
device_vector
<
size_t
>
&
noOfBitSet
);
private:
...
...
@@ -116,8 +115,7 @@ private:
std
::
unique_ptr
<
DetectorAccumulator
<
IntegratedPowerType
>
>
_detector
;
DoubleDeviceBuffer
<
RawVoltageType
>
_raw_voltage_db
;
DoubleDeviceBuffer
<
IntegratedPowerType
>
_power_db_G0
;
DoubleDeviceBuffer
<
IntegratedPowerType
>
_power_db_G1
;
DoubleDeviceBuffer
<
IntegratedPowerType
>
_power_db
;
DoubleDeviceBuffer
<
int64_t
>
_sideChannelData_db
;
DoubleDeviceBuffer
<
size_t
>
_noOfBitSetsInSideChannel
;
size_t
_noOfBitSetsInSideChannel_host
[
2
];
...
...
psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu
View file @
e3edc797
...
...
@@ -180,12 +180,10 @@ GatedSpectrometer<HandlerType, IntegratedPowerType>::GatedSpectrometer(
_channelised_voltage
.
resize
(
_nchans
*
batch
);
BOOST_LOG_TRIVIAL
(
debug
)
<<
" Channelised voltages size: "
<<
_channelised_voltage
.
size
();
_power_db_G0
.
resize
(
_nchans
*
batch
/
_naccumulate
);
_power_db_G1
.
resize
(
_nchans
*
batch
/
_naccumulate
);
BOOST_LOG_TRIVIAL
(
debug
)
<<
" Powers size: "
<<
_power_db_G0
.
size
()
<<
", "
<<
_power_db_G1
.
size
();
_power_db
.
resize
(
_nchans
*
batch
/
_naccumulate
*
2
);
// hold on and off spectra to simplify output
BOOST_LOG_TRIVIAL
(
debug
)
<<
" Powers size: "
<<
_power_db
.
size
()
/
2
;
// on the host both power are stored in the same data buffer
_host_power_db
.
resize
(
_power_db
_G0
.
size
()
+
_power_db_G1
.
size
());
_host_power_db
.
resize
(
_power_db
.
size
());
_noOfBitSetsInSideChannel
.
resize
(
1
);
CUDA_ERROR_CHECK
(
cudaStreamCreate
(
&
_h2d_stream
));
...
...
@@ -244,8 +242,7 @@ template <class HandlerType, typename IntegratedPowerType>
void
GatedSpectrometer
<
HandlerType
,
IntegratedPowerType
>::
process
(
thrust
::
device_vector
<
RawVoltageType
>
const
&
digitiser_raw
,
thrust
::
device_vector
<
int64_t
>
const
&
sideChannelData
,
thrust
::
device_vector
<
IntegratedPowerType
>
&
detected_G0
,
thrust
::
device_vector
<
IntegratedPowerType
>
&
detected_G1
,
thrust
::
device_vector
<
size_t
>
&
noOfBitSet
)
{
thrust
::
device_vector
<
IntegratedPowerType
>
&
detected
,
thrust
::
device_vector
<
size_t
>
&
noOfBitSet
)
{
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Unpacking raw voltages"
;
switch
(
_nbits
)
{
case
8
:
...
...
@@ -282,7 +279,7 @@ void GatedSpectrometer<HandlerType, IntegratedPowerType>::process(
thrust
::
raw_pointer_cast
(
_channelised_voltage
.
data
());
CUFFT_ERROR_CHECK
(
cufftExecR2C
(
_fft_plan
,
(
cufftReal
*
)
_unpacked_voltage_ptr
,
(
cufftComplex
*
)
_channelised_voltage_ptr
));
_detector
->
detect
(
_channelised_voltage
,
detected
_G
0
);
_detector
->
detect
(
_channelised_voltage
,
detected
,
2
,
0
);
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Performing FFT 2"
;
_unpacked_voltage_ptr
=
thrust
::
raw_pointer_cast
(
_unpacked_voltage_G1
.
data
());
...
...
@@ -290,7 +287,7 @@ void GatedSpectrometer<HandlerType, IntegratedPowerType>::process(
(
cufftComplex
*
)
_channelised_voltage_ptr
));
CUDA_ERROR_CHECK
(
cudaStreamSynchronize
(
_proc_stream
));
_detector
->
detect
(
_channelised_voltage
,
detected
_G
1
);
_detector
->
detect
(
_channelised_voltage
,
detected
,
2
,
1
);
CUDA_ERROR_CHECK
(
cudaStreamSynchronize
(
_proc_stream
));
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Exit processing"
;
}
// process
...
...
@@ -331,12 +328,10 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
}
// Synchronize all streams
_power_db_G0
.
swap
();
_power_db_G1
.
swap
();
_power_db
.
swap
();
_noOfBitSetsInSideChannel
.
swap
();
process
(
_raw_voltage_db
.
b
(),
_sideChannelData_db
.
b
(),
_power_db_G0
.
a
(),
_power_db_G1
.
a
(),
_noOfBitSetsInSideChannel
.
a
());
process
(
_raw_voltage_db
.
b
(),
_sideChannelData_db
.
b
(),
_power_db
.
a
(),
_noOfBitSetsInSideChannel
.
a
());
// signal that data block has been processed
CUDA_ERROR_CHECK
(
cudaStreamSynchronize
(
_proc_stream
));
...
...
@@ -350,15 +345,15 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
std
::
swap
(
_noOfBitSetsInSideChannel_host
[
0
],
_noOfBitSetsInSideChannel_host
[
1
]);
CUDA_ERROR_CHECK
(
cudaMemcpyAsync
(
static_cast
<
void
*>
(
_host_power_db
.
a_ptr
()),
static_cast
<
void
*>
(
_power_db
_G0
.
b_ptr
()),
_power_db
_G0
.
size
()
*
sizeof
(
IntegratedPowerType
),
static_cast
<
void
*>
(
_power_db
.
b_ptr
()),
_power_db
.
size
()
*
sizeof
(
IntegratedPowerType
),
cudaMemcpyDeviceToHost
,
_d2h_stream
));
CUDA_ERROR_CHECK
(
cudaMemcpyAsync
(
static_cast
<
void
*>
(
_host_power_db
.
a_ptr
()
+
(
_power_db_G0
.
size
())),
// as I am adding BEFORE the cast to void, I dont need the sizeof
static_cast
<
void
*>
(
_power_db_G1
.
b_ptr
()),
_power_db_G1
.
size
()
*
sizeof
(
IntegratedPowerType
),
cudaMemcpyDeviceToHost
,
_d2h_stream
));
//
CUDA_ERROR_CHECK(cudaMemcpyAsync(
//
static_cast<void *>(_host_power_db.a_ptr() +
//
(_power_db_G0.size())), // as I am adding BEFORE the cast to void, I dont need the sizeof
//
static_cast<void *>(_power_db_G1.b_ptr()),
//
_power_db_G1.size() * sizeof(IntegratedPowerType), cudaMemcpyDeviceToHost,
//
_d2h_stream));
CUDA_ERROR_CHECK
(
cudaMemcpyAsync
(
static_cast
<
void
*>
(
&
_noOfBitSetsInSideChannel_host
[
0
]),
static_cast
<
void
*>
(
_noOfBitSetsInSideChannel
.
b_ptr
()),
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment