Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MPIfR-BDG
psrdada_cpp
Commits
75dfadc4
Commit
75dfadc4
authored
Apr 10, 2019
by
Tobias Winchen
Browse files
Allow 32 bit output
parent
e7f380fc
Changes
8
Hide whitespace changes
Inline
Side-by-side
psrdada_cpp/effelsberg/edd/DetectorAccumulator.cuh
View file @
75dfadc4
...
...
@@ -9,23 +9,99 @@ namespace effelsberg {
namespace
edd
{
namespace
kernels
{
template
<
typename
T
>
__global__
void
detect_and_accumulate
(
float2
const
*
__restrict__
in
,
int8_t
*
__restrict__
out
,
int
nchans
,
int
nsamps
,
int
naccumulate
,
float
scale
,
float
offset
);
int
nchans
,
int
nsamps
,
int
naccumulate
,
float
scale
,
float
offset
)
{
for
(
int
block_idx
=
blockIdx
.
x
;
block_idx
<
nsamps
/
naccumulate
;
block_idx
+=
gridDim
.
x
)
{
int
read_offset
=
block_idx
*
naccumulate
*
nchans
;
int
write_offset
=
block_idx
*
nchans
;
for
(
int
chan_idx
=
threadIdx
.
x
;
chan_idx
<
nchans
;
chan_idx
+=
blockDim
.
x
)
{
float
sum
=
0.0
f
;
for
(
int
ii
=
0
;
ii
<
naccumulate
;
++
ii
)
{
float2
tmp
=
in
[
read_offset
+
chan_idx
+
ii
*
nchans
];
float
x
=
tmp
.
x
*
tmp
.
x
;
float
y
=
tmp
.
y
*
tmp
.
y
;
sum
+=
x
+
y
;
}
out
[
write_offset
+
chan_idx
]
=
(
int8_t
)
((
sum
-
offset
)
/
scale
);
}
}
}
template
<
typename
T
>
__global__
void
detect_and_accumulate
(
float2
const
*
__restrict__
in
,
float
*
__restrict__
out
,
int
nchans
,
int
nsamps
,
int
naccumulate
,
float
scale
,
float
offset
)
{
for
(
int
block_idx
=
blockIdx
.
x
;
block_idx
<
nsamps
/
naccumulate
;
block_idx
+=
gridDim
.
x
)
{
int
read_offset
=
block_idx
*
naccumulate
*
nchans
;
int
write_offset
=
block_idx
*
nchans
;
for
(
int
chan_idx
=
threadIdx
.
x
;
chan_idx
<
nchans
;
chan_idx
+=
blockDim
.
x
)
{
double
sum
=
0.0
;
for
(
int
ii
=
0
;
ii
<
naccumulate
;
++
ii
)
{
float2
tmp
=
in
[
read_offset
+
chan_idx
+
ii
*
nchans
];
double
x
=
tmp
.
x
*
tmp
.
x
;
double
y
=
tmp
.
y
*
tmp
.
y
;
sum
+=
x
+
y
;
}
out
[
write_offset
+
chan_idx
]
=
(
float
)
sum
;
}
}
}
}
template
<
typename
T
>
class
DetectorAccumulator
{
public:
typedef
thrust
::
device_vector
<
float2
>
InputType
;
typedef
thrust
::
device_vector
<
int8_t
>
OutputType
;
typedef
thrust
::
device_vector
<
T
>
OutputType
;
public:
DetectorAccumulator
(
int
nchans
,
int
tscrunch
,
float
scale
,
float
offset
,
cudaStream_t
stream
);
~
DetectorAccumulator
();
DetectorAccumulator
(
DetectorAccumulator
const
&
)
=
delete
;
void
detect
(
InputType
const
&
input
,
OutputType
&
output
);
DetectorAccumulator
(
int
nchans
,
int
tscrunch
,
float
scale
,
float
offset
,
cudaStream_t
stream
)
:
_nchans
(
nchans
)
,
_tscrunch
(
tscrunch
)
,
_scale
(
scale
)
,
_offset
(
offset
)
,
_stream
(
stream
)
{
}
~
DetectorAccumulator
()
{
}
void
detect
(
InputType
const
&
input
,
OutputType
&
output
)
{
assert
(
input
.
size
()
%
(
_nchans
*
_tscrunch
)
==
0
/* Input is not a multiple of _nchans * _tscrunch*/
);
output
.
resize
(
input
.
size
()
/
_tscrunch
);
int
nsamps
=
input
.
size
()
/
_nchans
;
float2
const
*
input_ptr
=
thrust
::
raw_pointer_cast
(
input
.
data
());
T
*
output_ptr
=
thrust
::
raw_pointer_cast
(
output
.
data
());
kernels
::
detect_and_accumulate
<
T
>
<<<
1024
,
1024
,
0
,
_stream
>>>
(
input_ptr
,
output_ptr
,
_nchans
,
nsamps
,
_tscrunch
,
_scale
,
_offset
);
}
private:
int
_nchans
;
...
...
psrdada_cpp/effelsberg/edd/FftSpectrometer.cuh
View file @
75dfadc4
...
...
@@ -68,7 +68,7 @@ private:
int
_nchans
;
int
_call_count
;
std
::
unique_ptr
<
Unpacker
>
_unpacker
;
std
::
unique_ptr
<
DetectorAccumulator
>
_detector
;
std
::
unique_ptr
<
DetectorAccumulator
<
int8_t
>
>
_detector
;
DoubleDeviceBuffer
<
RawVoltageType
>
_raw_voltage_db
;
DoubleDeviceBuffer
<
IntegratedPowerType
>
_power_db
;
thrust
::
device_vector
<
UnpackedVoltageType
>
_unpacked_voltage
;
...
...
psrdada_cpp/effelsberg/edd/GatedSpectrometer.cuh
View file @
75dfadc4
...
...
@@ -35,7 +35,8 @@ public:
typedef
uint64_t
RawVoltageType
;
typedef
float
UnpackedVoltageType
;
typedef
float2
ChannelisedVoltageType
;
typedef
int8_t
IntegratedPowerType
;
typedef
float
IntegratedPowerType
;
//typedef int8_t IntegratedPowerType;
public:
/**
...
...
@@ -112,7 +113,7 @@ private:
int
_nchans
;
int
_call_count
;
std
::
unique_ptr
<
Unpacker
>
_unpacker
;
std
::
unique_ptr
<
DetectorAccumulator
>
_detector
;
std
::
unique_ptr
<
DetectorAccumulator
<
IntegratedPowerType
>
>
_detector
;
DoubleDeviceBuffer
<
RawVoltageType
>
_raw_voltage_db
;
DoubleDeviceBuffer
<
IntegratedPowerType
>
_power_db_G0
;
...
...
psrdada_cpp/effelsberg/edd/detail/FftSpectrometer.cu
View file @
75dfadc4
...
...
@@ -63,7 +63,7 @@ FftSpectrometer<HandlerType>::FftSpectrometer(
CUDA_ERROR_CHECK
(
cudaStreamCreate
(
&
_d2h_stream
));
CUFFT_ERROR_CHECK
(
cufftSetStream
(
_fft_plan
,
_proc_stream
));
_unpacker
.
reset
(
new
Unpacker
(
_proc_stream
));
_detector
.
reset
(
new
DetectorAccumulator
(
_nchans
,
_naccumulate
,
_detector
.
reset
(
new
DetectorAccumulator
<
int8_t
>
(
_nchans
,
_naccumulate
,
scaling
,
offset
,
_proc_stream
));
}
...
...
psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu
View file @
75dfadc4
...
...
@@ -11,6 +11,11 @@ namespace psrdada_cpp {
namespace
effelsberg
{
namespace
edd
{
__global__
void
gating
(
float
*
__restrict__
G0
,
float
*
__restrict__
G1
,
const
int64_t
*
__restrict__
sideChannelData
,
size_t
N
,
size_t
heapSize
,
size_t
bitpos
,
size_t
noOfSideChannels
,
size_t
selectedSideChannel
,
const
float
*
__restrict__
_baseLineN
)
{
...
...
@@ -186,7 +191,7 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
CUFFT_ERROR_CHECK
(
cufftSetStream
(
_fft_plan
,
_proc_stream
));
_unpacker
.
reset
(
new
Unpacker
(
_proc_stream
));
_detector
.
reset
(
new
DetectorAccumulator
(
_nchans
,
_naccumulate
,
scaling
,
_detector
.
reset
(
new
DetectorAccumulator
<
IntegratedPowerType
>
(
_nchans
,
_naccumulate
,
scaling
,
offset
,
_proc_stream
));
}
// constructor
...
...
@@ -260,6 +265,7 @@ void GatedSpectrometer<HandlerType>::process(
CUDA_ERROR_CHECK
(
cudaStreamSynchronize
(
_proc_stream
));
_detector
->
detect
(
_channelised_voltage
,
detected_G1
);
CUDA_ERROR_CHECK
(
cudaStreamSynchronize
(
_proc_stream
));
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Exit processing"
;
}
// process
...
...
@@ -313,25 +319,31 @@ bool GatedSpectrometer<HandlerType>::operator()(RawBytes &block) {
return
false
;
}
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Copy Data back to device"
;
CUDA_ERROR_CHECK
(
cudaStreamSynchronize
(
_d2h_stream
));
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Swap host power"
;
_host_power_db
.
swap
();
BOOST_LOG_TRIVIAL
(
debug
)
<<
"swap no of bit channel"
;
std
::
swap
(
_noOfBitSetsInSideChannel_host
[
0
],
_noOfBitSetsInSideChannel_host
[
1
]);
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Foo"
;
CUDA_ERROR_CHECK
(
cudaMemcpyAsync
(
static_cast
<
void
*>
(
_host_power_db
.
a_ptr
()),
static_cast
<
void
*>
(
_power_db_G0
.
b_ptr
()),
_power_db_G0
.
size
()
*
sizeof
(
IntegratedPowerType
),
cudaMemcpyDeviceToHost
,
_d2h_stream
));
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Bar"
;
CUDA_ERROR_CHECK
(
cudaMemcpyAsync
(
static_cast
<
void
*>
(
_host_power_db
.
a_ptr
()
+
(
_power_db_G0
.
size
()
*
sizeof
(
IntegratedPowerType
))),
(
_power_db_G0
.
size
()
)),
// as I am adding BEFORE the cast to void, I dont need the sizeof
static_cast
<
void
*>
(
_power_db_G1
.
b_ptr
()),
_power_db_G1
.
size
()
*
sizeof
(
IntegratedPowerType
),
cudaMemcpyDeviceToHost
,
_d2h_stream
));
CUDA_ERROR_CHECK
(
cudaMemcpyAsync
(
static_cast
<
void
*>
(
&
_noOfBitSetsInSideChannel_host
[
0
]),
static_cast
<
void
*>
(
_noOfBitSetsInSideChannel
.
b_ptr
()),
1
*
sizeof
(
unsigned
in
t
),
cudaMemcpyDeviceToHost
,
_d2h_stream
));
//
CUDA_ERROR_CHECK(cudaMemcpyAsync(static_cast<void *>(&_noOfBitSetsInSideChannel_host[0]),
//
static_cast<void *>(_noOfBitSetsInSideChannel.b_ptr()),
//
1 * sizeof(
size_
t),cudaMemcpyDeviceToHost, _d2h_stream));
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Copy Data back to device"
;
if
(
_call_count
==
3
)
{
return
false
;
...
...
psrdada_cpp/effelsberg/edd/src/DetectorAccumulator.cu
View file @
75dfadc4
...
...
@@ -6,59 +6,9 @@ namespace effelsberg {
namespace
edd
{
namespace
kernels
{
__global__
void
detect_and_accumulate
(
float2
const
*
__restrict__
in
,
int8_t
*
__restrict__
out
,
int
nchans
,
int
nsamps
,
int
naccumulate
,
float
scale
,
float
offset
)
{
for
(
int
block_idx
=
blockIdx
.
x
;
block_idx
<
nsamps
/
naccumulate
;
block_idx
+=
gridDim
.
x
)
{
int
read_offset
=
block_idx
*
naccumulate
*
nchans
;
int
write_offset
=
block_idx
*
nchans
;
for
(
int
chan_idx
=
threadIdx
.
x
;
chan_idx
<
nchans
;
chan_idx
+=
blockDim
.
x
)
{
float
sum
=
0.0
f
;
for
(
int
ii
=
0
;
ii
<
naccumulate
;
++
ii
)
{
float2
tmp
=
in
[
read_offset
+
chan_idx
+
ii
*
nchans
];
float
x
=
tmp
.
x
*
tmp
.
x
;
float
y
=
tmp
.
y
*
tmp
.
y
;
sum
+=
x
+
y
;
}
out
[
write_offset
+
chan_idx
]
=
(
int8_t
)
((
sum
-
offset
)
/
scale
);
}
}
}
}
//namespace kernels
DetectorAccumulator
::
DetectorAccumulator
(
int
nchans
,
int
tscrunch
,
float
scale
,
float
offset
,
cudaStream_t
stream
)
:
_nchans
(
nchans
)
,
_tscrunch
(
tscrunch
)
,
_scale
(
scale
)
,
_offset
(
offset
)
,
_stream
(
stream
)
{
}
DetectorAccumulator
::~
DetectorAccumulator
()
{
}
void
DetectorAccumulator
::
detect
(
InputType
const
&
input
,
OutputType
&
output
)
{
assert
(
input
.
size
()
%
(
_nchans
*
_tscrunch
)
==
0
/* Input is not a multiple of _nchans * _tscrunch*/
);
output
.
resize
(
input
.
size
()
/
_tscrunch
);
int
nsamps
=
input
.
size
()
/
_nchans
;
float2
const
*
input_ptr
=
thrust
::
raw_pointer_cast
(
input
.
data
());
int8_t
*
output_ptr
=
thrust
::
raw_pointer_cast
(
output
.
data
());
kernels
::
detect_and_accumulate
<<<
1024
,
1024
,
0
,
_stream
>>>
(
input_ptr
,
output_ptr
,
_nchans
,
nsamps
,
_tscrunch
,
_scale
,
_offset
);
}
}
//namespace edd
}
//namespace effelsberg
}
//namespace psrdada_cpp
psrdada_cpp/effelsberg/edd/test/DetectorAccumulatorTester.cuh
View file @
75dfadc4
...
...
@@ -35,7 +35,7 @@ protected:
float
offset
);
void
compare_against_host
(
DetectorAccumulator
::
OutputType
const
&
gpu_output
,
DetectorAccumulator
<
int8_t
>
::
OutputType
const
&
gpu_output
,
OutputType
const
&
host_output
);
protected:
...
...
psrdada_cpp/effelsberg/edd/test/src/DetectorAccumulatorTester.cu
View file @
75dfadc4
...
...
@@ -62,7 +62,7 @@ void DetectorAccumulatorTester::detect_c_reference(
}
void
DetectorAccumulatorTester
::
compare_against_host
(
DetectorAccumulator
::
OutputType
const
&
gpu_output
,
DetectorAccumulator
<
int8_t
>
::
OutputType
const
&
gpu_output
,
OutputType
const
&
host_output
)
{
OutputType
copy_from_gpu
=
gpu_output
;
...
...
@@ -88,10 +88,10 @@ TEST_F(DetectorAccumulatorTester, noise_test)
host_input
[
ii
].
x
=
distribution
(
generator
);
host_input
[
ii
].
y
=
distribution
(
generator
);
}
DetectorAccumulator
::
InputType
gpu_input
=
host_input
;
DetectorAccumulator
::
OutputType
gpu_output
;
DetectorAccumulator
<
int8_t
>
::
InputType
gpu_input
=
host_input
;
DetectorAccumulator
<
int8_t
>
::
OutputType
gpu_output
;
OutputType
host_output
;
DetectorAccumulator
detector
(
nchans
,
tscrunch
,
scale
,
0.0
,
_stream
);
DetectorAccumulator
<
int8_t
>
detector
(
nchans
,
tscrunch
,
scale
,
0.0
,
_stream
);
detector
.
detect
(
gpu_input
,
gpu_output
);
detect_c_reference
(
host_input
,
host_output
,
nchans
,
tscrunch
,
scale
,
0.0
);
compare_against_host
(
gpu_output
,
host_output
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment