Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Tobias Winchen
psrdada_cpp
Commits
de4c3c1c
Commit
de4c3c1c
authored
Jun 28, 2021
by
Tobias Winchen
Browse files
Merge branch 'detect_accumualte_fix' into 'devel'
Detect accumulate fix See merge request
mpifr-bdg/psrdada_cpp!14
parents
c673c39a
6ab413cc
Pipeline
#104512
passed with stages
in 11 minutes and 4 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
psrdada_cpp/effelsberg/edd/DetectorAccumulator.cuh
View file @
de4c3c1c
...
...
@@ -38,38 +38,59 @@ void detect_and_accumulate(float2 const* __restrict__ in, int8_t* __restrict__ o
}
/**
* @brief Integrates output power from complex voltage input.
*
* @param in Pointer to input data of size nsamps.
* @param out Pointer to output data of size nsamps / (naccumualte * nchans) * nchans
* @param nchans Number of channels
* @param naccumulate Number of input spectra to integrate into one output spectrum
* @param scale Normalization constant for the utput spectrum
* @param offset Output is shifted by this value
* @param stride Stride of the output spectra. For 2, leave a gap of
* nchan between every output spetrum, for 1 no gap.
* @param out_offset
*
*
*/
template
<
typename
T
>
__global__
void
detect_and_accumulate
(
float2
const
*
__restrict__
in
,
float
*
__restrict__
out
,
in
t
nchans
,
in
t
nsamps
,
in
t
naccumulate
,
float
scale
,
float
offset
,
int
stride
,
int
out_offset
)
size_
t
nchans
,
size_
t
nsamps
,
size_
t
naccumulate
,
float
scale
,
float
offset
,
int
stride
,
int
out_offset
)
{
const
int
nb
=
naccumulate
/
blockDim
.
x
+
1
;
const
int
bs
=
blockDim
.
x
;
const
int
number_of_spectra
=
nsamps
/
(
nchans
*
naccumulate
);
const
int
number_of_spectra
=
nsamps
/
(
nchans
*
naccumulate
);
for
(
size_t
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
(
i
<
nsamps
*
nchans
/
naccumulate
*
nb
);
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
const
size_t
bn
=
i
/
nchans
/
number_of_spectra
;
const
size_t
currentOutputSpectra
=
i
/
nchans
;
const
size_t
currentChannel
=
i
%
nchans
;
for
(
size_t
current_spectrum
=
0
;
current_spectrum
<
number_of_spectra
;
current_spectrum
++
)
{
// I could just launch more blocks in a second dimension
for
(
size_t
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
(
i
<
nchans
*
nb
);
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
const
size_t
channel_number
=
i
%
nchans
;
const
size_t
bn
=
i
/
nchans
;
double
sum
=
0
;
for
(
size_t
k
=
0
;
k
<
bs
;
k
++
)
{
size_t
j
=
k
+
bn
*
bs
;
if
(
j
>=
naccumulate
)
break
;
double
sum
=
0
;
for
(
size_t
k
=
0
;
k
<
bs
;
k
++
)
{
int
cidx
=
k
+
bn
*
bs
;
float2
tmp
=
in
[
j
*
nchans
+
currentOutputSpectra
*
nchans
*
naccumulate
+
currentChannel
];
double
x
=
tmp
.
x
*
tmp
.
x
;
double
y
=
tmp
.
y
*
tmp
.
y
;
sum
+=
x
+
y
;
}
size_t
toff
=
out_offset
*
nchans
+
currentOutputSpectra
*
nchans
*
stride
;
if
(
cidx
>=
naccumulate
)
break
;
atomicAdd
(
&
out
[
toff
+
currentChannel
],
((
sum
-
offset
)
/
scale
));
const
float2
tmp
=
in
[
channel_number
+
cidx
*
nchans
+
current_spectrum
*
naccumulate
*
nchans
];
double
x
=
tmp
.
x
*
tmp
.
x
;
double
y
=
tmp
.
y
*
tmp
.
y
;
sum
+=
x
+
y
;
}
size_t
toff
=
out_offset
*
nchans
+
current_spectrum
*
nchans
*
stride
;
atomicAdd
(
&
out
[
toff
+
channel_number
],
((
sum
-
offset
)
/
scale
));
}
}
}
...
...
psrdada_cpp/effelsberg/edd/test/src/DetectorAccumulatorTester.cu
View file @
de4c3c1c
...
...
@@ -99,6 +99,112 @@ TEST_F(DetectorAccumulatorTester, noise_test)
compare_against_host
(
gpu_output
,
host_output
);
}
// parameters for the detector accumulator test
struct
detect_accumulate_params
{
size_t
nchan
;
// umber of channels
size_t
nspectra
;
// number of output spectra
size_t
naccumulate
;
// number of spectra to accumulate
};
class
detect_and_accumulate32bit
:
public
testing
::
TestWithParam
<
detect_accumulate_params
>
{
// Test that the kernel executes in certain parameter
// settings
};
TEST_P
(
detect_and_accumulate32bit
,
no_stride
)
{
thrust
::
device_vector
<
float2
>
input
;
thrust
::
device_vector
<
float
>
output
;
detect_accumulate_params
params
=
GetParam
();
input
.
resize
(
params
.
nspectra
*
params
.
nchan
*
params
.
naccumulate
);
output
.
resize
(
params
.
nspectra
*
params
.
nchan
);
float2
v
;
v
.
x
=
1.0
;
v
.
y
=
1.0
;
thrust
::
fill
(
input
.
begin
(),
input
.
end
(),
v
);
thrust
::
fill
(
output
.
begin
(),
output
.
end
(),
0.
);
kernels
::
detect_and_accumulate
<
float
>
<<<
1024
,
1024
>>>
(
thrust
::
raw_pointer_cast
(
input
.
data
()),
thrust
::
raw_pointer_cast
(
output
.
data
()),
params
.
nchan
,
input
.
size
(),
params
.
naccumulate
,
1
,
0.
,
1
,
0
);
cudaDeviceSynchronize
();
thrust
::
host_vector
<
float
>
output_host
=
output
;
for
(
size_t
i
=
0
;
i
<
output
.
size
();
i
++
)
{
ASSERT_FLOAT_EQ
(
output_host
[
i
],
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
)
*
params
.
naccumulate
)
<<
"i = "
<<
i
<<
" for nchan = "
<<
params
.
nchan
<<
", nspectra = "
<<
params
.
nspectra
<<
", naccumulate = "
<<
params
.
naccumulate
;
}
}
TEST_P
(
detect_and_accumulate32bit
,
w_stride
)
{
thrust
::
device_vector
<
float2
>
input
;
thrust
::
device_vector
<
float
>
output
;
detect_accumulate_params
params
=
GetParam
();
input
.
resize
(
params
.
nspectra
*
params
.
nchan
*
params
.
naccumulate
);
output
.
resize
(
params
.
nspectra
*
params
.
nchan
*
2
);
float2
v
;
v
.
x
=
1.0
;
v
.
y
=
1.0
;
thrust
::
fill
(
input
.
begin
(),
input
.
end
(),
v
);
thrust
::
fill
(
output
.
begin
(),
output
.
end
(),
0.
);
kernels
::
detect_and_accumulate
<
float
>
<<<
1024
,
1024
>>>
(
thrust
::
raw_pointer_cast
(
input
.
data
()),
thrust
::
raw_pointer_cast
(
output
.
data
()),
params
.
nchan
,
input
.
size
(),
params
.
naccumulate
,
1
,
0.
,
2
,
0
);
cudaDeviceSynchronize
();
thrust
::
host_vector
<
float
>
output_host
=
output
;
for
(
size_t
i
=
0
;
i
<
params
.
nchan
*
params
.
nspectra
;
i
++
)
{
size_t
current_spectrum
=
i
/
params
.
nchan
;
ASSERT_FLOAT_EQ
(
output_host
[
i
+
current_spectrum
*
params
.
nchan
],
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
)
*
params
.
naccumulate
)
<<
"i = "
<<
i
<<
" for nchan = "
<<
params
.
nchan
<<
", nspectra = "
<<
params
.
nspectra
<<
", naccumulate = "
<<
params
.
naccumulate
;
}
}
INSTANTIATE_TEST_CASE_P
(
DetectorAccumulatorTester
,
detect_and_accumulate32bit
,
testing
::
Values
(
// nchan; nspectra; naccumulate;
detect_accumulate_params
({
1024
,
1
,
128
}),
detect_accumulate_params
({
1024
,
1
,
1024
}),
detect_accumulate_params
({
1024
,
1
,
2048
}),
detect_accumulate_params
({
1024
,
1
,
16384
}),
detect_accumulate_params
({
1024
,
16
,
1024
}),
detect_accumulate_params
({
8388608
,
2
,
16
}),
detect_accumulate_params
({
8388608
,
1
,
32
})
)
);
}
//namespace test
}
//namespace edd
}
//namespace meerkat
...
...
psrdada_cpp/effelsberg/edd/test/src/GatedSpectrometerTest.cu
View file @
de4c3c1c
...
...
@@ -146,6 +146,7 @@ TEST(GatedSpectrometer, stokes_accumulate)
testStokesAccumulateParam
(
8
*
1024
*
1024
+
1
,
32
);
testStokesAccumulateParam
(
1024
+
1
,
5
);
testStokesAccumulateParam
(
1024
+
1
,
64
);
testStokesAccumulateParam
(
1024
,
65536
);
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment