Skip to content
Snippets Groups Projects
Commit fb5726c1 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Calculate baseline

parent f4d097c8
Branches
No related tags found
No related merge requests found
...@@ -20,7 +20,7 @@ install(TARGETS fft_spectrometer DESTINATION bin) ...@@ -20,7 +20,7 @@ install(TARGETS fft_spectrometer DESTINATION bin)
#Gated FFT spectrometer interface #Gated FFT spectrometer interface
cuda_add_executable(gated_spectrometer src/GatedSpectrometer_cli.cu) cuda_add_executable(gated_spectrometer src/GatedSpectrometer_cli.cu)
target_link_libraries(gated_spectrometer ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} ${CUDA_CUFFT_LIBRARIES}) target_link_libraries(gated_spectrometer ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} -lcublas)
install(TARGETS gated_spectrometer DESTINATION bin) install(TARGETS gated_spectrometer DESTINATION bin)
add_subdirectory(test) add_subdirectory(test)
......
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#include "thrust/device_vector.h" #include "thrust/device_vector.h"
#include "cufft.h" #include "cufft.h"
#include "cublas_v2.h"
namespace psrdada_cpp { namespace psrdada_cpp {
namespace effelsberg { namespace effelsberg {
namespace edd { namespace edd {
...@@ -122,6 +124,8 @@ private: ...@@ -122,6 +124,8 @@ private:
thrust::device_vector<UnpackedVoltageType> _unpacked_voltage_G1; thrust::device_vector<UnpackedVoltageType> _unpacked_voltage_G1;
thrust::device_vector<ChannelisedVoltageType> _channelised_voltage; thrust::device_vector<ChannelisedVoltageType> _channelised_voltage;
thrust::device_vector<UnpackedVoltageType> _baseLineN;
DoublePinnedHostBuffer<IntegratedPowerType> _host_power_db; DoublePinnedHostBuffer<IntegratedPowerType> _host_power_db;
cudaStream_t _h2d_stream; cudaStream_t _h2d_stream;
...@@ -135,9 +139,9 @@ private: ...@@ -135,9 +139,9 @@ private:
* *
* @detail The resulting gaps are filled with zeros in the other stream. * @detail The resulting gaps are filled with zeros in the other stream.
* *
* @param GO Input data. Data is set to zero if corresponding * @param GO Input data. Data is set to the baseline value if corresponding
* sideChannelData bit at bitpos os set. * sideChannelData bit at bitpos os set.
* @param G1 Data in this array is set to zero if corresponding * @param G1 Data in this array is set to the baseline value if corresponding
* sideChannelData bit at bitpos is not set. * sideChannelData bit at bitpos is not set.
* @param sideChannelData noOfSideChannels items per block of heapSize * @param sideChannelData noOfSideChannels items per block of heapSize
* bytes in the input data. * bytes in the input data.
...@@ -149,11 +153,24 @@ private: ...@@ -149,11 +153,24 @@ private:
* data. * data.
* @param selectedSideChannel No. of side channel item to be eveluated. * @param selectedSideChannel No. of side channel item to be eveluated.
0 <= selectedSideChannel < noOfSideChannels. 0 <= selectedSideChannel < noOfSideChannels.
*
*/ */
__global__ void gating(float *G0, float *G1, const int64_t *sideChannelData, __global__ void gating(float *G0, float *G1, const int64_t *sideChannelData,
size_t N, size_t heapSize, size_t bitpos, size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel); size_t noOfSideChannels, size_t selectedSideChannel, const float *_baseLine);
/**
* @brief Sums all elements of an input array.
*
* @detail The results is stored in an array with one value per launch
* block. Full reuction thus requires two kernel launches.
*
* @param in. Input array.
* @param N. Size of input array.
* @param out. Output array.
* */
__global__ void array_sum(float *in, size_t N, float *out);
} // edd } // edd
......
...@@ -11,13 +11,13 @@ namespace psrdada_cpp { ...@@ -11,13 +11,13 @@ namespace psrdada_cpp {
namespace effelsberg { namespace effelsberg {
namespace edd { namespace edd {
__global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const int64_t* __restrict__ sideChannelData,
__global__ void gating(float *G0, float *G1, const int64_t *sideChannelData,
size_t N, size_t heapSize, size_t bitpos, size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel) { size_t noOfSideChannels, size_t selectedSideChannel, const float* __restrict__ _baseLineN) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; (i < N); float baseLine = *_baseLineN / N;
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < N);
i += blockDim.x * gridDim.x) { i += blockDim.x * gridDim.x) {
const float w = G0[i]; const float w = G0[i] - baseLine;
const int64_t sideChannelItem = const int64_t sideChannelItem =
sideChannelData[((i / heapSize) * (noOfSideChannels)) + sideChannelData[((i / heapSize) * (noOfSideChannels)) +
selectedSideChannel]; // Probably not optimal access as selectedSideChannel]; // Probably not optimal access as
...@@ -26,8 +26,8 @@ __global__ void gating(float *G0, float *G1, const int64_t *sideChannelData, ...@@ -26,8 +26,8 @@ __global__ void gating(float *G0, float *G1, const int64_t *sideChannelData,
// handled by cache? // handled by cache?
const int bit_set = TEST_BIT(sideChannelItem, bitpos); const int bit_set = TEST_BIT(sideChannelItem, bitpos);
G1[i] = w * bit_set; G1[i] = w * bit_set + baseLine;
G0[i] = w * (!bit_set); G0[i] = w * (!bit_set) + baseLine;
} }
} }
...@@ -61,6 +61,43 @@ __global__ void countBitSet(const int64_t *sideChannelData, size_t N, size_t ...@@ -61,6 +61,43 @@ __global__ void countBitSet(const int64_t *sideChannelData, size_t N, size_t
} }
// blocksize for the array sum kernel
#define array_sum_Nthreads 1024
__global__ void array_sum(float *in, size_t N, float *out) {
extern __shared__ float data[];
size_t tid = threadIdx.x;
float ls = 0;
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < N);
i += blockDim.x * gridDim.x) {
ls += in[i]; // + in[i + blockDim.x]; // loading two elements increase the used bandwidth by ~10% but requires matching blocksize and size of input array
}
data[tid] = ls;
__syncthreads();
for (size_t i = blockDim.x / 2; i > 0; i /= 2) {
if (tid < i) {
data[tid] += data[tid + i];
}
__syncthreads();
}
// unroll last warp
// if (tid < 32)
//{
// warpReduce(data, tid);
//}
if (tid == 0) {
out[blockIdx.x] = data[0];
}
}
template <class HandlerType> template <class HandlerType>
GatedSpectrometer<HandlerType>::GatedSpectrometer( GatedSpectrometer<HandlerType>::GatedSpectrometer(
std::size_t buffer_bytes, std::size_t nSideChannels, std::size_t buffer_bytes, std::size_t nSideChannels,
...@@ -128,6 +165,8 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer( ...@@ -128,6 +165,8 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
<< _raw_voltage_db.size(); << _raw_voltage_db.size();
_unpacked_voltage_G0.resize(nsamps_per_buffer); _unpacked_voltage_G0.resize(nsamps_per_buffer);
_unpacked_voltage_G1.resize(nsamps_per_buffer); _unpacked_voltage_G1.resize(nsamps_per_buffer);
_baseLineN.resize(array_sum_Nthreads);
BOOST_LOG_TRIVIAL(debug) << " Unpacked voltages size (in samples): " BOOST_LOG_TRIVIAL(debug) << " Unpacked voltages size (in samples): "
<< _unpacked_voltage_G0.size(); << _unpacked_voltage_G0.size();
_channelised_voltage.resize(_nchans * batch); _channelised_voltage.resize(_nchans * batch);
...@@ -187,8 +226,9 @@ void GatedSpectrometer<HandlerType>::process( ...@@ -187,8 +226,9 @@ void GatedSpectrometer<HandlerType>::process(
default: default:
throw std::runtime_error("Unsupported number of bits"); throw std::runtime_error("Unsupported number of bits");
} }
// raw voltage buffer is free again BOOST_LOG_TRIVIAL(debug) << "Calculate baseline";
//CUDA_ERROR_CHECK(cudaEventRecord(_procB, _proc_stream)); psrdada_cpp::effelsberg::edd::array_sum<<<64, array_sum_Nthreads, array_sum_Nthreads * sizeof(float), _proc_stream>>>(thrust::raw_pointer_cast(_unpacked_voltage_G0.data()), _unpacked_voltage_G0.size(), thrust::raw_pointer_cast(_baseLineN.data()));
psrdada_cpp::effelsberg::edd::array_sum<<<1, array_sum_Nthreads, array_sum_Nthreads * sizeof(float), _proc_stream>>>(thrust::raw_pointer_cast(_baseLineN.data()), _baseLineN.size(), thrust::raw_pointer_cast(_baseLineN.data()));
BOOST_LOG_TRIVIAL(debug) << "Perform gating"; BOOST_LOG_TRIVIAL(debug) << "Perform gating";
gating<<<1024, 1024, 0, _proc_stream>>>( gating<<<1024, 1024, 0, _proc_stream>>>(
...@@ -196,7 +236,7 @@ void GatedSpectrometer<HandlerType>::process( ...@@ -196,7 +236,7 @@ void GatedSpectrometer<HandlerType>::process(
thrust::raw_pointer_cast(_unpacked_voltage_G1.data()), thrust::raw_pointer_cast(_unpacked_voltage_G1.data()),
thrust::raw_pointer_cast(sideChannelData.data()), thrust::raw_pointer_cast(sideChannelData.data()),
_unpacked_voltage_G0.size(), _speadHeapSize, _selectedBit, _nSideChannels, _unpacked_voltage_G0.size(), _speadHeapSize, _selectedBit, _nSideChannels,
_selectedSideChannel); _selectedSideChannel, thrust::raw_pointer_cast(_baseLineN.data()));
countBitSet<<<(sideChannelData.size()+255)/256, 256, 0, countBitSet<<<(sideChannelData.size()+255)/256, 256, 0,
_proc_stream>>>(thrust::raw_pointer_cast(sideChannelData.data()), _proc_stream>>>(thrust::raw_pointer_cast(sideChannelData.data()),
...@@ -306,7 +346,9 @@ bool GatedSpectrometer<HandlerType>::operator()(RawBytes &block) { ...@@ -306,7 +346,9 @@ bool GatedSpectrometer<HandlerType>::operator()(RawBytes &block) {
// The handler can't do anything asynchronously without a copy here // The handler can't do anything asynchronously without a copy here
// as it would be unsafe (given that it does not own the memory it // as it would be unsafe (given that it does not own the memory it
// is being passed). // is being passed).
return _handler(bytes);
_handler(bytes);
return false; //
} // operator () } // operator ()
} // edd } // edd
......
...@@ -25,6 +25,7 @@ const size_t ERROR_UNHANDLED_EXCEPTION = 2; ...@@ -25,6 +25,7 @@ const size_t ERROR_UNHANDLED_EXCEPTION = 2;
} // namespace } // namespace
int main(int argc, char **argv) { int main(int argc, char **argv) {
try { try {
key_t input_key; key_t input_key;
...@@ -42,6 +43,7 @@ int main(int argc, char **argv) { ...@@ -42,6 +43,7 @@ int main(int argc, char **argv) {
char buffer[32]; char buffer[32];
std::strftime(buffer, 32, "%Y-%m-%d-%H:%M:%S.bp", ptm); std::strftime(buffer, 32, "%Y-%m-%d-%H:%M:%S.bp", ptm);
std::string filename(buffer); std::string filename(buffer);
std::string output_type = "file";
/** Define and parse the program options /** Define and parse the program options
*/ */
...@@ -55,25 +57,29 @@ int main(int argc, char **argv) { ...@@ -55,25 +57,29 @@ int main(int argc, char **argv) {
[&input_key](std::string in) { input_key = string_to_key(in); }), [&input_key](std::string in) { input_key = string_to_key(in); }),
"The shared memory key for the dada buffer to connect to (hex " "The shared memory key for the dada buffer to connect to (hex "
"string)"); "string)");
desc.add_options()("fft_length,n", po::value<int>(&fft_length)->required(), desc.add_options()(
"The length of the FFT to perform on the data"); "output_type", po::value<std::string>(&output_type)->default_value(output_type),
desc.add_options()("naccumulate,a", "output type [dada, file]. Default is file."
po::value<int>(&naccumulate)->required(), );
"The number of samples to integrate in each channel"); desc.add_options()(
desc.add_options()("nsidechannelitems,n", "output_key,o", po::value<std::string>(&filename)->default_value(filename),
"The key of the output bnuffer / name of the output file to write spectra "
"to");
desc.add_options()("nsidechannelitems,s",
po::value<size_t>()->default_value(0)->notifier( po::value<size_t>()->default_value(0)->notifier(
[&nSideChannels](size_t in) { nSideChannels = in; }), [&nSideChannels](size_t in) { nSideChannels = in; }),
"Number of side channel items"); "Number of side channel items");
desc.add_options()( desc.add_options()(
"selected_sidechannel", "selected_sidechannel,e",
po::value<size_t>()->default_value(0)->notifier( po::value<size_t>()->default_value(0)->notifier(
[&selectedSideChannel](size_t in) { selectedSideChannel = in; }), [&selectedSideChannel](size_t in) { selectedSideChannel = in; }),
"Side channel selected for evaluation."); "Side channel selected for evaluation.");
desc.add_options()("selected_bit", desc.add_options()("selected_bit,B",
po::value<size_t>()->default_value(63)->notifier( po::value<size_t>()->default_value(63)->notifier(
[&selectedBit](size_t in) { selectedBit = in; }), [&selectedBit](size_t in) { selectedBit = in; }),
"Side channel selected for evaluation."); "Side channel selected for evaluation.");
desc.add_options()("speadheap_size,s", desc.add_options()("speadheap_size",
po::value<size_t>()->default_value(4096)->notifier( po::value<size_t>()->default_value(4096)->notifier(
[&speadHeapSize](size_t in) { speadHeapSize = in; }), [&speadHeapSize](size_t in) { speadHeapSize = in; }),
"size of the spead data heaps. The number of the " "size of the spead data heaps. The number of the "
...@@ -82,6 +88,14 @@ int main(int argc, char **argv) { ...@@ -82,6 +88,14 @@ int main(int argc, char **argv) {
desc.add_options()("nbits,b", po::value<int>(&nbits)->required(), desc.add_options()("nbits,b", po::value<int>(&nbits)->required(),
"The number of bits per sample in the " "The number of bits per sample in the "
"packetiser output (8 or 12)"); "packetiser output (8 or 12)");
desc.add_options()("fft_length,n", po::value<int>(&fft_length)->required(),
"The length of the FFT to perform on the data");
desc.add_options()("naccumulate,a",
po::value<int>(&naccumulate)->required(),
"The number of samples to integrate in each channel");
desc.add_options()("input_level", desc.add_options()("input_level",
po::value<float>(&input_level)->required(), po::value<float>(&input_level)->required(),
"The input power level (standard " "The input power level (standard "
...@@ -91,10 +105,6 @@ int main(int argc, char **argv) { ...@@ -91,10 +105,6 @@ int main(int argc, char **argv) {
"The output power level (standard " "The output power level (standard "
"deviation, used for 8-bit " "deviation, used for 8-bit "
"conversion)"); "conversion)");
desc.add_options()(
"outfile,o", po::value<std::string>(&filename)->default_value(filename),
"The output file to write spectra "
"to");
desc.add_options()( desc.add_options()(
"log_level", po::value<std::string>()->default_value("info")->notifier( "log_level", po::value<std::string>()->default_value("info")->notifier(
[](std::string level) { set_log_level(level); }), [](std::string level) { set_log_level(level); }),
...@@ -114,7 +124,12 @@ int main(int argc, char **argv) { ...@@ -114,7 +124,12 @@ int main(int argc, char **argv) {
<< desc << std::endl; << desc << std::endl;
return SUCCESS; return SUCCESS;
} }
po::notify(vm); po::notify(vm);
if (vm .count("output_type") && (!(output_type == "dada" || output_type == "file") ))
{
throw po::validation_error(po::validation_error::invalid_option_value, "output_type", output_type);
}
} catch (po::error &e) { } catch (po::error &e) {
std::cerr << "ERROR: " << e.what() << std::endl << std::endl; std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
std::cerr << desc << std::endl; std::cerr << desc << std::endl;
...@@ -129,6 +144,10 @@ int main(int argc, char **argv) { ...@@ -129,6 +144,10 @@ int main(int argc, char **argv) {
//client.cuda_register_memory(); //client.cuda_register_memory();
std::size_t buffer_bytes = client.data_buffer_size(); std::size_t buffer_bytes = client.data_buffer_size();
std::cout << "Running with output_type: " << output_type << std::endl;
if (output_type == "file")
{
SimpleFileWriter sink(filename); SimpleFileWriter sink(filename);
effelsberg::edd::GatedSpectrometer<decltype(sink)> spectrometer( effelsberg::edd::GatedSpectrometer<decltype(sink)> spectrometer(
buffer_bytes, nSideChannels, selectedSideChannel, selectedBit, buffer_bytes, nSideChannels, selectedSideChannel, selectedBit,
...@@ -137,6 +156,24 @@ int main(int argc, char **argv) { ...@@ -137,6 +156,24 @@ int main(int argc, char **argv) {
DadaInputStream<decltype(spectrometer)> istream(input_key, log, DadaInputStream<decltype(spectrometer)> istream(input_key, log,
spectrometer); spectrometer);
istream.start(); istream.start();
}
else if (output_type == "dada")
{
DadaOutputStream sink(string_to_key(filename), log);
effelsberg::edd::GatedSpectrometer<decltype(sink)> spectrometer(
buffer_bytes, nSideChannels, selectedSideChannel, selectedBit,
speadHeapSize, fft_length, naccumulate, nbits, input_level,
output_level, sink);
DadaInputStream<decltype(spectrometer)> istream(input_key, log,
spectrometer);
istream.start();
}
else
{
throw std::runtime_error("Unknown oputput-type");
}
/** /**
* End of application code * End of application code
*/ */
......
...@@ -12,5 +12,5 @@ set( ...@@ -12,5 +12,5 @@ set(
src/GatedSpectrometerTest.cu src/GatedSpectrometerTest.cu
) )
cuda_add_executable(gtest_edd ${gtest_edd_src} ) cuda_add_executable(gtest_edd ${gtest_edd_src} )
target_link_libraries(gtest_edd ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} ${CUDA_CUFFT_LIBRARIES}) target_link_libraries(gtest_edd ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} -lcublas)
add_test(gtest_edd gtest_edd --test_data "${CMAKE_CURRENT_LIST_DIR}/data") add_test(gtest_edd gtest_edd --test_data "${CMAKE_CURRENT_LIST_DIR}/data")
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
namespace { namespace {
TEST(BitManipulationMacros, SetBit_TestBit) { TEST(GatedSpectrometer, BitManipulationMacros) {
for (int i = 0; i < 64; i++) { for (int i = 0; i < 64; i++) {
int64_t v = 0; int64_t v = 0;
SET_BIT(v, i); SET_BIT(v, i);
...@@ -62,6 +62,7 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -62,6 +62,7 @@ TEST(GatedSpectrometer, GatingKernel)
thrust::device_vector<float> G0(blockSize * nBlocks); thrust::device_vector<float> G0(blockSize * nBlocks);
thrust::device_vector<float> G1(blockSize * nBlocks); thrust::device_vector<float> G1(blockSize * nBlocks);
thrust::device_vector<int64_t> _sideChannelData(nBlocks); thrust::device_vector<int64_t> _sideChannelData(nBlocks);
thrust::device_vector<float> baseLine(1);
thrust::fill(G0.begin(), G0.end(), 42); thrust::fill(G0.begin(), G0.end(), 42);
thrust::fill(G1.begin(), G1.end(), 23); thrust::fill(G1.begin(), G1.end(), 23);
...@@ -69,13 +70,14 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -69,13 +70,14 @@ TEST(GatedSpectrometer, GatingKernel)
// everything to G0 // everything to G0
{ {
baseLine[0] = 0.;
const int64_t *sideCD = const int64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data())); (int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>( psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>(
thrust::raw_pointer_cast(G0.data()), thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD, thrust::raw_pointer_cast(G1.data()), sideCD,
G0.size(), blockSize, 0, 1, G0.size(), blockSize, 0, 1,
0); 0, thrust::raw_pointer_cast(baseLine.data()));
thrust::pair<thrust::device_vector<float>::iterator, thrust::device_vector<float>::iterator> minmax; thrust::pair<thrust::device_vector<float>::iterator, thrust::device_vector<float>::iterator> minmax;
minmax = thrust::minmax_element(G0.begin(), G0.end()); minmax = thrust::minmax_element(G0.begin(), G0.end());
EXPECT_EQ(*minmax.first, 42); EXPECT_EQ(*minmax.first, 42);
...@@ -86,8 +88,9 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -86,8 +88,9 @@ TEST(GatedSpectrometer, GatingKernel)
EXPECT_EQ(*minmax.second, 0); EXPECT_EQ(*minmax.second, 0);
} }
// everything to G1 // everything to G1 // with baseline -5
{ {
baseLine[0] = -5. * G0.size();
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L); thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L);
const int64_t *sideCD = const int64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data())); (int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
...@@ -95,11 +98,11 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -95,11 +98,11 @@ TEST(GatedSpectrometer, GatingKernel)
thrust::raw_pointer_cast(G0.data()), thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD, thrust::raw_pointer_cast(G1.data()), sideCD,
G0.size(), blockSize, 0, 1, G0.size(), blockSize, 0, 1,
0); 0, thrust::raw_pointer_cast(baseLine.data()));
thrust::pair<thrust::device_vector<float>::iterator, thrust::device_vector<float>::iterator> minmax; thrust::pair<thrust::device_vector<float>::iterator, thrust::device_vector<float>::iterator> minmax;
minmax = thrust::minmax_element(G0.begin(), G0.end()); minmax = thrust::minmax_element(G0.begin(), G0.end());
EXPECT_EQ(*minmax.first, 0); EXPECT_EQ(*minmax.first, -5.);
EXPECT_EQ(*minmax.second, 0); EXPECT_EQ(*minmax.second, -5.);
minmax = thrust::minmax_element(G1.begin(), G1.end()); minmax = thrust::minmax_element(G1.begin(), G1.end());
EXPECT_EQ(*minmax.first, 42); EXPECT_EQ(*minmax.first, 42);
...@@ -149,6 +152,29 @@ TEST(GatedSpectrometer, countBitSet) { ...@@ -149,6 +152,29 @@ TEST(GatedSpectrometer, countBitSet) {
} }
TEST(GatedSpectrometer, array_sum) {
const size_t NBLOCKS = 16 * 32;
const size_t NTHREADS = 1024;
size_t inputLength = 1 << 22 + 1 ;
size_t dataLength = inputLength;
////zero pad input array
//if (inputLength % (2 * NTHREADS) != 0)
// dataLength = (inputLength / (2 * NTHREADS) + 1) * 2 * NTHREADS;
thrust::device_vector<float> data(dataLength);
thrust::fill(data.begin(), data.begin() + inputLength, 1);
//thrust::fill(data.begin() + inputLength, data.end(), 0);
thrust::device_vector<float> blr(NTHREADS * 2);
thrust::fill(blr.begin(), blr.end(), 0);
psrdada_cpp::effelsberg::edd::array_sum<<<NBLOCKS, NTHREADS, NTHREADS* sizeof(float)>>>(thrust::raw_pointer_cast(data.data()), data.size(), thrust::raw_pointer_cast(blr.data()));
psrdada_cpp::effelsberg::edd::array_sum<<<1, NTHREADS, NTHREADS* sizeof(float)>>>(thrust::raw_pointer_cast(blr.data()), blr.size(), thrust::raw_pointer_cast(blr.data()));
EXPECT_EQ(size_t(blr[0]), inputLength);
}
int main(int argc, char **argv) { int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv); ::testing::InitGoogleTest(&argc, argv);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment