Commit 15f1eb52 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Power spectrum with new IO classes

parent e931dc38
......@@ -24,12 +24,16 @@ class DadaBufferLayout
public:
// input key of the dadad buffer
// size of spead heaps in bytes
// number of side channels
DadaBufferLayout(key_t input_key , size_t heapSize, size_t nSideChannels);
key_t getInputkey() const;
size_t getBufferSize() const;
// get size of heaps in bytes
size_t getHeapSize() const;
size_t getNSideChannels() const;
......
......@@ -41,17 +41,30 @@ typedef float IntegratedPowerType;
struct InputDataStream
/// Input data and intermediate processing data for one polarization
struct PolarizationData
{
virtual void swap() = 0;
virtual void resize(size_t _nchans, size_t _batch, const DadaBufferLayout &dadaBufferLayout) = 0;
virtual void getFromBlock(RawBytes &block, DadaBufferLayout &dadaBufferLayout, cudaStream_t &_h2d_stream) = 0;
};
DadaBufferLayout _dadaBufferLayout;
size_t _fft_length;
size_t _batch;
// a buffer contains batch * fft_length samples
PolarizationData(size_t fft_length, size_t batch, const DadaBufferLayout
&dadaBufferLayout) : _fft_length(fft_length), _batch(batch), _dadaBufferLayout(dadaBufferLayout)
{
_raw_voltage.resize(_dadaBufferLayout.sizeOfData() / sizeof(uint64_t));
BOOST_LOG_TRIVIAL(debug) << " Input voltages size (in 64-bit words): " << _raw_voltage.size();
_baseLineG0.resize(1);
_baseLineG0_update.resize(1);
_baseLineG1.resize(1);
_baseLineG1_update.resize(1);
_channelised_voltage_G0.resize((_fft_length / 2 + 1) * _batch);
_channelised_voltage_G1.resize((_fft_length / 2 + 1) * _batch);
_sideChannelData.resize(_dadaBufferLayout.getNSideChannels() * _dadaBufferLayout.getNHeaps());
BOOST_LOG_TRIVIAL(debug) << " Channelised voltages size: " << _channelised_voltage_G0.size();
};
/// Input data and intermediate processing data for one polarization
struct PolarizationData : public InputDataStream
{
/// Raw ADC Voltage
DoubleDeviceBuffer<RawVoltageType> _raw_voltage;
/// Side channel data
......@@ -79,80 +92,133 @@ struct PolarizationData : public InputDataStream
_sideChannelData.swap();
}
void resize(size_t _nchans, size_t _batch, const DadaBufferLayout &dadaBufferLayout)
{
_raw_voltage.resize(dadaBufferLayout.sizeOfData() / sizeof(uint64_t));
BOOST_LOG_TRIVIAL(debug) << " Input voltages size (in 64-bit words): " << _raw_voltage.size();
_baseLineG0.resize(1);
_baseLineG0_update.resize(1);
_baseLineG1.resize(1);
_baseLineG1_update.resize(1);
_channelised_voltage_G0.resize(_nchans * _batch);
_channelised_voltage_G1.resize(_nchans * _batch);
BOOST_LOG_TRIVIAL(debug) << " Channelised voltages size: " << _channelised_voltage_G0.size();
}
void getFromBlock(RawBytes &block, DadaBufferLayout &dadaBufferLayout, cudaStream_t &_h2d_stream)
void getFromBlock(RawBytes &block, cudaStream_t &_h2d_stream)
{
BOOST_LOG_TRIVIAL(debug) << " block.used_bytes() = " << block.used_bytes()
<< ", dataBlockBytes = " << dadaBufferLayout.sizeOfData() << "\n";
<< ", dataBlockBytes = " << _dadaBufferLayout.sizeOfData() << "\n";
CUDA_ERROR_CHECK(cudaMemcpyAsync(static_cast<void *>(_raw_voltage.a_ptr()),
static_cast<void *>(block.ptr()),
dadaBufferLayout.sizeOfData() , cudaMemcpyHostToDevice,
_dadaBufferLayout.sizeOfData() , cudaMemcpyHostToDevice,
_h2d_stream));
CUDA_ERROR_CHECK(cudaMemcpyAsync(
static_cast<void *>(_sideChannelData.a_ptr()),
static_cast<void *>(block.ptr() + dadaBufferLayout.sizeOfData() + dadaBufferLayout.sizeOfGap()),
dadaBufferLayout.sizeOfSideChannelData(), cudaMemcpyHostToDevice, _h2d_stream));
static_cast<void *>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap()),
_dadaBufferLayout.sizeOfSideChannelData(), cudaMemcpyHostToDevice, _h2d_stream));
BOOST_LOG_TRIVIAL(debug) << "First side channel item: 0x" << std::setw(16)
<< std::setfill('0') << std::hex <<
(reinterpret_cast<uint64_t*>(block.ptr() + dadaBufferLayout.sizeOfData()
+ dadaBufferLayout.sizeOfGap()))[0] <<
(reinterpret_cast<uint64_t*>(block.ptr() + _dadaBufferLayout.sizeOfData()
+ _dadaBufferLayout.sizeOfGap()))[0] <<
std::dec;
}
};
};
/// Input data and intermediate processing data for two polarizations
struct DualPolarizationData
{
DadaBufferLayout _dadaBufferLayout;
DualPolarizationData(size_t fft_length, size_t batch, const DadaBufferLayout
&dadaBufferLayout) : polarization0(fft_length, batch, dadaBufferLayout),
polarization1(fft_length, batch, dadaBufferLayout),
_dadaBufferLayout(dadaBufferLayout)
{
};
//struct DualPolarizationData : public InputDataStream
//{
// PolarizationData pol0, pol1;
// void swap()
// {
// pol0.swap(); pol1.swap();
// }
//
// void resize(size_t _nchans, size_t _batch, const DadaBufferLayout &dadaBufferLayout)
// {
// BOOST_LOG_TRIVIAL(debug) << " Pol0";
// pol0.resize(_nchans, _batch, dadaBufferLayout);
// BOOST_LOG_TRIVIAL(debug) << " Pol1";
// pol1.resize(_nchans, _batch, dadaBufferLayout);
// }
//};
PolarizationData polarization0, polarization1;
void swap()
{
polarization0.swap(); polarization1.swap();
}
void getFromBlock(RawBytes &block, cudaStream_t &_h2d_stream)
{
// Copy the data with stride to the GPU:
// CPU: P1P2P1P2P1P2 ...
// GPU: P1P1P1 ... P2P2P2 ...
// If this is a bottleneck the gating kernel could sort the layout out
// during copy
int heapsize_bytes = _dadaBufferLayout.getHeapSize();
CUDA_ERROR_CHECK(cudaMemcpy2DAsync(
static_cast<void *>(polarization0._raw_voltage.a_ptr()),
heapsize_bytes,
static_cast<void *>(block.ptr()),
2 * heapsize_bytes,
heapsize_bytes, _dadaBufferLayout.sizeOfData() / heapsize_bytes/ 2,
cudaMemcpyHostToDevice, _h2d_stream));
CUDA_ERROR_CHECK(cudaMemcpy2DAsync(
static_cast<void *>(polarization1._raw_voltage.a_ptr()),
heapsize_bytes,
static_cast<void *>(block.ptr()) + heapsize_bytes,
2 * heapsize_bytes,
heapsize_bytes, _dadaBufferLayout.sizeOfData() / heapsize_bytes/ 2,
cudaMemcpyHostToDevice, _h2d_stream));
CUDA_ERROR_CHECK(cudaMemcpy2DAsync(
static_cast<void *>(polarization0._sideChannelData.a_ptr()),
sizeof(uint64_t),
static_cast<void *>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap()),
2 * sizeof(uint64_t),
sizeof(uint64_t),
_dadaBufferLayout.sizeOfSideChannelData() / 2 / sizeof(uint64_t),
cudaMemcpyHostToDevice, _h2d_stream));
CUDA_ERROR_CHECK(cudaMemcpy2DAsync(
static_cast<void *>(polarization1._sideChannelData.a_ptr()),
sizeof(uint64_t),
static_cast<void *>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap() + sizeof(uint64_t)),
2 * sizeof(uint64_t),
sizeof(uint64_t),
_dadaBufferLayout.sizeOfSideChannelData() / 2 / sizeof(uint64_t), cudaMemcpyHostToDevice, _h2d_stream));
BOOST_LOG_TRIVIAL(debug) << "First side channel item: 0x" << std::setw(16)
<< std::setfill('0') << std::hex <<
(reinterpret_cast<uint64_t*>(block.ptr() + _dadaBufferLayout.sizeOfData()
+ _dadaBufferLayout.sizeOfGap()))[0] << std::dec;
}
};
// Output data for one gate N = 1 for one pol, or 4 for full stokes
/// INterface for the processed output data stream
struct OutputDataStream
{
/// Reset outptu for new integration
size_t _nchans;
size_t _blocks;
OutputDataStream(size_t nchans, size_t blocks) : _nchans(nchans), _blocks(blocks)
{
}
/// Reset output to for new integration
virtual void reset(cudaStream_t &_proc_stream) = 0;
/// Swap output buffers
virtual void swap() = 0;
virtual void resize(size_t size, size_t blocks) = 0;
// output buffer on the host
DoublePinnedHostBuffer<char> _host_power;
// copy data from internal buffers of the implementation to the host output
// buffer
virtual void data2Host(cudaStream_t &_d2h_stream) = 0;
};
// Output data for one gate, single power spectrum
struct PowerSpectrumOutput
{
PowerSpectrumOutput(size_t size, size_t blocks)
{
BOOST_LOG_TRIVIAL(debug) << "Setting size of power spectrum output size = " << size << ", blocks = " << blocks;
data.resize(size * blocks);
_noOfBitSets.resize(blocks);
}
/// spectrum data
DoubleDeviceBuffer<IntegratedPowerType> data;
......@@ -162,7 +228,7 @@ struct PowerSpectrumOutput
/// Reset outptu for new integration
void reset(cudaStream_t &_proc_stream)
{
thrust::fill(thrust::cuda::par.on(_proc_stream), data.a().begin(),data.a().end(), 0.);
thrust::fill(thrust::cuda::par.on(_proc_stream), data.a().begin(), data.a().end(), 0.);
thrust::fill(thrust::cuda::par.on(_proc_stream), _noOfBitSets.a().begin(), _noOfBitSets.a().end(), 0L);
}
......@@ -172,20 +238,21 @@ struct PowerSpectrumOutput
data.swap();
_noOfBitSets.swap();
}
/// Resize all buffers
void resize(size_t size, size_t blocks)
{
data.resize(size * blocks);
_noOfBitSets.resize(blocks);
}
};
struct GatedPowerSpectrumOutput : public OutputDataStream
{
GatedPowerSpectrumOutput(size_t nchans, size_t blocks) : OutputDataStream(nchans, blocks),
G0(nchans, blocks), G1(nchans, blocks)
{
// on the host both power are stored in the same data buffer together with
// the number of bit sets
_host_power.resize( 2 * ( _nchans * sizeof(IntegratedPowerType) + sizeof(size_t) ) * G0._noOfBitSets.size());
}
PowerSpectrumOutput G0, G1;
size_t _nchans;
void reset(cudaStream_t &_proc_stream)
{
......@@ -201,19 +268,6 @@ struct GatedPowerSpectrumOutput : public OutputDataStream
_host_power.swap();
}
/// Resize all buffers
void resize(size_t size, size_t blocks)
{
// ToDo: size passed in constructor, also number of blocks.
G0.resize(size, blocks);
G1.resize(size, blocks);
_nchans = size;
// on the host both power are stored in the same data buffer together with
// the number of bit sets
_host_power.resize( 2 * ( size * sizeof(IntegratedPowerType) + sizeof(size_t) ) * G0._noOfBitSets.size());
}
void data2Host(cudaStream_t &_d2h_stream)
{
// copy data to host if block is finished
......@@ -301,9 +355,11 @@ struct FullStokesOutput : public OutputDataStream
}
};
/*
struct GatedFullStokesOutput: public OutputDataStream
{
FullStokesOutput G0, G1;
void reset(cudaStream_t &_proc_stream)
{
......@@ -324,8 +380,112 @@ struct GatedFullStokesOutput: public OutputDataStream
G0.resize(size, blocks);
G1.resize(size, blocks);
}
};
void data2Host(cudaStream_t &_d2h_stream)
{
for (size_t i = 0; i < G0._noOfBitSets.size(); i++)
{
size_t memslicesize = (_nchans * sizeof(IntegratedPowerType));
size_t memOffset = 8 * i * (memslicesize + + sizeof(size_t));
// Copy II QQ UU VV
CUDA_ERROR_CHECK(
cudaMemcpyAsync(static_cast<void *>(_host_power.a_ptr() + memOffset) ,
static_cast<void *>(G0.I.b_ptr() + i * memslicesize),
_nchans * sizeof(IntegratedPowerType),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync(static_cast<void *>(_host_power_db.a_ptr() + memOffset + 1 * memslicesize) ,
static_cast<void *>(G1.I.b_ptr() + i * memslicesize),
_nchans * sizeof(IntegratedPowerType),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync(static_cast<void *>(_host_power_db.a_ptr() + memOffset + 2 * memslicesize) ,
static_cast<void *>(G0.Q.b_ptr() + i * memslicesize),
_nchans * sizeof(IntegratedPowerType),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync(static_cast<void *>(_host_power_db.a_ptr() + memOffset + 3 * memslicesize) ,
static_cast<void *>(G1.Q.b_ptr() + i * memslicesize),
_nchans * sizeof(IntegratedPowerType),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync(static_cast<void *>(_host_power_db.a_ptr() + memOffset + 4 * memslicesize) ,
static_cast<void *>(G0.U.b_ptr() + i * memslicesize),
_nchans * sizeof(IntegratedPowerType),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync(static_cast<void *>(_host_power_db.a_ptr() + memOffset + 5 * memslicesize) ,
static_cast<void *>(G1.U.b_ptr() + i * memslicesize),
_nchans * sizeof(IntegratedPowerType),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync(static_cast<void *>(_host_power_db.a_ptr() + memOffset + 6 * memslicesize) ,
static_cast<void *>(G0.V.b_ptr() + i * memslicesize),
_nchans * sizeof(IntegratedPowerType),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync(static_cast<void *>(_host_power_db.a_ptr() + memOffset + 7 * memslicesize) ,
static_cast<void *>(G1.V.b_ptr() + i * memslicesize),
_nchans * sizeof(IntegratedPowerType),
cudaMemcpyDeviceToHost, _d2h_stream));
// Copy SCI
CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 8 * memslicesize),
static_cast<void *>(G0._noOfBitSets.b_ptr() + i ),
1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 8 * memslicesize + 1 * sizeof(size_t)),
static_cast<void *>(G1._noOfBitSets.b_ptr() + i ),
1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 8 * memslicesize + 2 * sizeof(size_t)),
static_cast<void *>(G0._noOfBitSets.b_ptr() + i ),
1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 8 * memslicesize + 3 * sizeof(size_t)),
static_cast<void *>(G1._noOfBitSets.b_ptr() + i ),
1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 8 * memslicesize + 4 * sizeof(size_t)),
static_cast<void *>(G0._noOfBitSets.b_ptr() + i ),
1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 8 * memslicesize + 5 * sizeof(size_t)),
static_cast<void *>(G1._noOfBitSets.b_ptr() + i ),
1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 8 * memslicesize + 6 * sizeof(size_t)),
static_cast<void *>(G0._noOfBitSets.b_ptr() + i ),
1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream));
CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 8 * memslicesize + 7 * sizeof(size_t)),
static_cast<void *>(G1._noOfBitSets.b_ptr() + i ),
1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream));
}
}
};
*/
......@@ -335,7 +495,10 @@ struct GatedFullStokesOutput: public OutputDataStream
bit set in side channel data.
*/
template <class HandlerType > class GatedSpectrometer {
template <class HandlerType,
class InputType,
class OutputType
> class GatedSpectrometer {
public:
/**
* @brief Constructor
......@@ -384,6 +547,15 @@ public:
*/
bool operator()(RawBytes &block);
// Processing strategy for single pol mode
void process(PolarizationData *inputDataStream, GatedPowerSpectrumOutput *outputDataStream);
// Processing strategy for dual pol power mode
//void process(DualPolarizationData*inputDataStream, GatedPowerSpectrumOutput *outputDataStream);
// Processing strategy for full stokes mode
//void process(DualPolarizationData*inputDataStream, FullStokesOutput *outputDataStream);
private:
// gate the data from the input stream and fft data per gate. Number of bit
// sets in every gate is stored in the output datasets
......@@ -408,22 +580,17 @@ private:
uint64_t _nchans;
uint64_t _nBlocks;
uint64_t _call_count;
double _processing_efficiency;
std::unique_ptr<Unpacker> _unpacker;
OutputDataStream* outputDataStream;
InputDataStream* inputDataStream;
std::unique_ptr<DetectorAccumulator<IntegratedPowerType> > _detector;
OutputType* outputDataStream;
InputType* inputDataStream;
// Temporary processing block
// ToDo: Use inplace FFT to avoid temporary coltage array
// ToDo: Use inplace FFT to avoid temporary voltage array
thrust::device_vector<UnpackedVoltageType> _unpacked_voltage_G0;
thrust::device_vector<UnpackedVoltageType> _unpacked_voltage_G1;
cudaStream_t _h2d_stream;
cudaStream_t _proc_stream;
cudaStream_t _d2h_stream;
......
......@@ -17,7 +17,7 @@ namespace psrdada_cpp {
namespace effelsberg {
namespace edd {
// Reduce thread local vatiable v in shared array x, so that x[0]
// Reduce thread local vatiable v in shared array x, so that x[0] contains sum
template<typename T>
__device__ void sum_reduce(T *x, const T &v)
{
......@@ -32,9 +32,10 @@ __device__ void sum_reduce(T *x, const T &v)
}
// If one of the side channel items is lsot, then both are considered as lost
// If one of the side channel items is lost, then both are considered as lost
// here
__global__ void mergeSideChannels(uint64_t* __restrict__ A, uint64_t* __restrict__ B, size_t N)
__global__ void mergeSideChannels(uint64_t* __restrict__ A, uint64_t*
__restrict__ B, size_t N)
{
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < N);
i += blockDim.x * gridDim.x)
......@@ -146,21 +147,16 @@ __global__ void update_baselines(float* __restrict__ baseLineG0,
template <class HandlerType>
GatedSpectrometer<HandlerType>::GatedSpectrometer(
const DadaBufferLayout &dadaBufferLayout,
std::size_t selectedSideChannel, std::size_t selectedBit, std::size_t fft_length, std::size_t naccumulate,
std::size_t nbits, float input_level, float output_level,
HandlerType &handler) : _dadaBufferLayout(dadaBufferLayout),
_selectedSideChannel(selectedSideChannel), _selectedBit(selectedBit),
_fft_length(fft_length),
_naccumulate(naccumulate), _nbits(nbits), _handler(handler), _fft_plan(0),
_call_count(0), _nsamps_per_heap(4096), _processing_efficiency(0.){
template <class HandlerType, class InputType, class OutputType>
GatedSpectrometer<HandlerType, InputType, OutputType>::GatedSpectrometer(
const DadaBufferLayout &dadaBufferLayout, std::size_t selectedSideChannel,
std::size_t selectedBit, std::size_t fft_length, std::size_t naccumulate,
std::size_t nbits, float input_level, float output_level, HandlerType
&handler) : _dadaBufferLayout(dadaBufferLayout),
_selectedSideChannel(selectedSideChannel), _selectedBit(selectedBit),
_fft_length(fft_length), _naccumulate(naccumulate), _nbits(nbits),
_handler(handler), _fft_plan(0), _call_count(0), _nsamps_per_heap(4096)
{
// Sanity checks
assert(((_nbits == 12) || (_nbits == 8)));
......@@ -184,7 +180,6 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
assert(selectedBit < 64); // Sanity check of selected bit
_nsamps_per_buffer = _dadaBufferLayout.sizeOfData() * 8 / nbits;
_nsamps_per_output_spectra = fft_length * naccumulate;
if (_nsamps_per_output_spectra <= _nsamps_per_buffer)
{ // one buffer block is used for one or multiple output spectra
......@@ -200,10 +195,12 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
assert(N * _nsamps_per_buffer == _nsamps_per_output_spectra);
_nBlocks = N;
}
BOOST_LOG_TRIVIAL(debug) << "Integrating " << _nsamps_per_output_spectra << " samples from " << _nBlocks << " into one spectra.";
BOOST_LOG_TRIVIAL(debug) << "Integrating " << _nsamps_per_output_spectra <<
" samples from " << _nBlocks << " into one output spectrum.";
_nchans = _fft_length / 2 + 1;
int batch = _nsamps_per_buffer / _fft_length;
float dof = 2 * _naccumulate;
float scale =
std::pow(input_level * std::sqrt(static_cast<float>(_nchans)), 2);
......@@ -213,27 +210,28 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
<< "Correction factors for 8-bit conversion: offset = " << offset
<< ", scaling = " << scaling;
BOOST_LOG_TRIVIAL(debug) << "Generating FFT plan";
int n[] = {static_cast<int>(_fft_length)};
BOOST_LOG_TRIVIAL(debug) << "Generating FFT plan: \n"
<< " fft_length = " << _fft_length << "\n"
<< " n[0] = " << n[0] << "\n"
<< " _nchans = " << _nchans << "\n"
<< " batch = " << batch << "\n";
;
CUFFT_ERROR_CHECK(cufftPlanMany(&_fft_plan, 1, n, NULL, 1, _fft_length, NULL,
1, _nchans, CUFFT_R2C, batch));
cufftSetStream(_fft_plan, _proc_stream);
BOOST_LOG_TRIVIAL(debug) << "Allocating memory";
// if singlePol
inputDataStream = new PolarizationData();
inputDataStream->resize(_nchans, batch, _dadaBufferLayout);
inputDataStream = new InputType(fft_length, batch, _dadaBufferLayout);
_unpacked_voltage_G0.resize(_nsamps_per_buffer);
_unpacked_voltage_G1.resize(_nsamps_per_buffer);
BOOST_LOG_TRIVIAL(debug) << " Unpacked voltages size (in samples): "
<< _unpacked_voltage_G0.size();
outputDataStream = new GatedPowerSpectrumOutput();
outputDataStream->resize(_nchans, batch / (_naccumulate / _nBlocks));
outputDataStream = new OutputType(_nchans, batch / (_naccumulate / _nBlocks));
CUDA_ERROR_CHECK(cudaStreamCreate(&_h2d_stream));
CUDA_ERROR_CHECK(cudaStreamCreate(&_proc_stream));
......@@ -241,14 +239,13 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
CUFFT_ERROR_CHECK(cufftSetStream(_fft_plan, _proc_stream));
_unpacker.reset(new Unpacker(_proc_stream));
_detector.reset(new DetectorAccumulator<IntegratedPowerType>(_nchans, _naccumulate / _nBlocks, scaling,
offset, _proc_stream));
} // constructor
template <class HandlerType>
GatedSpectrometer<HandlerType>::~GatedSpectrometer() {
template <class HandlerType, class InputType, class OutputType>
GatedSpectrometer<HandlerType, InputType, OutputType>::~GatedSpectrometer() {
BOOST_LOG_TRIVIAL(debug) << "Destroying GatedSpectrometer";
cudaDeviceSynchronize();
if (!_fft_plan)
cufftDestroy(_fft_plan);
cudaStreamDestroy(_h2d_stream);
......@@ -257,14 +254,14 @@ GatedSpectrometer<HandlerType>::~GatedSpectrometer() {
}