Commit ce3445f8 authored by Ewan Barr
Browse files

added float to char conversion to accumulate function

parent 1f99b501
......@@ -93,17 +93,16 @@ void SimpleFFTSpectrometer<HandlerType>::init(RawBytes& block)
_handler.init(block);
}
template <class HandlerType>
void SimpleFFTSpectrometer<HandlerType>::process(
thrust::device_vector<uint64_t>* digitiser_raw,
thrust::device_vector<float>* detected)
thrust::device_vector<char>* detected)
{
uint64_t* digitiser_raw_ptr = thrust::raw_pointer_cast(digitiser_raw->data());
float* digitiser_unpacked_ptr = thrust::raw_pointer_cast(_edd_unpacked.data());
cufftComplex* channelised_ptr = thrust::raw_pointer_cast(_channelised.data());
float* detected_ptr = thrust::raw_pointer_cast(detected->data());
char* detected_ptr = thrust::raw_pointer_cast(detected->data());
BOOST_LOG_TRIVIAL(debug) << "Unpacking 12-bit data";
int nblocks = digitiser_raw->size() / NTHREADS_UNPACK;
......@@ -114,7 +113,8 @@ void SimpleFFTSpectrometer<HandlerType>::process(
CUFFT_ERROR_CHECK(cufftExecR2C(_fft_plan, (cufftReal*) digitiser_unpacked_ptr, channelised_ptr));
BOOST_LOG_TRIVIAL(debug) << "Detecting and accumulating";
kernels::detect_and_accumulate<<<1024, 1024, 0, _proc_stream>>>(channelised_ptr, detected_ptr, _nchans, _nsamps/_fft_length, _naccumulate);
kernels::detect_and_accumulate<<<1024, 1024, 0, _proc_stream>>>(channelised_ptr, detected_ptr,
_nchans, _nsamps/_fft_length, _naccumulate, 1.0f, 0.0f);
}
template <class HandlerType>
......@@ -144,13 +144,13 @@ bool SimpleFFTSpectrometer<HandlerType>::operator()(RawBytes& block)
cudaMemcpyAsync((char*) thrust::raw_pointer_cast(_detected_host_current->data()),
(char*) thrust::raw_pointer_cast(_detected_previous->data()),
_detected_previous->size() * sizeof(float),
_detected_previous->size() * sizeof(char),
cudaMemcpyDeviceToHost, _d2h_stream);
//Wrap _detected_host_previous in a RawBytes object here;
RawBytes bytes((char*) thrust::raw_pointer_cast(_detected_host_previous->data()),
_detected_host_previous->size() * sizeof(float),
_detected_host_previous->size() * sizeof(float));
_detected_host_previous->size() * sizeof(char),
_detected_host_previous->size() * sizeof(char));
BOOST_LOG_TRIVIAL(debug) << "Calling handler";
CUDA_ERROR_CHECK(cudaStreamSynchronize(_h2d_stream));
......
......@@ -18,7 +18,8 @@ namespace kernels {
void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n);
__global__
void detect_and_accumulate(cufftComplex* __restrict__ in, float* __restrict__ out, int nchans, int nsamps, int naccumulate);
void detect_and_accumulate(cufftComplex* __restrict__ in, char* __restrict__ out,
int nchans, int nsamps, int naccumulate, float scale, float offset);
} //kernels
......@@ -58,7 +59,7 @@ public:
private:
void process(thrust::device_vector<uint64_t>* digitiser_raw,
thrust::device_vector<float>* detected);
thrust::device_vector<char>* detected);
private:
int _nsamps;
......@@ -78,16 +79,15 @@ private:
thrust::device_vector<uint64_t>* _edd_raw_current;
thrust::device_vector<uint64_t>* _edd_raw_previous;
thrust::device_vector<float> _detected_a;
thrust::device_vector<float> _detected_b;
thrust::device_vector<float>* _detected_current;
thrust::device_vector<float>* _detected_previous;
thrust::host_vector<float, thrust::system::cuda::experimental::pinned_allocator<float> > _detected_host_a;
thrust::host_vector<float, thrust::system::cuda::experimental::pinned_allocator<float> > _detected_host_b;
thrust::host_vector<float, thrust::system::cuda::experimental::pinned_allocator<float> >* _detected_host_current;
thrust::host_vector<float, thrust::system::cuda::experimental::pinned_allocator<float> >* _detected_host_previous;
thrust::device_vector<char> _detected_a;
thrust::device_vector<char> _detected_b;
thrust::device_vector<char>* _detected_current;
thrust::device_vector<char>* _detected_previous;
thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char> > _detected_host_a;
thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char> > _detected_host_b;
thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char> >* _detected_host_current;
thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char> >* _detected_host_previous;
cudaStream_t _h2d_stream;
cudaStream_t _proc_stream;
......
......@@ -94,7 +94,8 @@ void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__
}
__global__
void detect_and_accumulate(float2* __restrict__ in, float* __restrict__ out, int nchans, int nsamps, int naccumulate)
void detect_and_accumulate(float2* __restrict__ in, char* __restrict__ out,
int nchans, int nsamps, int naccumulate, float scale, float offset)
{
for (int block_idx = blockIdx.x; block_idx < nsamps/naccumulate; block_idx+=gridDim.x)
{
......@@ -110,7 +111,7 @@ void detect_and_accumulate(float2* __restrict__ in, float* __restrict__ out, int
float y = tmp.y * tmp.y;
sum += x + y;
}
out[write_offset + chan_idx] = sum / naccumulate;
out[write_offset + chan_idx] = (char) ((sum / naccumulate) - offset)/scale;
}
}
}
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment