Commit 2b332841 authored by Ewan Barr's avatar Ewan Barr
Browse files

Added support for 8-bit mode

parent d03725c3
......@@ -34,7 +34,16 @@ SimpleFFTSpectrometer<HandlerType>::SimpleFFTSpectrometer(
throw std::runtime_error("Number of samples is not multiple of FFT size");
}
if (_nbits != 12)
int n64bit_words;
if (_nbits == 12)
{
n64bit_words = 3 * _nsamps / 16;
}
else if (_nbits == 8)
{
n64bit_words = _nsamps / 8;
}
else
{
throw std::runtime_error("Only 12-bit mode is supported");
}
......@@ -44,8 +53,6 @@ SimpleFFTSpectrometer<HandlerType>::SimpleFFTSpectrometer(
cudaStreamCreate(&_d2h_stream);
_nchans = _fft_length / 2 + 1;
int n64bit_words = 3 * _nsamps / 16;
int batch = _nsamps/_fft_length;
BOOST_LOG_TRIVIAL(debug) << "Generating FFT plan";
......@@ -91,10 +98,20 @@ void SimpleFFTSpectrometer<HandlerType>::process(
cufftComplex* channelised_ptr = thrust::raw_pointer_cast(_channelised.data());
char* detected_ptr = thrust::raw_pointer_cast(detected->data());
BOOST_LOG_TRIVIAL(debug) << "Unpacking 12-bit data";
int nblocks = digitiser_raw->size() / NTHREADS_UNPACK;
kernels::unpack_edd_12bit_to_float32<<< nblocks, NTHREADS_UNPACK, 0, _proc_stream>>>(
digitiser_raw_ptr, digitiser_unpacked_ptr, digitiser_raw->size());
if (_nbits == 12)
{
BOOST_LOG_TRIVIAL(debug) << "Unpacking 12-bit data";
int nblocks = digitiser_raw->size() / NTHREADS_UNPACK;
kernels::unpack_edd_12bit_to_float32<<< nblocks, NTHREADS_UNPACK, 0, _proc_stream>>>(
digitiser_raw_ptr, digitiser_unpacked_ptr, digitiser_raw->size());
}
else if (_nbits == 8)
{
BOOST_LOG_TRIVIAL(debug) << "Unpacking 8-bit data";
int nblocks = digitiser_raw->size() / NTHREADS_UNPACK;
kernels::unpack_edd_8bit_to_float32<<< nblocks, NTHREADS_UNPACK, 0, _proc_stream>>>(
digitiser_raw_ptr, digitiser_unpacked_ptr, digitiser_raw->size());
}
BOOST_LOG_TRIVIAL(debug) << "Performing FFT";
CUFFT_ERROR_CHECK(cufftExecR2C(_fft_plan, (cufftReal*) digitiser_unpacked_ptr, channelised_ptr));
......
......@@ -93,6 +93,41 @@ void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__
}
}
__global__
void unpack_edd_8bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n)
{
    /**
     * Unpack big-endian signed 8-bit samples into float32.
     *
     * Each 64-bit input word holds eight 8-bit samples. Every thread
     * byte-swaps one word and expands it into shared memory; the block
     * then drains shared memory to global memory with coalesced,
     * thread-contiguous stores.
     *
     * Preconditions:
     *  - blockDim.x == NTHREADS_UNPACK (the shared buffer is sized for
     *    exactly that many threads; this kernel will not work with more
     *    threads per block).
     *  - `in` holds n 64-bit words, `out` has room for 8*n floats.
     *  - NOTE(review): 8*n is computed in int — assumes n < INT_MAX/8.
     */
    __shared__ volatile float tmp_out[NTHREADS_UNPACK * 8];
    volatile float* sout = tmp_out + (8 * threadIdx.x);

    // Block-stride loop: the trip count depends only on blockIdx.x, so
    // every thread of a block executes the same number of iterations and
    // the __syncthreads() calls below are never divergent. (A per-thread
    // bound of the form `idx < n` would deadlock at the tail when n is
    // not a multiple of gridDim.x * blockDim.x.)
    for (int block_idx = blockIdx.x;
         block_idx * NTHREADS_UNPACK < n;
         block_idx += gridDim.x)
    {
        int read_idx = block_idx * NTHREADS_UNPACK + threadIdx.x;
        if (read_idx < n)
        {
            // swap64 converts from big-endian wire order. Each byte is
            // sign-extended by shifting it up to bit 63 and arithmetic-
            // shifting back down 56 places.
            uint64_t val = swap64(in[read_idx]);
            sout[0] = (float)((int64_t)((0xFF00000000000000 & val) <<  0) >> 56);
            sout[1] = (float)((int64_t)((0x00FF000000000000 & val) <<  8) >> 56);
            sout[2] = (float)((int64_t)((0x0000FF0000000000 & val) << 16) >> 56);
            sout[3] = (float)((int64_t)((0x000000FF00000000 & val) << 24) >> 56);
            sout[4] = (float)((int64_t)((0x00000000FF000000 & val) << 32) >> 56);
            sout[5] = (float)((int64_t)((0x0000000000FF0000 & val) << 40) >> 56);
            sout[6] = (float)((int64_t)((0x000000000000FF00 & val) << 48) >> 56);
            sout[7] = (float)((int64_t)((0x00000000000000FF & val) << 56) >> 56);
        }
        __syncthreads();

        // Coalesced copy-out; the bounds check stops a partially-covered
        // tail block from writing past the end of `out`.
        int block_write_start = block_idx * NTHREADS_UNPACK * 8;
        for (int ii = threadIdx.x; ii < NTHREADS_UNPACK * 8; ii += blockDim.x)
        {
            int out_idx = block_write_start + ii;
            if (out_idx < 8 * n)
            {
                out[out_idx] = tmp_out[ii];
            }
        }

        // WAR hazard: wait until every thread has drained tmp_out before
        // the next iteration overwrites it.
        __syncthreads();
    }
}
__global__
void detect_and_accumulate(float2* __restrict__ in, char* __restrict__ out,
int nchans, int nsamps, int naccumulate, float scale, float offset)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment