Skip to content
Snippets Groups Projects
Commit 845dfb09 authored by Ewan Barr's avatar Ewan Barr
Browse files

dirty commit for debugging in docker

parent fa992f64
No related branches found
No related tags found
No related merge requests found
add_subdirectory(edd)
\ No newline at end of file
if(ENABLE_CUDA)
set(PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES
${CMAKE_PROJECT_NAME}
${CMAKE_PROJECT_NAME}_effelsberg_edd
${DEPENDENCY_LIBRARIES})
set(psrdada_cpp_effelsberg_edd_src
src/edd_simple_fft_spectrometer.cu
)
cuda_add_library(${CMAKE_PROJECT_NAME}_effelsberg_edd ${psrdada_cpp_effelsberg_edd_src})
#simple FFT spectrometer test
cuda_add_executable(edd_specpol src/edd_simple_fft_spectrometer_test.cu)
target_link_libraries(edd_specpol ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES})
install(TARGETS edd_specpol DESTINATION bin)
endif(ENABLE_CUDA)
#ifndef PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
#define PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
#include "psrdada_cpp/effelsberg/edd/"
#include "psrdada_cpp/raw_bytes.hpp"
#include "thrust/device_vector.h"
#include "thrust/host_vector.h"
#include "cufft.h"
namespace psrdada_cpp {
namespace effelsberg {
namespace edd {
namespace kernels {
__global__
void unpack_edd_12bit_to_float32(char* __restrict__ in, float* __restrict__ out, int n);
} //kernels
template <class HandlerType>
class SimpleFFTSpectrometer
{
public:
SimpleFFTSpectrometer(
std::size_t fft_length,
std::size_t naccumulate,
std::size_t nbits,
HandlerType& handler);
~SimpleFFTSpectrometer();
/**
* @brief A callback to be called on connection
* to a ring buffer.
*
* @detail The first available header block in the
* in the ring buffer is provided as an argument.
* It is here that header parameters could be read
* if desired.
*
* @param block A RawBytes object wrapping a DADA header buffer
*/
void init(RawBytes& block);
/**
* @brief A callback to be called on acqusition of a new
* data block.
*
* @param block A RawBytes object wrapping a DADA data buffer
*/
bool operator()(RawBytes& block);
private:
std::size_t _fft_length;
std::size_t _naccumulate;
std::size_t _nbits;
HandlerType& _handler;
bool _first_block;
std::size_t _nsamps;
cufftHandle _fft_plan;
thrust::device_vector<uint64_t> _edd_raw;
thrust::device_vector<float> _edd_unpacked;
thrust::device_vector<cufftComplex> _channelised;
thrust::device_vector<float> _detected;
};
} //edd
} //effelsberg
} //psrdada_cpp
#include "psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu"
#endif //PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
\ No newline at end of file
#ifndef PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
#define PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
#include "psrdada_cpp/effelsberg/edd/edd_simple_fft_spectrometer.cuh"
namespace psrdada_cpp {
namespace effelsberg {
namespace edd {
template <class HandlerType>
SimpleFFTSpectrometer<HandlerType>::SimpleFFTSpectrometer(
std::size_t fft_length,
std::size_t naccumulate,
std::size_t nbits,
HandlerType& handler)
: _fft_length(fft_length)
, _naccumulate(naccumulate)
, _nbits(nbits)
, _handler(handler)
, _first_block(true)
, _nsamps(0)
, _fft_plan(NULL)
{
}
template <class HandlerType>
SimpleFFTSpectrometer<HandlerType>::~SimpleFFTSpectrometer()
{
if (!_first_block)
cufftDestroy(_fft_plan);
}
template <class HandlerType>
void SimpleFFTSpectrometer<HandlerType>::init(RawBytes& block)
{
}
template <class HandlerType>
bool SimpleFFTSpectrometer<HandlerType>::operator()(RawBytes& block)
{
std::size_t nsamps_in_block = 8 * block.used_bytes() / _nbits;
std::size_t nchans = _fft_length / 2 + 1;
if (_first_block)
{
_nsamps = nsamps_in_block;
std::size_t n64bit_words = 3 * _nsamps / 16;
if (_nsamps % _fft_length != 0)
{
throw std::runtime_error("Number of samples is not multiple of FFT size");
}
std::size_t batch = _nsamps/_fft_length;
// Only do these things once
CUFFT_ERROR_CHECK(cufftPlanMany(&_fft_plan, 1, _fft_length, 0, 1,
_fft_length, 1, 1, _fft_length, CUFFT_R2C, batch));
_edd_raw.resize(n64bit_words);
_edd_unpacked.resize(_nsamps);
_channelised.resize(nchans * batch);
_detected.resize(nchans * batch / _naccumulate);
_first_block = false;
}
if (_nsamps != nsamps_in_block)
{
throw std::runtime_error("Received incomplete block");
}
uint64_t* _edd_raw_ptr = thrust::raw_pointer_cast(_edd_raw.data());
float* _edd_unpacked_ptr = thrust::raw_pointer_cast(_edd_unpacked.data());
//Copy DADA buffer to GPU
thrust::copy(block.ptr(), block.ptr()+block.used_bytes(), (char*) _edd_raw_ptr);
if (_nbits == 12)
{
unpack_edd_12bit_to_float32<<< 64, 1024>>>(_edd_raw_ptr, _edd_unpacked_ptr, _edd_raw.size());
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
}
else if (_nbits == 8)
{
throw std::runtime_error("Only 12-bit mode supported");
}
else
{
throw std::runtime_error("Only 12-bit mode supported");
}
cufftComplex* _channelised_ptr = thrust::raw_pointer_cast(_channelised.data());
CUFFT_ERROR_CHECK(cufftExecuteR2C(_fft_plan, (cufftReal*)_edd_unpacked_ptr, _channelised_ptr, CUFFT_FORWARD));
//thrust::copy(_edd_unpacked.begin(), _edd_unpacked.end(), block.ptr());
//_handler(block);
}
} //edd
} //effelsberg
} //psrdada_cpp
#include "psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu"
#endif //PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
\ No newline at end of file
#include "psrdada_cpp/common.hpp"
#include "psrdada_cpp/cuda_utils.hpp"
#define BSWAP64(x) ((0xFF00000000000000 & x) >> 56) | \
((0x00FF000000000000 & x) >> 40) | \
((0x0000FF0000000000 & x) >> 24) | \
((0x000000FF00000000 & x) >> 8) | \
((0x00000000FF000000 & x) << 8) | \
((0x0000000000FF0000 & x) << 24) | \
((0x000000000000FF00 & x) << 40) | \
((0x00000000000000FF & x) << 56)
namespace psrdada_cpp {
namespace effelsberg {
namespace edd {
namespace kernels {
__device__ __forceinline__ uint64_t swap64(uint64_t x)
{
uint64_t result;
uint2 t;
asm("mov.b64 {%0,%1},%2; \n\t"
: "=r"(t.x), "=r"(t.y) : "l"(x));
t.x = __byte_perm(t.x, 0, 0x0123);
t.y = __byte_perm(t.y, 0, 0x0123);
asm("mov.b64 %0,{%1,%2}; \n\t"
: "=l"(result) : "r"(t.y), "r"(t.x));
return result;
}
__global__
void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n)
{
for (int idx = blockIdx.x * blockDim.x + threadIdx.x ; (3*idx+2) < n ; idx+=gridDim.x*blockDim.x)
{
uint64_t val;
uint64_t rest;
int read_idx = 3*idx;
int write_idx = 16*idx;
float* sout = out+write_idx;
val = swap64(in[read_idx]);
sout[0] = (float)((int64_t)(( 0xFFF0000000000000 & val) << 0) >> 52);
sout[1] = (float)((int64_t)(( 0x000FFF0000000000 & val) << 12) >> 52);
sout[2] = (float)((int64_t)(( 0x000000FFF0000000 & val) << 24) >> 52);
sout[3] = (float)((int64_t)(( 0x000000000FFF0000 & val) << 36) >> 52);
sout[4] = (float)((int64_t)(( 0x000000000000FFF0 & val) << 48) >> 52);
rest = ( 0x000000000000000F & val) << 60;
val = swap64(in[read_idx+1]);
sout[5] = (float)((int64_t)((( 0xFF00000000000000 & val) >> 4) | rest) >> 52);
sout[6] = (float)((int64_t)(( 0x00FFF00000000000 & val) << 8) >> 52);
sout[7] = (float)((int64_t)(( 0x00000FFF00000000 & val) << 20) >> 52);
sout[8] = (float)((int64_t)(( 0x00000000FFF00000 & val) << 32) >> 52);
sout[9] = (float)((int64_t)(( 0x00000000000FFF00 & val) << 44) >> 52);
rest = ( 0x00000000000000FF & val) << 56;
val = swap64(in[read_idx+2]);
sout[10] = (float)((int64_t)((( 0xF000000000000000 & val) >> 8) | rest) >> 52);
sout[11] = (float)((int64_t)(( 0x0FFF000000000000 & val) << 4) >> 52);
sout[12] = (float)((int64_t)(( 0x0000FFF000000000 & val) << 16) >> 52);
sout[13] = (float)((int64_t)(( 0x0000000FFF000000 & val) << 28) >> 52);
sout[14] = (float)((int64_t)(( 0x0000000000FFF000 & val) << 40) >> 52);
}
}
} //kernels
namespace test
{
void unpack_edd_12bit_to_float32_cpu(thrust::host_vector<uint64_t>const& input, thrust::host_vector<float>& output)
{
uint64_t val;
uint64_t rest;
output.reserve(16 * input.size() / 3);
for (int ii=0; ii<input.size(); ii+=3)
{
int idx = ii;
val = be64toh(input[idx]);
output.push_back( (float)((int64_t)(( 0xFFF0000000000000 & val) << 0) >> 52));
output.push_back( (float)((int64_t)(( 0x000FFF0000000000 & val) << 12) >> 52));
output.push_back( (float)((int64_t)(( 0x000000FFF0000000 & val) << 24) >> 52));
output.push_back( (float)((int64_t)(( 0x000000000FFF0000 & val) << 36) >> 52));
output.push_back( (float)((int64_t)(( 0x000000000000FFF0 & val) << 48) >> 52));
rest = ( 0x000000000000000F & val) << 60;
val = be64toh(input[++idx]);
output.push_back( (float)((int64_t)((( 0xFF00000000000000 & val) >> 4) | rest) >> 52));
output.push_back( (float)((int64_t)(( 0x00FFF00000000000 & val) << 8) >> 52));
output.push_back( (float)((int64_t)(( 0x00000FFF00000000 & val) << 20) >> 52));
output.push_back( (float)((int64_t)(( 0x00000000FFF00000 & val) << 32) >> 52));
output.push_back( (float)((int64_t)(( 0x00000000000FFF00 & val) << 44) >> 52));
rest = ( 0x00000000000000FF & val) << 56;
val = be64toh(input[++idx]);
output.push_back( (float)((int64_t)((( 0xF000000000000000 & val) >> 8) | rest) >> 52));
output.push_back( (float)((int64_t)(( 0x0FFF000000000000 & val) << 4) >> 52));
output.push_back( (float)((int64_t)(( 0x0000FFF000000000 & val) << 16) >> 52));
output.push_back( (float)((int64_t)(( 0x0000000FFF000000 & val) << 28) >> 52));
output.push_back( (float)((int64_t)(( 0x0000000000FFF000 & val) << 40) >> 52));
output.push_back( (float)((int64_t)(( 0x0000000000000FFF & val) << 52) >> 52));
}
}
/*
void unpack_edd_12bit_to_float32_test()
{
int nvalues = 1600;
int nlongs = 3 * nvalues / 16;
int nthreads = nlongs/3;
int nblocks = 1;
int smem_size = nthreads * 16 * sizeof(float);
thrust::host_vector<uint64_t> input;
for (int val=0; val<nlongs; ++val)
{
input.push_back(val);
}
// Run CUDA version
thrust::device_vector<uint64_t> input_d = input;
thrust::device_vector<float> output_d(nvalues);
uint64_t* input_ptr = thrust::raw_pointer_cast(input_d.data());
float* output_ptr = thrust::raw_pointer_cast(output_d.data())
unpack_edd_12bit_to_float32<<<nblocks, nthreads, smem_size, 0>>>(input_ptr, output_ptr, nlongs);
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
thrust::host_vector<float> output = output_d;
// Run host version
thrust::host_vector<float> test_output;
unpack_edd_12bit_to_float32_cpu(input, test_output);
for (int ii=0; ii<nvalues; ++ii)
{
if (output[ii] != test_output[ii])
{
printf("Error at index %d -> %f != %f \n", ii, output[ii], test_output[ii]);
}
}
}
*/
} //test
} //edd
} //effelsberg
} //psrdada_cpp
\ No newline at end of file
#include "psrdada_cpp/common.h"
#include "psrdada_cpp/raw_bytes.h"
#include "psrdada_cpp/effelsberg/ebb/edd_simple_fft_spectrometer.cuh"
#include "thrust/host_vector.h"
int main()
{
std::size_t size = 4096 * 12 * 4096 / 8
thrust::host_vector<char> data;
data.resize(size);
RawBytes dada_input(data.data(), data.size());
SimpleFFTSpectrometer spectrometer(8192, 1, 12, NULL);
for (int ii=0; ii<100; ++ii)
{
spectrometer(dada_input);
}
return 0;
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment