Commit 845dfb09 authored by Ewan Barr's avatar Ewan Barr
Browse files

dirty commit for debugging in docker

parent fa992f64
add_subdirectory(edd)
\ No newline at end of file
if(ENABLE_CUDA)
set(PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES
${CMAKE_PROJECT_NAME}
${CMAKE_PROJECT_NAME}_effelsberg_edd
${DEPENDENCY_LIBRARIES})
set(psrdada_cpp_effelsberg_edd_src
src/edd_simple_fft_spectrometer.cu
)
cuda_add_library(${CMAKE_PROJECT_NAME}_effelsberg_edd ${psrdada_cpp_effelsberg_edd_src})
#simple FFT spectrometer test
cuda_add_executable(edd_specpol src/edd_simple_fft_spectrometer_test.cu)
target_link_libraries(edd_specpol ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES})
install(TARGETS edd_specpol DESTINATION bin)
endif(ENABLE_CUDA)
#ifndef PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
#define PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
#include "psrdada_cpp/effelsberg/edd/"
#include "psrdada_cpp/raw_bytes.hpp"
#include "thrust/device_vector.h"
#include "thrust/host_vector.h"
#include "cufft.h"
namespace psrdada_cpp {
namespace effelsberg {
namespace edd {
namespace kernels {
__global__
void unpack_edd_12bit_to_float32(char* __restrict__ in, float* __restrict__ out, int n);
} //kernels
template <class HandlerType>
class SimpleFFTSpectrometer
{
public:
SimpleFFTSpectrometer(
std::size_t fft_length,
std::size_t naccumulate,
std::size_t nbits,
HandlerType& handler);
~SimpleFFTSpectrometer();
/**
* @brief A callback to be called on connection
* to a ring buffer.
*
* @detail The first available header block in the
* in the ring buffer is provided as an argument.
* It is here that header parameters could be read
* if desired.
*
* @param block A RawBytes object wrapping a DADA header buffer
*/
void init(RawBytes& block);
/**
* @brief A callback to be called on acqusition of a new
* data block.
*
* @param block A RawBytes object wrapping a DADA data buffer
*/
bool operator()(RawBytes& block);
private:
std::size_t _fft_length;
std::size_t _naccumulate;
std::size_t _nbits;
HandlerType& _handler;
bool _first_block;
std::size_t _nsamps;
cufftHandle _fft_plan;
thrust::device_vector<uint64_t> _edd_raw;
thrust::device_vector<float> _edd_unpacked;
thrust::device_vector<cufftComplex> _channelised;
thrust::device_vector<float> _detected;
};
} //edd
} //effelsberg
} //psrdada_cpp
#include "psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu"
#endif //PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
\ No newline at end of file
#ifndef PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
#define PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
#include "psrdada_cpp/effelsberg/edd/edd_simple_fft_spectrometer.cuh"
namespace psrdada_cpp {
namespace effelsberg {
namespace edd {
template <class HandlerType>
SimpleFFTSpectrometer<HandlerType>::SimpleFFTSpectrometer(
std::size_t fft_length,
std::size_t naccumulate,
std::size_t nbits,
HandlerType& handler)
: _fft_length(fft_length)
, _naccumulate(naccumulate)
, _nbits(nbits)
, _handler(handler)
, _first_block(true)
, _nsamps(0)
, _fft_plan(NULL)
{
}
template <class HandlerType>
SimpleFFTSpectrometer<HandlerType>::~SimpleFFTSpectrometer()
{
if (!_first_block)
cufftDestroy(_fft_plan);
}
template <class HandlerType>
void SimpleFFTSpectrometer<HandlerType>::init(RawBytes& block)
{
}
template <class HandlerType>
bool SimpleFFTSpectrometer<HandlerType>::operator()(RawBytes& block)
{
std::size_t nsamps_in_block = 8 * block.used_bytes() / _nbits;
std::size_t nchans = _fft_length / 2 + 1;
if (_first_block)
{
_nsamps = nsamps_in_block;
std::size_t n64bit_words = 3 * _nsamps / 16;
if (_nsamps % _fft_length != 0)
{
throw std::runtime_error("Number of samples is not multiple of FFT size");
}
std::size_t batch = _nsamps/_fft_length;
// Only do these things once
CUFFT_ERROR_CHECK(cufftPlanMany(&_fft_plan, 1, _fft_length, 0, 1,
_fft_length, 1, 1, _fft_length, CUFFT_R2C, batch));
_edd_raw.resize(n64bit_words);
_edd_unpacked.resize(_nsamps);
_channelised.resize(nchans * batch);
_detected.resize(nchans * batch / _naccumulate);
_first_block = false;
}
if (_nsamps != nsamps_in_block)
{
throw std::runtime_error("Received incomplete block");
}
uint64_t* _edd_raw_ptr = thrust::raw_pointer_cast(_edd_raw.data());
float* _edd_unpacked_ptr = thrust::raw_pointer_cast(_edd_unpacked.data());
//Copy DADA buffer to GPU
thrust::copy(block.ptr(), block.ptr()+block.used_bytes(), (char*) _edd_raw_ptr);
if (_nbits == 12)
{
unpack_edd_12bit_to_float32<<< 64, 1024>>>(_edd_raw_ptr, _edd_unpacked_ptr, _edd_raw.size());
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
}
else if (_nbits == 8)
{
throw std::runtime_error("Only 12-bit mode supported");
}
else
{
throw std::runtime_error("Only 12-bit mode supported");
}
cufftComplex* _channelised_ptr = thrust::raw_pointer_cast(_channelised.data());
CUFFT_ERROR_CHECK(cufftExecuteR2C(_fft_plan, (cufftReal*)_edd_unpacked_ptr, _channelised_ptr, CUFFT_FORWARD));
//thrust::copy(_edd_unpacked.begin(), _edd_unpacked.end(), block.ptr());
//_handler(block);
}
} //edd
} //effelsberg
} //psrdada_cpp
#include "psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu"
#endif //PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
\ No newline at end of file
#include "psrdada_cpp/common.hpp"
#include "psrdada_cpp/cuda_utils.hpp"
#define BSWAP64(x) ((0xFF00000000000000 & x) >> 56) | \
((0x00FF000000000000 & x) >> 40) | \
((0x0000FF0000000000 & x) >> 24) | \
((0x000000FF00000000 & x) >> 8) | \
((0x00000000FF000000 & x) << 8) | \
((0x0000000000FF0000 & x) << 24) | \
((0x000000000000FF00 & x) << 40) | \
((0x00000000000000FF & x) << 56)
namespace psrdada_cpp {
namespace effelsberg {
namespace edd {
namespace kernels {
__device__ __forceinline__ uint64_t swap64(uint64_t x)
{
uint64_t result;
uint2 t;
asm("mov.b64 {%0,%1},%2; \n\t"
: "=r"(t.x), "=r"(t.y) : "l"(x));
t.x = __byte_perm(t.x, 0, 0x0123);
t.y = __byte_perm(t.y, 0, 0x0123);
asm("mov.b64 %0,{%1,%2}; \n\t"
: "=l"(result) : "r"(t.y), "r"(t.x));
return result;
}
__global__
void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n)
{
for (int idx = blockIdx.x * blockDim.x + threadIdx.x ; (3*idx+2) < n ; idx+=gridDim.x*blockDim.x)
{
uint64_t val;
uint64_t rest;
int read_idx = 3*idx;
int write_idx = 16*idx;
float* sout = out+write_idx;
val = swap64(in[read_idx]);
sout[0] = (float)((int64_t)(( 0xFFF0000000000000 & val) << 0) >> 52);
sout[1] = (float)((int64_t)(( 0x000FFF0000000000 & val) << 12) >> 52);
sout[2] = (float)((int64_t)(( 0x000000FFF0000000 & val) << 24) >> 52);
sout[3] = (float)((int64_t)(( 0x000000000FFF0000 & val) << 36) >> 52);
sout[4] = (float)((int64_t)(( 0x000000000000FFF0 & val) << 48) >> 52);
rest = ( 0x000000000000000F & val) << 60;
val = swap64(in[read_idx+1]);
sout[5] = (float)((int64_t)((( 0xFF00000000000000 & val) >> 4) | rest) >> 52);
sout[6] = (float)((int64_t)(( 0x00FFF00000000000 & val) << 8) >> 52);
sout[7] = (float)((int64_t)(( 0x00000FFF00000000 & val) << 20) >> 52);
sout[8] = (float)((int64_t)(( 0x00000000FFF00000 & val) << 32) >> 52);
sout[9] = (float)((int64_t)(( 0x00000000000FFF00 & val) << 44) >> 52);
rest = ( 0x00000000000000FF & val) << 56;
val = swap64(in[read_idx+2]);
sout[10] = (float)((int64_t)((( 0xF000000000000000 & val) >> 8) | rest) >> 52);
sout[11] = (float)((int64_t)(( 0x0FFF000000000000 & val) << 4) >> 52);
sout[12] = (float)((int64_t)(( 0x0000FFF000000000 & val) << 16) >> 52);
sout[13] = (float)((int64_t)(( 0x0000000FFF000000 & val) << 28) >> 52);
sout[14] = (float)((int64_t)(( 0x0000000000FFF000 & val) << 40) >> 52);
}
}
} //kernels
namespace test
{
void unpack_edd_12bit_to_float32_cpu(thrust::host_vector<uint64_t>const& input, thrust::host_vector<float>& output)
{
uint64_t val;
uint64_t rest;
output.reserve(16 * input.size() / 3);
for (int ii=0; ii<input.size(); ii+=3)
{
int idx = ii;
val = be64toh(input[idx]);
output.push_back( (float)((int64_t)(( 0xFFF0000000000000 & val) << 0) >> 52));
output.push_back( (float)((int64_t)(( 0x000FFF0000000000 & val) << 12) >> 52));
output.push_back( (float)((int64_t)(( 0x000000FFF0000000 & val) << 24) >> 52));
output.push_back( (float)((int64_t)(( 0x000000000FFF0000 & val) << 36) >> 52));
output.push_back( (float)((int64_t)(( 0x000000000000FFF0 & val) << 48) >> 52));
rest = ( 0x000000000000000F & val) << 60;
val = be64toh(input[++idx]);
output.push_back( (float)((int64_t)((( 0xFF00000000000000 & val) >> 4) | rest) >> 52));
output.push_back( (float)((int64_t)(( 0x00FFF00000000000 & val) << 8) >> 52));
output.push_back( (float)((int64_t)(( 0x00000FFF00000000 & val) << 20) >> 52));
output.push_back( (float)((int64_t)(( 0x00000000FFF00000 & val) << 32) >> 52));
output.push_back( (float)((int64_t)(( 0x00000000000FFF00 & val) << 44) >> 52));
rest = ( 0x00000000000000FF & val) << 56;
val = be64toh(input[++idx]);
output.push_back( (float)((int64_t)((( 0xF000000000000000 & val) >> 8) | rest) >> 52));
output.push_back( (float)((int64_t)(( 0x0FFF000000000000 & val) << 4) >> 52));
output.push_back( (float)((int64_t)(( 0x0000FFF000000000 & val) << 16) >> 52));
output.push_back( (float)((int64_t)(( 0x0000000FFF000000 & val) << 28) >> 52));
output.push_back( (float)((int64_t)(( 0x0000000000FFF000 & val) << 40) >> 52));
output.push_back( (float)((int64_t)(( 0x0000000000000FFF & val) << 52) >> 52));
}
}
/*
void unpack_edd_12bit_to_float32_test()
{
int nvalues = 1600;
int nlongs = 3 * nvalues / 16;
int nthreads = nlongs/3;
int nblocks = 1;
int smem_size = nthreads * 16 * sizeof(float);
thrust::host_vector<uint64_t> input;
for (int val=0; val<nlongs; ++val)
{
input.push_back(val);
}
// Run CUDA version
thrust::device_vector<uint64_t> input_d = input;
thrust::device_vector<float> output_d(nvalues);
uint64_t* input_ptr = thrust::raw_pointer_cast(input_d.data());
float* output_ptr = thrust::raw_pointer_cast(output_d.data())
unpack_edd_12bit_to_float32<<<nblocks, nthreads, smem_size, 0>>>(input_ptr, output_ptr, nlongs);
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
thrust::host_vector<float> output = output_d;
// Run host version
thrust::host_vector<float> test_output;
unpack_edd_12bit_to_float32_cpu(input, test_output);
for (int ii=0; ii<nvalues; ++ii)
{
if (output[ii] != test_output[ii])
{
printf("Error at index %d -> %f != %f \n", ii, output[ii], test_output[ii]);
}
}
}
*/
} //test
} //edd
} //effelsberg
} //psrdada_cpp
\ No newline at end of file
#include "psrdada_cpp/common.h"
#include "psrdada_cpp/raw_bytes.h"
#include "psrdada_cpp/effelsberg/ebb/edd_simple_fft_spectrometer.cuh"
#include "thrust/host_vector.h"
int main()
{
std::size_t size = 4096 * 12 * 4096 / 8
thrust::host_vector<char> data;
data.resize(size);
RawBytes dada_input(data.data(), data.size());
SimpleFFTSpectrometer spectrometer(8192, 1, 12, NULL);
for (int ii=0; ii<100; ++ii)
{
spectrometer(dada_input);
}
return 0;
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment