From 845dfb09c7691639f12f0927b304af472a5ed10b Mon Sep 17 00:00:00 2001
From: Ewan Barr <ewan.d.barr@googlemail.com>
Date: Tue, 24 Apr 2018 18:36:34 +0200
Subject: [PATCH] dirty commit for debugging in docker
---
psrdada_cpp/effelsberg/CMakeLists.txt | 1 +
psrdada_cpp/effelsberg/edd/CMakeLists.txt | 20 +++
.../edd/detail/edd_simple_fft_spectrometer.cu | 72 +++++++++
.../edd/edd_simple_fft_spectrometer.cuh | 104 +++++++++++++
.../edd/src/edd_simple_fft_spectrometer.cu | 147 ++++++++++++++++++
.../src/edd_simple_fft_spectrometer_test.cu | 19 +++
6 files changed, 363 insertions(+)
create mode 100644 psrdada_cpp/effelsberg/CMakeLists.txt
create mode 100644 psrdada_cpp/effelsberg/edd/CMakeLists.txt
create mode 100644 psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu
create mode 100644 psrdada_cpp/effelsberg/edd/edd_simple_fft_spectrometer.cuh
create mode 100644 psrdada_cpp/effelsberg/edd/src/edd_simple_fft_spectrometer.cu
create mode 100644 psrdada_cpp/effelsberg/edd/src/edd_simple_fft_spectrometer_test.cu
diff --git a/psrdada_cpp/effelsberg/CMakeLists.txt b/psrdada_cpp/effelsberg/CMakeLists.txt
new file mode 100644
index 00000000..0c34cd3c
--- /dev/null
+++ b/psrdada_cpp/effelsberg/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(edd)
\ No newline at end of file
diff --git a/psrdada_cpp/effelsberg/edd/CMakeLists.txt b/psrdada_cpp/effelsberg/edd/CMakeLists.txt
new file mode 100644
index 00000000..398e13ae
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/CMakeLists.txt
@@ -0,0 +1,20 @@
+if(ENABLE_CUDA)
+
+set(PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES
+ ${CMAKE_PROJECT_NAME}
+ ${CMAKE_PROJECT_NAME}_effelsberg_edd
+ ${DEPENDENCY_LIBRARIES})
+
+set(psrdada_cpp_effelsberg_edd_src
+ src/edd_simple_fft_spectrometer.cu
+ )
+
+cuda_add_library(${CMAKE_PROJECT_NAME}_effelsberg_edd ${psrdada_cpp_effelsberg_edd_src})
+
+#simple FFT spectrometer test
+cuda_add_executable(edd_specpol src/edd_simple_fft_spectrometer_test.cu)
+target_link_libraries(edd_specpol ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES})
+install(TARGETS edd_specpol DESTINATION bin)
+
+
+endif(ENABLE_CUDA)
diff --git a/psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu b/psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu
new file mode 100644
index 00000000..2206c1ec
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu
@@ -0,0 +1,72 @@
+#ifndef PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
+#define PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
+
+#include "psrdada_cpp/effelsberg/edd/"
+#include "psrdada_cpp/raw_bytes.hpp"
+#include "thrust/device_vector.h"
+#include "thrust/host_vector.h"
+#include "cufft.h"
+
+namespace psrdada_cpp {
+namespace effelsberg {
+namespace edd {
+namespace kernels {
+
+ __global__
+ void unpack_edd_12bit_to_float32(char* __restrict__ in, float* __restrict__ out, int n);
+
+} //kernels
+
+template <class HandlerType>
+class SimpleFFTSpectrometer
+{
+public:
+ SimpleFFTSpectrometer(
+ std::size_t fft_length,
+ std::size_t naccumulate,
+ std::size_t nbits,
+ HandlerType& handler);
+ ~SimpleFFTSpectrometer();
+
+ /**
+ * @brief A callback to be called on connection
+ * to a ring buffer.
+ *
+ * @detail The first available header block in the
+ * in the ring buffer is provided as an argument.
+ * It is here that header parameters could be read
+ * if desired.
+ *
+ * @param block A RawBytes object wrapping a DADA header buffer
+ */
+ void init(RawBytes& block);
+
+ /**
+ * @brief A callback to be called on acqusition of a new
+ * data block.
+ *
+ * @param block A RawBytes object wrapping a DADA data buffer
+ */
+ bool operator()(RawBytes& block);
+
+private:
+ std::size_t _fft_length;
+ std::size_t _naccumulate;
+ std::size_t _nbits;
+ HandlerType& _handler;
+ bool _first_block;
+ std::size_t _nsamps;
+ cufftHandle _fft_plan;
+ thrust::device_vector<uint64_t> _edd_raw;
+ thrust::device_vector<float> _edd_unpacked;
+ thrust::device_vector<cufftComplex> _channelised;
+ thrust::device_vector<float> _detected;
+};
+
+
+} //edd
+} //effelsberg
+} //psrdada_cpp
+
+#include "psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu"
+#endif //PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
\ No newline at end of file
diff --git a/psrdada_cpp/effelsberg/edd/edd_simple_fft_spectrometer.cuh b/psrdada_cpp/effelsberg/edd/edd_simple_fft_spectrometer.cuh
new file mode 100644
index 00000000..37e163c2
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/edd_simple_fft_spectrometer.cuh
@@ -0,0 +1,104 @@
+#ifndef PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
+#define PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
+
+#include "psrdada_cpp/effelsberg/edd/edd_simple_fft_spectrometer.cuh"
+
+namespace psrdada_cpp {
+namespace effelsberg {
+namespace edd {
+
+template <class HandlerType>
+SimpleFFTSpectrometer<HandlerType>::SimpleFFTSpectrometer(
+ std::size_t fft_length,
+ std::size_t naccumulate,
+ std::size_t nbits,
+ HandlerType& handler)
+ : _fft_length(fft_length)
+ , _naccumulate(naccumulate)
+ , _nbits(nbits)
+ , _handler(handler)
+ , _first_block(true)
+ , _nsamps(0)
+ , _fft_plan(NULL)
+{
+
+}
+
+template <class HandlerType>
+SimpleFFTSpectrometer<HandlerType>::~SimpleFFTSpectrometer()
+{
+ if (!_first_block)
+ cufftDestroy(_fft_plan);
+}
+
+template <class HandlerType>
+void SimpleFFTSpectrometer<HandlerType>::init(RawBytes& block)
+{
+}
+
+template <class HandlerType>
+bool SimpleFFTSpectrometer<HandlerType>::operator()(RawBytes& block)
+{
+
+ std::size_t nsamps_in_block = 8 * block.used_bytes() / _nbits;
+ std::size_t nchans = _fft_length / 2 + 1;
+
+ if (_first_block)
+ {
+ _nsamps = nsamps_in_block;
+ std::size_t n64bit_words = 3 * _nsamps / 16;
+ if (_nsamps % _fft_length != 0)
+ {
+ throw std::runtime_error("Number of samples is not multiple of FFT size");
+ }
+ std::size_t batch = _nsamps/_fft_length;
+
+ // Only do these things once
+ CUFFT_ERROR_CHECK(cufftPlanMany(&_fft_plan, 1, _fft_length, 0, 1,
+ _fft_length, 1, 1, _fft_length, CUFFT_R2C, batch));
+
+ _edd_raw.resize(n64bit_words);
+ _edd_unpacked.resize(_nsamps);
+ _channelised.resize(nchans * batch);
+ _detected.resize(nchans * batch / _naccumulate);
+ _first_block = false;
+ }
+
+ if (_nsamps != nsamps_in_block)
+ {
+ throw std::runtime_error("Received incomplete block");
+ }
+
+ uint64_t* _edd_raw_ptr = thrust::raw_pointer_cast(_edd_raw.data());
+ float* _edd_unpacked_ptr = thrust::raw_pointer_cast(_edd_unpacked.data());
+
+ //Copy DADA buffer to GPU
+ thrust::copy(block.ptr(), block.ptr()+block.used_bytes(), (char*) _edd_raw_ptr);
+
+ if (_nbits == 12)
+ {
+ unpack_edd_12bit_to_float32<<< 64, 1024>>>(_edd_raw_ptr, _edd_unpacked_ptr, _edd_raw.size());
+ CUDA_ERROR_CHECK(cudaDeviceSynchronize());
+ }
+ else if (_nbits == 8)
+ {
+ throw std::runtime_error("Only 12-bit mode supported");
+ }
+ else
+ {
+ throw std::runtime_error("Only 12-bit mode supported");
+ }
+
+ cufftComplex* _channelised_ptr = thrust::raw_pointer_cast(_channelised.data());
+ CUFFT_ERROR_CHECK(cufftExecuteR2C(_fft_plan, (cufftReal*)_edd_unpacked_ptr, _channelised_ptr, CUFFT_FORWARD));
+
+ //thrust::copy(_edd_unpacked.begin(), _edd_unpacked.end(), block.ptr());
+ //_handler(block);
+}
+
+} //edd
+} //effelsberg
+} //psrdada_cpp
+
+#include "psrdada_cpp/effelsberg/edd/detail/edd_simple_fft_spectrometer.cu"
+#endif //PSRDADA_CPP_EFFELSBERG_EDD_SIMPLE_FFT_SPECTROMETER_HPP
\ No newline at end of file
diff --git a/psrdada_cpp/effelsberg/edd/src/edd_simple_fft_spectrometer.cu b/psrdada_cpp/effelsberg/edd/src/edd_simple_fft_spectrometer.cu
new file mode 100644
index 00000000..eca878fc
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/src/edd_simple_fft_spectrometer.cu
@@ -0,0 +1,147 @@
+#include "psrdada_cpp/common.hpp"
+#include "psrdada_cpp/cuda_utils.hpp"
+
+#define BSWAP64(x) ((0xFF00000000000000 & x) >> 56) | \
+ ((0x00FF000000000000 & x) >> 40) | \
+ ((0x0000FF0000000000 & x) >> 24) | \
+ ((0x000000FF00000000 & x) >> 8) | \
+ ((0x00000000FF000000 & x) << 8) | \
+ ((0x0000000000FF0000 & x) << 24) | \
+ ((0x000000000000FF00 & x) << 40) | \
+ ((0x00000000000000FF & x) << 56)
+
+
+namespace psrdada_cpp {
+namespace effelsberg {
+namespace edd {
+namespace kernels {
+
+ __device__ __forceinline__ uint64_t swap64(uint64_t x)
+ {
+ uint64_t result;
+ uint2 t;
+ asm("mov.b64 {%0,%1},%2; \n\t"
+ : "=r"(t.x), "=r"(t.y) : "l"(x));
+ t.x = __byte_perm(t.x, 0, 0x0123);
+ t.y = __byte_perm(t.y, 0, 0x0123);
+ asm("mov.b64 %0,{%1,%2}; \n\t"
+ : "=l"(result) : "r"(t.y), "r"(t.x));
+ return result;
+ }
+
+
+ __global__
+ void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n)
+ {
+ for (int idx = blockIdx.x * blockDim.x + threadIdx.x ; (3*idx+2) < n ; idx+=gridDim.x*blockDim.x)
+ {
+ uint64_t val;
+ uint64_t rest;
+ int read_idx = 3*idx;
+ int write_idx = 16*idx;
+ float* sout = out+write_idx;
+ val = swap64(in[read_idx]);
+ sout[0] = (float)((int64_t)(( 0xFFF0000000000000 & val) << 0) >> 52);
+ sout[1] = (float)((int64_t)(( 0x000FFF0000000000 & val) << 12) >> 52);
+ sout[2] = (float)((int64_t)(( 0x000000FFF0000000 & val) << 24) >> 52);
+ sout[3] = (float)((int64_t)(( 0x000000000FFF0000 & val) << 36) >> 52);
+ sout[4] = (float)((int64_t)(( 0x000000000000FFF0 & val) << 48) >> 52);
+ rest = ( 0x000000000000000F & val) << 60;
+ val = swap64(in[read_idx+1]);
+ sout[5] = (float)((int64_t)((( 0xFF00000000000000 & val) >> 4) | rest) >> 52);
+ sout[6] = (float)((int64_t)(( 0x00FFF00000000000 & val) << 8) >> 52);
+ sout[7] = (float)((int64_t)(( 0x00000FFF00000000 & val) << 20) >> 52);
+ sout[8] = (float)((int64_t)(( 0x00000000FFF00000 & val) << 32) >> 52);
+ sout[9] = (float)((int64_t)(( 0x00000000000FFF00 & val) << 44) >> 52);
+ rest = ( 0x00000000000000FF & val) << 56;
+ val = swap64(in[read_idx+2]);
+ sout[10] = (float)((int64_t)((( 0xF000000000000000 & val) >> 8) | rest) >> 52);
+ sout[11] = (float)((int64_t)(( 0x0FFF000000000000 & val) << 4) >> 52);
+ sout[12] = (float)((int64_t)(( 0x0000FFF000000000 & val) << 16) >> 52);
+ sout[13] = (float)((int64_t)(( 0x0000000FFF000000 & val) << 28) >> 52);
+ sout[14] = (float)((int64_t)(( 0x0000000000FFF000 & val) << 40) >> 52);
+ }
+ }
+
+} //kernels
+
+namespace test
+{
+ void unpack_edd_12bit_to_float32_cpu(thrust::host_vector<uint64_t>const& input, thrust::host_vector<float>& output)
+ {
+ uint64_t val;
+ uint64_t rest;
+ output.reserve(16 * input.size() / 3);
+ for (int ii=0; ii<input.size(); ii+=3)
+ {
+ int idx = ii;
+ val = be64toh(input[idx]);
+ output.push_back( (float)((int64_t)(( 0xFFF0000000000000 & val) << 0) >> 52));
+ output.push_back( (float)((int64_t)(( 0x000FFF0000000000 & val) << 12) >> 52));
+ output.push_back( (float)((int64_t)(( 0x000000FFF0000000 & val) << 24) >> 52));
+ output.push_back( (float)((int64_t)(( 0x000000000FFF0000 & val) << 36) >> 52));
+ output.push_back( (float)((int64_t)(( 0x000000000000FFF0 & val) << 48) >> 52));
+ rest = ( 0x000000000000000F & val) << 60;
+
+ val = be64toh(input[++idx]);
+ output.push_back( (float)((int64_t)((( 0xFF00000000000000 & val) >> 4) | rest) >> 52));
+ output.push_back( (float)((int64_t)(( 0x00FFF00000000000 & val) << 8) >> 52));
+ output.push_back( (float)((int64_t)(( 0x00000FFF00000000 & val) << 20) >> 52));
+ output.push_back( (float)((int64_t)(( 0x00000000FFF00000 & val) << 32) >> 52));
+ output.push_back( (float)((int64_t)(( 0x00000000000FFF00 & val) << 44) >> 52));
+ rest = ( 0x00000000000000FF & val) << 56;
+
+ val = be64toh(input[++idx]);
+ output.push_back( (float)((int64_t)((( 0xF000000000000000 & val) >> 8) | rest) >> 52));
+ output.push_back( (float)((int64_t)(( 0x0FFF000000000000 & val) << 4) >> 52));
+ output.push_back( (float)((int64_t)(( 0x0000FFF000000000 & val) << 16) >> 52));
+ output.push_back( (float)((int64_t)(( 0x0000000FFF000000 & val) << 28) >> 52));
+ output.push_back( (float)((int64_t)(( 0x0000000000FFF000 & val) << 40) >> 52));
+ output.push_back( (float)((int64_t)(( 0x0000000000000FFF & val) << 52) >> 52));
+ }
+ }
+
+ /*
+ void unpack_edd_12bit_to_float32_test()
+ {
+
+ int nvalues = 1600;
+ int nlongs = 3 * nvalues / 16;
+ int nthreads = nlongs/3;
+ int nblocks = 1;
+ int smem_size = nthreads * 16 * sizeof(float);
+
+ thrust::host_vector<uint64_t> input;
+
+ for (int val=0; val<nlongs; ++val)
+ {
+ input.push_back(val);
+ }
+
+ // Run CUDA version
+ thrust::device_vector<uint64_t> input_d = input;
+ thrust::device_vector<float> output_d(nvalues);
+ uint64_t* input_ptr = thrust::raw_pointer_cast(input_d.data());
+ float* output_ptr = thrust::raw_pointer_cast(output_d.data())
+ unpack_edd_12bit_to_float32<<<nblocks, nthreads, smem_size, 0>>>(input_ptr, output_ptr, nlongs);
+ CUDA_ERROR_CHECK(cudaDeviceSynchronize());
+ thrust::host_vector<float> output = output_d;
+
+ // Run host version
+ thrust::host_vector<float> test_output;
+ unpack_edd_12bit_to_float32_cpu(input, test_output);
+
+ for (int ii=0; ii<nvalues; ++ii)
+ {
+ if (output[ii] != test_output[ii])
+ {
+ printf("Error at index %d -> %f != %f \n", ii, output[ii], test_output[ii]);
+ }
+ }
+ }
+ */
+} //test
+
+} //edd
+} //effelsberg
+} //psrdada_cpp
\ No newline at end of file
diff --git a/psrdada_cpp/effelsberg/edd/src/edd_simple_fft_spectrometer_test.cu b/psrdada_cpp/effelsberg/edd/src/edd_simple_fft_spectrometer_test.cu
new file mode 100644
index 00000000..5382faeb
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/src/edd_simple_fft_spectrometer_test.cu
@@ -0,0 +1,19 @@
+#include "psrdada_cpp/common.h"
+#include "psrdada_cpp/raw_bytes.h"
+#include "psrdada_cpp/effelsberg/ebb/edd_simple_fft_spectrometer.cuh"
+
+#include "thrust/host_vector.h"
+
+int main()
+{
+ std::size_t size = 4096 * 12 * 4096 / 8
+ thrust::host_vector<char> data;
+ data.resize(size);
+ RawBytes dada_input(data.data(), data.size());
+ SimpleFFTSpectrometer spectrometer(8192, 1, 12, NULL);
+ for (int ii=0; ii<100; ++ii)
+ {
+ spectrometer(dada_input);
+ }
+ return 0;
+}
\ No newline at end of file
--
GitLab