diff --git a/psrdada_cpp/effelsberg/edd/CMakeLists.txt b/psrdada_cpp/effelsberg/edd/CMakeLists.txt
index bd1b566e70cbf52306ad9a88ffd8327b57ba5986..4f144b6d6313c1321d5bcfc237aee1ec89f040c4 100644
--- a/psrdada_cpp/effelsberg/edd/CMakeLists.txt
+++ b/psrdada_cpp/effelsberg/edd/CMakeLists.txt
@@ -6,19 +6,21 @@ set(PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES
     ${DEPENDENCY_LIBRARIES})
 
 set(psrdada_cpp_effelsberg_edd_src
-    src/eddfft.cu
+    #src/eddfft.cu
+    src/Unpacker.cu
     )
 
 cuda_add_library(${CMAKE_PROJECT_NAME}_effelsberg_edd ${psrdada_cpp_effelsberg_edd_src})
 
 #simple FFT spectrometer test
-cuda_add_executable(eddfft_test src/eddfft_test.cu)
-target_link_libraries(eddfft_test ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} cufft)
-install(TARGETS eddfft_test DESTINATION bin)
+#cuda_add_executable(eddfft_test src/eddfft_test.cu)
+#target_link_libraries(eddfft_test ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} cufft)
+#install(TARGETS eddfft_test DESTINATION bin)
 
 #simple FFT spectrometer interface
-cuda_add_executable(eddfft src/eddfft_cli.cu)
-target_link_libraries(eddfft ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} cufft)
-install(TARGETS eddfft DESTINATION bin)
+#cuda_add_executable(eddfft src/eddfft_cli.cu)
+#target_link_libraries(eddfft ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} cufft)
+#install(TARGETS eddfft DESTINATION bin)
 
+add_subdirectory(test)
 endif(ENABLE_CUDA)
diff --git a/psrdada_cpp/effelsberg/edd/Unpacker.cuh b/psrdada_cpp/effelsberg/edd/Unpacker.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..198e8be4a31e064e6a8ce3e19dcb0c01a35e422d
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/Unpacker.cuh
@@ -0,0 +1,46 @@
+#ifndef PSRDADA_CPP_EFFELSBERG_EDD_UNPACKER_CUH
+#define PSRDADA_CPP_EFFELSBERG_EDD_UNPACKER_CUH
+
+#include "psrdada_cpp/common.hpp"
+#include <thrust/device_vector.h>
+
+namespace psrdada_cpp {
+namespace effelsberg {
+namespace edd {
+namespace kernels {
+
+__global__
+void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n);
+
+__global__
+void unpack_edd_8bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n);
+
+}
+
+class Unpacker
+{
+public:
+    typedef thrust::device_vector<uint64_t> InputType;
+    typedef thrust::device_vector<float> OutputType;
+
+public:
+
+    Unpacker(cudaStream_t stream);
+    ~Unpacker();
+    Unpacker(Unpacker const&) = delete;
+
+    template <int Nbits>
+    void unpack(InputType const& input, OutputType& output);
+
+private:
+    cudaStream_t _copy_stream;
+};
+
+} //namespace edd
+} //namespace effelsberg
+} //namespace psrdada_cpp
+
+#endif // PSRDADA_CPP_EFFELSBERG_EDD_UNPACKER_CUH
+
+
+
diff --git a/psrdada_cpp/effelsberg/edd/src/Unpacker.cu b/psrdada_cpp/effelsberg/edd/src/Unpacker.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c309aafc7e00a94a0a88969d8f8395af4b3c4e5f
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/src/Unpacker.cu
@@ -0,0 +1,155 @@
+#include "psrdada_cpp/effelsberg/edd/Unpacker.cuh"
+#include "psrdada_cpp/cuda_utils.hpp"
+
+#define EDD_NTHREADS_UNPACK 512
+
+namespace psrdada_cpp {
+namespace effelsberg {
+namespace edd {
+namespace kernels {
+
+__device__ __forceinline__ uint64_t swap64(uint64_t x)
+{
+    uint64_t result;
+    uint2 t;
+    asm("mov.b64 {%0,%1},%2; \n\t"
+        : "=r"(t.x), "=r"(t.y) : "l"(x));
+    t.x = __byte_perm(t.x, 0, 0x0123);
+    t.y = __byte_perm(t.y, 0, 0x0123);
+    asm("mov.b64 %0,{%1,%2}; \n\t"
+        : "=l"(result) : "r"(t.y), "r"(t.x));
+    return result;
+}
+
+__global__
+void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n)
+{
+    /**
+     * Note: This kernels will not work with more than 512 threads.
+     */
+    __shared__ volatile float tmp_out[NTHREADS_UNPACK * 16];
+    __shared__ volatile uint64_t tmp_in[NTHREADS_UNPACK * 3];
+    int block_idx = blockIdx.x;
+    uint64_t val;
+    uint64_t rest;
+    volatile float* sout = tmp_out + (16 * threadIdx.x);
+    for (int idx = blockIdx.x * blockDim.x + threadIdx.x;
+        (3 * idx + 2) < n;
+        idx+=gridDim.x*blockDim.x)
+    {
+        //Read to shared memeory
+        int block_read_start = block_idx * NTHREADS_UNPACK * 3;
+        tmp_in[threadIdx.x]                = in[block_read_start + threadIdx.x];
+        tmp_in[NTHREADS_UNPACK + threadIdx.x]     = in[block_read_start + NTHREADS_UNPACK + threadIdx.x];
+        tmp_in[NTHREADS_UNPACK * 2 + threadIdx.x] = in[block_read_start + NTHREADS_UNPACK * 2 + threadIdx.x];
+        __syncthreads();
+        val  = swap64(tmp_in[3*threadIdx.x]);
+        sout[0] = (float)((int64_t)(( 0xFFF0000000000000 & val) <<  0) >> 52);
+        sout[1] = (float)((int64_t)(( 0x000FFF0000000000 & val) << 12) >> 52);
+        sout[2] = (float)((int64_t)(( 0x000000FFF0000000 & val) << 24) >> 52);
+        sout[3] = (float)((int64_t)(( 0x000000000FFF0000 & val) << 36) >> 52);
+        sout[4] = (float)((int64_t)(( 0x000000000000FFF0 & val) << 48) >> 52);
+        rest    =                   ( 0x000000000000000F & val) << 60;
+        val  = swap64(tmp_in[3*threadIdx.x + 1]);
+        sout[5] = (float)((int64_t)((( 0xFF00000000000000 & val) >> 4) | rest) >> 52);
+        sout[6] = (float)((int64_t)((  0x00FFF00000000000 & val) << 8)  >> 52);
+        sout[7] = (float)((int64_t)((  0x00000FFF00000000 & val) << 20) >> 52);
+        sout[8] = (float)((int64_t)((  0x00000000FFF00000 & val) << 32) >> 52);
+        sout[9] = (float)((int64_t)((  0x00000000000FFF00 & val) << 44) >> 52);
+        rest    =                   (  0x00000000000000FF & val) << 56;
+        val  = swap64(tmp_in[3*threadIdx.x + 2]);
+        sout[10] = (float)((int64_t)((( 0xF000000000000000 & val) >>  8) | rest) >> 52);
+        sout[11] = (float)((int64_t)((  0x0FFF000000000000 & val) <<  4) >> 52);
+        sout[12] = (float)((int64_t)((  0x0000FFF000000000 & val) << 16) >> 52);
+        sout[13] = (float)((int64_t)((  0x0000000FFF000000 & val) << 28) >> 52);
+        sout[14] = (float)((int64_t)((  0x0000000000FFF000 & val) << 40) >> 52);
+        __syncthreads();
+        int block_write_start = block_idx * NTHREADS_UNPACK * 16;
+        for (int ii = threadIdx.x; ii < 16 * NTHREADS_UNPACK; ii += blockDim.x)
+        {
+            out[block_write_start + ii] = tmp_out[ii];
+        }
+        block_idx += gridDim.x;
+    }
+}
+
+__global__
+void unpack_edd_8bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n);
+{
+    /**
+     * Note: This kernels will not work with more than 512 threads.
+     */
+    __shared__ volatile float tmp_out[NTHREADS_UNPACK * 8];
+    int block_idx = blockIdx.x;
+    uint64_t val;
+    volatile float* sout = tmp_out + (8 * threadIdx.x);
+
+    for (int idx = blockIdx.x * blockDim.x + threadIdx.x ; idx < n ; idx+=gridDim.x*blockDim.x)
+    {
+        int block_read_start = block_idx * NTHREADS_UNPACK;
+        val = swap64(in[block_read_start + threadIdx.x]);
+        sout[0] = (float)((int64_t)(( 0xFF00000000000000 & val) <<  0) >> 56);
+        sout[1] = (float)((int64_t)(( 0x00FF000000000000 & val) <<  8) >> 56);
+        sout[2] = (float)((int64_t)(( 0x0000FF0000000000 & val) << 16) >> 56);
+        sout[3] = (float)((int64_t)(( 0x000000FF00000000 & val) << 24) >> 56);
+        sout[4] = (float)((int64_t)(( 0x00000000FF000000 & val) << 32) >> 56);
+        sout[5] = (float)((int64_t)(( 0x0000000000FF0000 & val) << 40) >> 56);
+        sout[6] = (float)((int64_t)(( 0x000000000000FF00 & val) << 48) >> 56);
+        sout[7] = (float)((int64_t)(( 0x00000000000000FF & val) << 56) >> 56);
+        __syncthreads();
+        int block_write_start = block_idx * NTHREADS_UNPACK * 8;
+        for (int ii = threadIdx.x; ii < 8 * NTHREADS_UNPACK; ii+=blockDim.x)
+        {
+            out[block_write_start+ii] = tmp_out[ii];
+        }
+        block_idx += gridDim.x;
+    }
+}
+
+} //namespace kernels
+
+
+Unpacker::Unpacker(cudaStream_t stream)
+    : _stream(stream)
+{
+
+}
+
+Unpacker::~Unpacker()
+{
+
+}
+
+template <>
+void Unpacker::unpack<12>(InputType const& input, OutputType& output)
+{
+    BOOST_LOG_TRIVIAL(debug) << "Unpacking 12-bit data";
+    std::size_t output_size = input.size() * 16 / 3;
+    BOOST_LOG_TRIVIAL(debug) << "Resizing output buffer to " << output_size << " elements";
+    output.resize(output_size);
+    int nblocks = input.size() / EDD_NTHREADS_UNPACK;
+    InputType::value_type const* input_ptr = thrust::raw_pointer_cast(input.data())
+    OutputType::value_type* output_ptr = thrust::raw_pointer_cast(output.data())
+    kernels::unpack_edd_12bit_to_float32<<< nblocks, EDD_NTHREADS_UNPACK, 0, _stream>>>(
+            input_ptr, output_ptr, input.size());
+    CUDA_ERROR_CHECK(cudaStreamSynchronize(_stream));
+}
+
+template <>
+void Unpacker::unpack<8>(InputType const& input, OutputType& output)
+{
+    BOOST_LOG_TRIVIAL(debug) << "Unpacking 12-bit data";
+    std::size_t output_size = input.size() * 8;
+    BOOST_LOG_TRIVIAL(debug) << "Resizing output buffer to " << output_size << " elements";
+    output.resize(output_size);
+    int nblocks = input.size() / EDD_NTHREADS_UNPACK;
+    InputType::value_type const* input_ptr = thrust::raw_pointer_cast(input.data())
+    OutputType::value_type* output_ptr = thrust::raw_pointer_cast(output.data())
+    kernels::unpack_edd_8bit_to_float32<<< nblocks, EDD_NTHREADS_UNPACK, 0, _stream>>>(
+            input_ptr, output_ptr, input.size());
+    CUDA_ERROR_CHECK(cudaStreamSynchronize(_stream));
+}
+
+} //namespace edd
+} //namespace effelsberg
+} //namespace psrdada_cpp
\ No newline at end of file
diff --git a/psrdada_cpp/effelsberg/edd/test/CMakeLists.txt b/psrdada_cpp/effelsberg/edd/test/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..69f911afd6bf24a3e5ac79929f82664a36cdb53e
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/test/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories(${GTEST_INCLUDE_DIR})
+
+link_directories(${GTEST_LIBRARY_DIR})
+
+set(
+    gtest_edd_src
+    src/UnpackerTester.cu
+
+)
+cuda_add_executable(gtest_edd ${gtest_edd_src} )
+target_link_libraries(gtest_edd ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES})
+add_test(gtest_edd gtest_edd --test_data "${CMAKE_CURRENT_LIST_DIR}/data")
diff --git a/psrdada_cpp/effelsberg/edd/test/UnpackerTester.cuh b/psrdada_cpp/effelsberg/edd/test/UnpackerTester.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..986efa9c0c1620a85277736de2c53ed6acf5cd50
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/test/UnpackerTester.cuh
@@ -0,0 +1,45 @@
+#ifndef PSRDADA_CPP_EFFELSBERG_EDD_UNPACKERTESTER_CUH
+#define PSRDADA_CPP_EFFELSBERG_EDD_UNPACKERTESTER_CUH
+
+namespace psrdada_cpp {
+namespace effelsberg {
+namespace edd {
+namespace test {
+
+class UnpackerTester: public ::testing::Test
+{
+public:
+    typedef std::vector<uint64_t> InputType;
+    typedef std::vector<float> OutputType;
+
+protected:
+    void SetUp() override;
+    void TearDown() override;
+
+public:
+    UnpackerTester();
+    ~UnpackerTester();
+
+protected:
+    void unpacker_12_to_32_c_reference(
+        InputType const& input,
+        OutputType& output);
+
+    void unpacker_8_to_32_c_reference(
+        InputType const& input,
+        OutputType& output);
+
+    void compare_against_host(
+        Unpacker::OutputType const& gpu_output,
+        OutputType const& host_output);
+
+protected:
+    cudaStream_t _stream;
+};
+
+} //namespace test
+} //namespace edd
+} //namespace meerkat
+} //namespace psrdada_cpp
+
+#endif //PSRDADA_CPP_EFFELSBERG_EDD_UNPACKERTESTER_CUH
diff --git a/psrdada_cpp/effelsberg/edd/test/src/UnpackerTester.cu b/psrdada_cpp/effelsberg/edd/test/src/UnpackerTester.cu
new file mode 100644
index 0000000000000000000000000000000000000000..437ce0d830a8f19f4fa0e90ab7caf5abf81a7e4e
--- /dev/null
+++ b/psrdada_cpp/effelsberg/edd/test/src/UnpackerTester.cu
@@ -0,0 +1,74 @@
+#include "psrdada_cpp/effelsberg/edd/test/UnpackerTester.cuh"
+
+#define BSWAP64(x) ((0xFF00000000000000 & x) >> 56) | \
+                   ((0x00FF000000000000 & x) >> 40) | \
+                   ((0x0000FF0000000000 & x) >> 24) | \
+                   ((0x000000FF00000000 & x) >>  8) | \
+                   ((0x00000000FF000000 & x) <<  8) | \
+                   ((0x0000000000FF0000 & x) << 24) | \
+                   ((0x000000000000FF00 & x) << 40) | \
+                   ((0x00000000000000FF & x) << 56)
+
+namespace psrdada_cpp {
+namespace effelsberg {
+namespace edd {
+namespace test {
+
+UnpackerTester::UnpackerTester()
+    : ::testing::Test()
+    , _stream(0)
+{
+
+}
+
+UnpackerTester::~UnpackerTester()
+{
+
+}
+
+void UnpackerTester::SetUp()
+{
+    CUDA_ERROR_CHECK(cudaStreamCreate(&_stream));
+}
+
+void UnpackerTester::TearDown()
+{
+    CUDA_ERROR_CHECK(cudaStreamDestroy(_stream));
+}
+
+void UnpackerTester::unpacker_12_to_32_c_reference(
+    InputType const& input,
+    OutputType& output)
+{
+
+}
+
+void UnpackerTester::unpacker_8_to_32_c_reference(
+    InputType const& input,
+    OutputType& output)
+{
+
+}
+
+void UnpackerTester::compare_against_host(
+    Unpacker::OutputType const& gpu_output,
+    OutputType const& host_output)
+{
+
+}
+
+TEST_F(UnpackerTester, 12_bit_unpack_test)
+{
+
+}
+
+TEST_F(UnpackerTester, 8_bit_unpack_test)
+{
+
+}
+
+
+} //namespace test
+} //namespace edd
+} //namespace meerkat
+} //namespace psrdada_cpp
\ No newline at end of file