diff --git a/psrdada_cpp/effelsberg/edd/CMakeLists.txt b/psrdada_cpp/effelsberg/edd/CMakeLists.txt
index 681dde81d220c288b0dfa8a89742f6d4ee633004..210d1d673d002b84763c56649124ee59bfe23343 100644
--- a/psrdada_cpp/effelsberg/edd/CMakeLists.txt
+++ b/psrdada_cpp/effelsberg/edd/CMakeLists.txt
@@ -8,6 +8,7 @@ set(PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES
 set(psrdada_cpp_effelsberg_edd_src
     src/Unpacker.cu
     src/DetectorAccumulator.cu
+    src/ScaledTransposeTFtoTFT.cu
     )
 
 cuda_add_library(${CMAKE_PROJECT_NAME}_effelsberg_edd ${psrdada_cpp_effelsberg_edd_src})
diff --git a/psrdada_cpp/effelsberg/edd/src/ScaledTransposeTFtoTFT.cu b/psrdada_cpp/effelsberg/edd/src/ScaledTransposeTFtoTFT.cu
index f2193df4194c14821794ba7fc923beb526db10e0..effbcc2b28b9d60f01b06047ba1b0e29054757d3 100644
--- a/psrdada_cpp/effelsberg/edd/src/ScaledTransposeTFtoTFT.cu
+++ b/psrdada_cpp/effelsberg/edd/src/ScaledTransposeTFtoTFT.cu
@@ -17,7 +17,7 @@ void tf_to_tft_transpose(
     const float scale,
     const float offset)
 {
-    extern __shared__ char2* temp; //nbytes = sizeof(char2) * nsamps_per_load * nchans;
+    extern __shared__ char2 temp[]; //nbytes = sizeof(char2) * nsamps_per_load * nchans;
     const int load_offset = nsamps_per_packet * blockIdx.x * nchans;
     for (int sub_samp_load_idx = 0;
         sub_samp_load_idx < nsamps_per_packet/nsamps_per_load;
@@ -41,17 +41,16 @@ void tf_to_tft_transpose(
             }
         }
         __syncthreads();
-        int store_offset = load_offset + nsamps_per_load * sub_samp_load_idx;
         for (int chan_store_idx = threadIdx.y;
             chan_store_idx < nchans;
             chan_store_idx += blockDim.y)
         {
             for (int samp_store_idx = threadIdx.x;
                 samp_store_idx < nsamps_per_load;
-                samp_store_idx += blockDix.x)
+                samp_store_idx += blockDim.x) // stride by block width; blockIdx.x is 0 in the first block and would never advance
             {
                 int store_idx = (load_offset + chan_store_idx * nsamps_per_packet 
-                    + samps_per_load * sub_samp_load_idx + samp_store_idx);
+                    + nsamps_per_load * sub_samp_load_idx + samp_store_idx);
                 output[store_idx] = temp[samp_store_idx * nsamps_per_load + chan_store_idx];
             }
         }
@@ -88,7 +87,7 @@ void ScaledTransposeTFtoTFT::transpose(InputType const& input, OutputType& outpu
     assert(input.size() % (_nchans * _nsamps_per_packet) == 0 /* Input is not a multiple of _nchans * _nsamps_per_packet*/);
     output.resize(input.size());
     const int nsamps_per_load = 16;
-    assert(_nsamps_per_packet % nsamps_per_load) == 0;
+    assert((_nsamps_per_packet % nsamps_per_load) == 0);
     const int nsamps = input.size() / _nchans;
     int shared_mem_bytes = sizeof(OutputType::value_type) * _nchans * nsamps_per_load;
     int nblocks = nsamps / _nsamps_per_packet;
diff --git a/psrdada_cpp/effelsberg/edd/test/CMakeLists.txt b/psrdada_cpp/effelsberg/edd/test/CMakeLists.txt
index e16d105afc30b3957d92def77de33d8b86b75d91..541c4c6e6efd7836008c4a48c55c003162719add 100644
--- a/psrdada_cpp/effelsberg/edd/test/CMakeLists.txt
+++ b/psrdada_cpp/effelsberg/edd/test/CMakeLists.txt
@@ -7,7 +7,7 @@ set(
     src/DetectorAccumulatorTester.cu
     src/FftSpectrometerTester.cu
     src/UnpackerTester.cu
-    src/ScaledTransposeTFtoTFT.cu
+    src/ScaledTransposeTFtoTFTTester.cu
 )
 cuda_add_executable(gtest_edd ${gtest_edd_src} )
 target_link_libraries(gtest_edd ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} ${CUDA_CUFFT_LIBRARIES})
diff --git a/psrdada_cpp/effelsberg/edd/test/src/ScaledTransposeTFtoTFTTester.cu b/psrdada_cpp/effelsberg/edd/test/src/ScaledTransposeTFtoTFTTester.cu
index e75b229148be739cd1c0954cf4143013a616319c..ceb46ed46b5d432447af1e6308e71ab471e69eb2 100644
--- a/psrdada_cpp/effelsberg/edd/test/src/ScaledTransposeTFtoTFTTester.cu
+++ b/psrdada_cpp/effelsberg/edd/test/src/ScaledTransposeTFtoTFTTester.cu
@@ -37,11 +37,11 @@ void ScaledTransposeTFtoTFTTester::transpose_c_reference(
         const int nsamps, 
         const int nsamps_per_packet,
         const float scale,
-        const float offset);
+        const float offset)
 {
     int nsamples = input.size() / nchans;
     int outer_t_dim = nsamps / nsamps_per_packet;
-    output.size(input.size());
+    output.resize(input.size());
     for (int outer_t_idx = 0; outer_t_idx < outer_t_dim; ++outer_t_idx)
     {
         for (int chan_idx = 0; chan_idx < nchans; ++chan_idx)
@@ -68,7 +68,8 @@ void ScaledTransposeTFtoTFTTester::compare_against_host(
     ASSERT_EQ(host_output.size(), copy_from_gpu.size());
     for (std::size_t ii = 0; ii < host_output.size(); ++ii)
     {
-	ASSERT_EQ(host_output[ii], copy_from_gpu[ii]);
+	ASSERT_EQ(host_output[ii].x, copy_from_gpu[ii].x);
+	ASSERT_EQ(host_output[ii].y, copy_from_gpu[ii].y);
     }
 }
 
@@ -78,7 +79,7 @@ TEST_F(ScaledTransposeTFtoTFTTester, counter_test)
     int nsamps_per_packet = 8192/nchans;
     float stdev = 64.0f;
     float scale = 4.0f;
-    int nsamps = nsamps_per_packet * 1024
+    int nsamps = nsamps_per_packet * 1024;
     int n = nchans * nsamps;
     std::default_random_engine generator;
     std::normal_distribution<float> distribution(0.0, stdev);
@@ -91,9 +92,9 @@ TEST_F(ScaledTransposeTFtoTFTTester, counter_test)
     ScaledTransposeTFtoTFT::InputType gpu_input = host_input;
     ScaledTransposeTFtoTFT::OutputType gpu_output;
     OutputType host_output;
-    ScaledTransposeTFtoTFT transposer(nchans, nsamps_per_packet, nscale, 0.0, _stream);
+    ScaledTransposeTFtoTFT transposer(nchans, nsamps_per_packet, scale, 0.0, _stream);
     transposer.transpose(gpu_input, gpu_output);
-    detect_c_reference(host_input, host_output, nchans, nsamps, nsamps_per_packet, scale, 0.0);
+    transpose_c_reference(host_input, host_output, nchans, nsamps, nsamps_per_packet, scale, 0.0);
     compare_against_host(gpu_output, host_output);
 }