diff --git a/psrdada_cpp/effelsberg/edd/src/ScaledTransposeTFtoTFT.cu b/psrdada_cpp/effelsberg/edd/src/ScaledTransposeTFtoTFT.cu
index effbcc2b28b9d60f01b06047ba1b0e29054757d3..6242c8237e8cf1a9a5c7b05cbbb0b14bab55fe0e 100644
--- a/psrdada_cpp/effelsberg/edd/src/ScaledTransposeTFtoTFT.cu
+++ b/psrdada_cpp/effelsberg/edd/src/ScaledTransposeTFtoTFT.cu
@@ -79,7 +79,8 @@ ScaledTransposeTFtoTFT::~ScaledTransposeTFtoTFT()
 
 void ScaledTransposeTFtoTFT::transpose(InputType const& input, OutputType& output)
 {
-
+    BOOST_LOG_TRIVIAL(debug) << "Preparing scaled transpose";
     const int max_threads = 1024;
     const int dim_x = std::min(_nchans, max_threads);
     const int dim_y = max_threads/dim_x;
@@ -88,16 +89,22 @@ void ScaledTransposeTFtoTFT::transpose(InputType const& input, OutputType& outpu
     output.resize(input.size());
     const int nsamps_per_load = 16;
     assert((_nsamps_per_packet % nsamps_per_load) == 0);
     const int nsamps = input.size() / _nchans;
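+    // Shared memory stages nsamps_per_load samples for every channel per block.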
     int shared_mem_bytes = sizeof(OutputType::value_type) * _nchans * nsamps_per_load;
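+    // One thread block handles one packet of _nsamps_per_packet samples.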
     int nblocks = nsamps / _nsamps_per_packet;
+    BOOST_LOG_TRIVIAL(debug) << "Scaled transpose will use " << shared_mem_bytes << " bytes of shared memory.";
     dim3 grid(nblocks);
     dim3 block(dim_x, dim_y);
     InputType::value_type const* input_ptr = thrust::raw_pointer_cast(input.data());
     OutputType::value_type* output_ptr = thrust::raw_pointer_cast(output.data());
+    BOOST_LOG_TRIVIAL(debug) << "Executing scaled transpose";
     kernels::tf_to_tft_transpose<<<grid, block, shared_mem_bytes, _stream>>>(
         input_ptr, output_ptr, _nchans, nsamps, _nsamps_per_packet, nsamps_per_load, _scale, _offset);
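+    // Wait for the kernel to complete so any asynchronous error surfaces here.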
     CUDA_ERROR_CHECK(cudaStreamSynchronize(_stream));
+    BOOST_LOG_TRIVIAL(debug) << "Scaled transpose complete";
 }
 
 } //namespace edd