diff --git a/psrdada_cpp/effelsberg/edd/GatedSpectrometer.cuh b/psrdada_cpp/effelsberg/edd/GatedSpectrometer.cuh
index 7d9622629091245beb8c3b71f7100e89f8bbd08e..922d21399a5405aa119a6a6c1c5f61dee87ebb62 100644
--- a/psrdada_cpp/effelsberg/edd/GatedSpectrometer.cuh
+++ b/psrdada_cpp/effelsberg/edd/GatedSpectrometer.cuh
@@ -56,6 +56,7 @@ struct PolarizationData
     DoubleDeviceBuffer<RawVoltageType> _raw_voltage;
     /// Side channel data
     DoubleDeviceBuffer<uint64_t> _sideChannelData;
+    DoubleHostBuffer<uint64_t> _sideChannelData_h;
 
     /// Baseline in gate 0 state
     thrust::device_vector<UnpackedVoltageType> _baseLineG0;
@@ -164,6 +165,10 @@ struct PowerSpectrumOutput
     /// Number of samples integrated in this output block
     DoubleDeviceBuffer<uint64_cu> _noOfBitSets;
 
+    /// Number of samples overflowed
+    DoubleHostBuffer<uint64_cu> _noOfOverflowed;
+
+
     /// Swap output buffers and reset the buffer in given stream for new integration
     void swap(cudaStream_t &_proc_stream);
 };
@@ -237,6 +242,9 @@ struct FullStokesOutput
     /// Number of samples integrated in this output block
     DoubleDeviceBuffer<uint64_cu> _noOfBitSets;
 
+    /// Number of samples overflowed
+    DoubleHostBuffer<uint64_cu> _noOfOverflowed;
+
     /// Swap output buffers
     void swap(cudaStream_t &_proc_stream);
 };
@@ -316,23 +324,27 @@ public:
    */
   bool operator()(RawBytes &block);
 
-private:
-  // Processing strategy for single pol mode
+  // make the relevant processing methods proteceed only for testing
+protected:
+  /// Processing strategy for single pol mode
   void process(SinglePolarizationInput *inputDataStream, GatedPowerSpectrumOutput *outputDataStream);
 
-  // Processing strategy for dual pol  power mode
+  /// Processing strategy for dual pol  power mode
   //void process(DualPolarizationInput*inputDataStream, GatedPowerSpectrumOutput *outputDataStream);
 
-  // Processing strategy for full stokes mode
+  /// Processing strategy for full stokes mode
   void process(DualPolarizationInput *inputDataStream, GatedFullStokesOutput *outputDataStream);
 
+  OutputType* outputDataStream;
+  InputType* inputDataStream;
+
+private:
   // gate the data from the input stream and fft data per gate. Number of bit
   // sets in every gate is stored in the output datasets
   void gated_fft(PolarizationData &data,
     thrust::device_vector<uint64_cu> &_noOfBitSetsIn_G0,
     thrust::device_vector<uint64_cu> &_noOfBitSetsIn_G1);
 
-private:
   DadaBufferLayout _dadaBufferLayout;
   std::size_t _fft_length;
   std::size_t _naccumulate;
@@ -349,9 +361,6 @@ private:
 
   std::unique_ptr<Unpacker> _unpacker;
 
-  OutputType* outputDataStream;
-  InputType* inputDataStream;
-
   // Temporary processing block
   // ToDo: Use inplace FFT to avoid temporary voltage array
   thrust::device_vector<UnpackedVoltageType> _unpacked_voltage_G0;
diff --git a/psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu b/psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu
index 735acd857b07e2eb953c6b29be1073bc711f46c5..96480e6b7f8f26b0a1ab949cc6b862deeab0025f 100644
--- a/psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu
+++ b/psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu
@@ -364,8 +364,6 @@ void GatedSpectrometer<HandlerType, InputType, OutputType>::process(SinglePolari
 {
   gated_fft(*inputDataStream, outputDataStream->G0._noOfBitSets.a(), outputDataStream->G1._noOfBitSets.a());
 
-
-
   kernels::detect_and_accumulate<IntegratedPowerType> <<<1024, 1024, 0, _proc_stream>>>(
             thrust::raw_pointer_cast(inputDataStream->_channelised_voltage_G0.data()),
             thrust::raw_pointer_cast(outputDataStream->G0.data.a().data()),
@@ -382,6 +380,34 @@ void GatedSpectrometer<HandlerType, InputType, OutputType>::process(SinglePolari
             _naccumulate / _nBlocks,
             1, 0., 1, 0);
 
+    // count saturated samples
+    for(int output_block_number = 0; output_block_number <_nBlocks; output_block_number++)
+    {
+        outputDataStream->G0._noOfOverflowed.a()[output_block_number] = 0;
+        outputDataStream->G1._noOfOverflowed.a()[output_block_number] = 0;
+
+        const int heaps_per_output_spectra = inputDataStream->_sideChannelData_h.size() / _naccumulate / _nBlocks;
+        for (int j = output_block_number * heaps_per_output_spectra ; j < (output_block_number+1) * heaps_per_output_spectra * _dadaBufferLayout.getNSideChannels(); j+=_dadaBufferLayout.getNSideChannels())
+        {
+            if (TEST_BIT(inputDataStream->_sideChannelData_h.a().data()[j], 42))
+            { // heap was lost
+                continue;
+            }
+
+            uint16_t n_saturated = (inputDataStream->_sideChannelData_h.a().data()[j] >> 32);
+            if (TEST_BIT(inputDataStream->_sideChannelData_h.a().data()[j], _selectedBit))
+            {
+                outputDataStream->G1._noOfOverflowed.a()[output_block_number] += n_saturated;
+            }
+            else
+            {
+                outputDataStream->G0._noOfOverflowed.a()[output_block_number] += n_saturated;
+            }
+        }
+        BOOST_LOG_TRIVIAL(debug) << "Number of saturated samples G0: " << outputDataStream->G0._noOfOverflowed.a()[output_block_number]
+                    << ", G1: " << outputDataStream->G1._noOfOverflowed.a()[output_block_number];
+    }
+
 }
 
 
@@ -418,6 +444,43 @@ void GatedSpectrometer<HandlerType, InputType, OutputType>::process(DualPolariza
               thrust::raw_pointer_cast(outputDataStream->G1.V.a().data() + output_offset),
               _nchans, _naccumulate / _nBlocks
               );
+
+      // count saturated samples
+        outputDataStream->G0._noOfOverflowed.a()[output_block_number] = 0;
+        outputDataStream->G1._noOfOverflowed.a()[output_block_number] = 0;
+
+        size_t lostHeaps = 0;
+
+        const int heaps_per_output_spectra = inputDataStream->polarization0._sideChannelData_h.size() / _naccumulate / _nBlocks;
+        for (int j = output_block_number * heaps_per_output_spectra ; j < (output_block_number+1) * heaps_per_output_spectra * _dadaBufferLayout.getNSideChannels(); j+=_dadaBufferLayout.getNSideChannels())
+        {
+            if (TEST_BIT(inputDataStream->polarization0._sideChannelData_h.a().data()[j], 42) || TEST_BIT(inputDataStream->polarization1._sideChannelData_h.a().data()[j], 42))
+            {
+                lostHeaps++;
+               // thiss heap was lost, do not count
+               continue;
+            }
+
+            uint16_t n_saturated = (inputDataStream->polarization0._sideChannelData_h.a().data()[j] >> 32) + (inputDataStream->polarization1._sideChannelData_h.a().data()[j] >> 32);
+
+
+            if (TEST_BIT(inputDataStream->polarization0._sideChannelData_h.a().data()[j], _selectedBit))
+            {
+                outputDataStream->G1._noOfOverflowed.a()[output_block_number] += n_saturated;
+            }
+            else
+            {
+                outputDataStream->G0._noOfOverflowed.a()[output_block_number] += n_saturated;
+            }
+
+        }
+
+        BOOST_LOG_TRIVIAL(debug) << "Number of saturated samples G0: " << outputDataStream->G0._noOfOverflowed.a()[output_block_number]
+                    << ", G1: " << outputDataStream->G1._noOfOverflowed.a()[output_block_number];
+
+        BOOST_LOG_TRIVIAL(debug) << "Lost heaps: " << lostHeaps;
+
+
   }
 
   //  cudaDeviceSynchronize();
diff --git a/psrdada_cpp/effelsberg/edd/src/GatedSpectrometer.cu b/psrdada_cpp/effelsberg/edd/src/GatedSpectrometer.cu
index 016bce664e5f462aa8b4499c3c2b332a75312650..d593300669cc48419b2f1c5dbe05203876e10a71 100644
--- a/psrdada_cpp/effelsberg/edd/src/GatedSpectrometer.cu
+++ b/psrdada_cpp/effelsberg/edd/src/GatedSpectrometer.cu
@@ -28,7 +28,9 @@ __global__ void gating(float* __restrict__ G0,
         const float*  __restrict__ _baseLineG1,
         float* __restrict__ baseLineNG0,
         float* __restrict__ baseLineNG1,
-        uint64_cu* stats_G0, uint64_cu* stats_G1) {
+        uint64_cu* stats_G0,
+        uint64_cu* stats_G1)
+{
   // statistics values for samopels to G0, G1
   uint32_t _G0stats = 0;
   uint32_t _G1stats = 0;
@@ -91,6 +93,9 @@ __global__ void gating(float* __restrict__ G0,
 
 
 
+
+
+
 // Updates the baselines of the gates for the polarization set for the next
 // block
 // only few output blocks per input block thus execution on only one thread.
@@ -188,6 +193,7 @@ void PolarizationData::resize(size_t rawVolttageBufferBytes, size_t nsidechannel
     _channelised_voltage_G0.resize(channelized_samples);
     _channelised_voltage_G1.resize(channelized_samples);
     _sideChannelData.resize(nsidechannelitems);
+    _sideChannelData_h.resize(nsidechannelitems);
     BOOST_LOG_TRIVIAL(debug) << "  Channelised voltages size: " << _channelised_voltage_G0.size();
 }
 
@@ -196,8 +202,8 @@ SinglePolarizationInput::SinglePolarizationInput(size_t fft_length, size_t nbits
         &dadaBufferLayout) : PolarizationData(nbits), _fft_length(fft_length), _dadaBufferLayout(dadaBufferLayout)
 {
 
-  size_t nsamps_per_buffer = _dadaBufferLayout.sizeOfData() * 8 / nbits;
-  size_t _batch = nsamps_per_buffer / _fft_length;
+    size_t nsamps_per_buffer = _dadaBufferLayout.sizeOfData() * 8 / nbits;
+    size_t _batch = nsamps_per_buffer / _fft_length;
 
     resize(_dadaBufferLayout.sizeOfData(), _dadaBufferLayout.getNSideChannels() * _dadaBufferLayout.getNHeaps(), (_fft_length / 2 + 1) * _batch);
 };
@@ -229,6 +235,11 @@ void SinglePolarizationInput::getFromBlock(RawBytes &block, cudaStream_t &_h2d_s
       static_cast<void *>(_sideChannelData.a_ptr()),
       static_cast<void *>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap()),
       _dadaBufferLayout.sizeOfSideChannelData(), cudaMemcpyHostToDevice, _h2d_stream));
+
+    std::memcpy(static_cast<void *>(_sideChannelData_h.a_ptr()),
+      static_cast<void *>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap()),
+      _dadaBufferLayout.sizeOfSideChannelData());
+
   BOOST_LOG_TRIVIAL(debug) << "First side channel item: 0x" <<   std::setw(16)
       << std::setfill('0') << std::hex <<
       (reinterpret_cast<uint64_t*>(block.ptr() + _dadaBufferLayout.sizeOfData()
@@ -298,6 +309,7 @@ CUDA_ERROR_CHECK(cudaMemcpy2DAsync(
     _dadaBufferLayout.sizeOfSideChannelData() / 2 / sizeof(uint64_t),
     cudaMemcpyHostToDevice, _h2d_stream));
 
+
 CUDA_ERROR_CHECK(cudaMemcpy2DAsync(
     static_cast<void *>(polarization1._sideChannelData.a_ptr()),
     sizeof(uint64_t),
@@ -306,6 +318,14 @@ CUDA_ERROR_CHECK(cudaMemcpy2DAsync(
     sizeof(uint64_t),
     _dadaBufferLayout.sizeOfSideChannelData() / 2 / sizeof(uint64_t), cudaMemcpyHostToDevice, _h2d_stream));
 
+    uint64_t * sci = reinterpret_cast<uint64_t *>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap());
+    for(int i =0; i < _dadaBufferLayout.sizeOfSideChannelData() / sizeof(uint64_t); i+=2)
+    {
+       polarization0._sideChannelData_h.a()[i / 2] = sci[i];
+       polarization1._sideChannelData_h.a()[i / 2] = sci[i + 1];
+    }
+
+
 BOOST_LOG_TRIVIAL(debug) << "First side channel item: 0x" <<   std::setw(16)
     << std::setfill('0') << std::hex <<
     (reinterpret_cast<uint64_t*>(block.ptr() + _dadaBufferLayout.sizeOfData()
@@ -319,6 +339,7 @@ PowerSpectrumOutput::PowerSpectrumOutput(size_t size, size_t blocks)
     BOOST_LOG_TRIVIAL(debug) << "Setting size of power spectrum output size = " << size << ", blocks =  " << blocks;
    data.resize(size * blocks);
    _noOfBitSets.resize(blocks);
+   _noOfOverflowed.resize(blocks);
 }
 
 
@@ -326,6 +347,7 @@ void PowerSpectrumOutput::swap(cudaStream_t &_proc_stream)
 {
     data.swap();
     _noOfBitSets.swap();
+    _noOfOverflowed.swap();
     thrust::fill(thrust::cuda::par.on(_proc_stream), data.a().begin(), data.a().end(), 0.);
     thrust::fill(thrust::cuda::par.on(_proc_stream), _noOfBitSets.a().begin(), _noOfBitSets.a().end(), 0L);
 }
@@ -337,7 +359,7 @@ G1(nchans, blocks)
 {
   // on the host both power are stored in the same data buffer together with
   // the number of bit sets
-  _host_power.resize( 2 * ( _nchans * sizeof(IntegratedPowerType) + sizeof(size_t) ) * G0._noOfBitSets.size());
+  _host_power.resize( 2 * ( _nchans * sizeof(IntegratedPowerType) + 2 * sizeof(size_t) ) * G0._noOfBitSets.size());
 }
 
 
@@ -360,7 +382,7 @@ void GatedPowerSpectrumOutput::data2Host(cudaStream_t &_d2h_stream)
     // size of individual spectrum + meta
     size_t memslicesize = (_nchans * sizeof(IntegratedPowerType));
     // number of spectra per output
-    size_t memOffset = 2 * i * (memslicesize + sizeof(size_t));
+    size_t memOffset = 2 * i * (memslicesize + 2*sizeof(size_t));
 
     // copy 2x channel data
     CUDA_ERROR_CHECK(
@@ -382,11 +404,20 @@ void GatedPowerSpectrumOutput::data2Host(cudaStream_t &_d2h_stream)
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
 
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType) + sizeof(size_t)),
+             static_cast<void *>(G0._noOfOverflowed.b_ptr() + i),
+            1 * sizeof(size_t));
+
     CUDA_ERROR_CHECK(
-        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType) + sizeof(size_t)),
+        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType) + 2 * sizeof(size_t)),
           static_cast<void *>(G1._noOfBitSets.b_ptr() + i ),
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
+
+
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType) + 3*sizeof(size_t)),
+             static_cast<void *>(G1._noOfOverflowed.b_ptr() + i),
+            1 * sizeof(size_t));
   }
 }
 
@@ -398,6 +429,7 @@ void FullStokesOutput::swap(cudaStream_t &_proc_stream)
     U.swap();
     V.swap();
     _noOfBitSets.swap();
+    _noOfOverflowed.swap();
     thrust::fill(thrust::cuda::par.on(_proc_stream), I.a().begin(), I.a().end(), 0.);
     thrust::fill(thrust::cuda::par.on(_proc_stream), Q.a().begin(), Q.a().end(), 0.);
     thrust::fill(thrust::cuda::par.on(_proc_stream), U.a().begin(), U.a().end(), 0.);
@@ -413,6 +445,7 @@ FullStokesOutput::FullStokesOutput(size_t size, size_t blocks)
     U.resize(size * blocks);
     V.resize(size * blocks);
     _noOfBitSets.resize(blocks);
+   _noOfOverflowed.resize(blocks);
 }
 
 
@@ -421,7 +454,7 @@ GatedFullStokesOutput::GatedFullStokesOutput(size_t nchans, size_t blocks): Outp
 G1(nchans, blocks)
 {
     BOOST_LOG_TRIVIAL(debug) << "Output with " << _blocks << " blocks a " << _nchans << " channels";
-    _host_power.resize( 8 * ( _nchans * sizeof(IntegratedPowerType) + sizeof(size_t) ) * _blocks);
+    _host_power.resize( 8 * ( _nchans * sizeof(IntegratedPowerType) + 2 * sizeof(size_t) ) * _blocks);
     BOOST_LOG_TRIVIAL(debug) << "Allocated " << _host_power.size() << " bytes.";
 };
 
@@ -439,7 +472,7 @@ void GatedFullStokesOutput::data2Host(cudaStream_t &_d2h_stream)
 for (size_t i = 0; i < G0._noOfBitSets.size(); i++)
 {
     size_t memslicesize = (_nchans * sizeof(IntegratedPowerType));
-    size_t memOffset = 8 * i * (memslicesize + sizeof(size_t));
+    size_t memOffset = 8 * i * (memslicesize + 2 * sizeof(size_t));
     // Copy  II QQ UU VV
     CUDA_ERROR_CHECK(
         cudaMemcpyAsync(static_cast<void *>(_host_power.a_ptr() + memOffset) ,
@@ -495,41 +528,64 @@ for (size_t i = 0; i < G0._noOfBitSets.size(); i++)
           static_cast<void *>(G0._noOfBitSets.b_ptr() + i ),
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 1 * sizeof(size_t) ),
+            static_cast<void *>(G0._noOfOverflowed.b_ptr() + i ), 1 * sizeof(size_t) );
+
     CUDA_ERROR_CHECK(
-        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 1 * sizeof(size_t)),
+        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 2 * sizeof(size_t)),
           static_cast<void *>(G1._noOfBitSets.b_ptr() + i ),
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 3 * sizeof(size_t) ),
+            static_cast<void *>(G1._noOfOverflowed.b_ptr() + i ), 1 * sizeof(size_t) );
+
     CUDA_ERROR_CHECK(
-        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 2 * sizeof(size_t)),
+        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 4 * sizeof(size_t)),
           static_cast<void *>(G0._noOfBitSets.b_ptr() + i ),
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 5 * sizeof(size_t) ),
+            static_cast<void *>(G0._noOfOverflowed.b_ptr() + i ), 1 * sizeof(size_t) );
+
     CUDA_ERROR_CHECK(
-        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 3 * sizeof(size_t)),
+        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 6 * sizeof(size_t)),
           static_cast<void *>(G1._noOfBitSets.b_ptr() + i ),
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 7 * sizeof(size_t) ),
+            static_cast<void *>(G1._noOfOverflowed.b_ptr() + i ), 1 * sizeof(size_t) );
+
     CUDA_ERROR_CHECK(
-        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 4 * sizeof(size_t)),
+        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 8 * sizeof(size_t)),
           static_cast<void *>(G0._noOfBitSets.b_ptr() + i ),
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 9 * sizeof(size_t) ),
+            static_cast<void *>(G0._noOfOverflowed.b_ptr() + i ), 1 * sizeof(size_t) );
+
     CUDA_ERROR_CHECK(
-        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 5 * sizeof(size_t)),
+        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 10 * sizeof(size_t)),
           static_cast<void *>(G1._noOfBitSets.b_ptr() + i ),
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 11 * sizeof(size_t) ),
+            static_cast<void *>(G1._noOfOverflowed.b_ptr() + i ), 1 * sizeof(size_t) );
+
     CUDA_ERROR_CHECK(
-        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 6 * sizeof(size_t)),
+        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 12 * sizeof(size_t)),
           static_cast<void *>(G0._noOfBitSets.b_ptr() + i ),
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 13 * sizeof(size_t) ),
+            static_cast<void *>(G0._noOfOverflowed.b_ptr() + i ), 1 * sizeof(size_t) );
+
     CUDA_ERROR_CHECK(
-        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 7 * sizeof(size_t)),
+        cudaMemcpyAsync( static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 14 * sizeof(size_t)),
           static_cast<void *>(G1._noOfBitSets.b_ptr() + i ),
             1 * sizeof(size_t),
             cudaMemcpyDeviceToHost, _d2h_stream));
+    std::memcpy(static_cast<void *>(_host_power.a_ptr() + memOffset + 8 * memslicesize + 15 * sizeof(size_t) ),
+            static_cast<void *>(G1._noOfOverflowed.b_ptr() + i ), 1 * sizeof(size_t) );
   }
 }
 
diff --git a/psrdada_cpp/effelsberg/edd/test/src/GatedSpectrometerTest.cu b/psrdada_cpp/effelsberg/edd/test/src/GatedSpectrometerTest.cu
index 24ef94bd4fdfb994cdfd56361652b30b0a39e9d6..9a42371adecfcd1b7fe1bcbf9b1dd373437f6184 100644
--- a/psrdada_cpp/effelsberg/edd/test/src/GatedSpectrometerTest.cu
+++ b/psrdada_cpp/effelsberg/edd/test/src/GatedSpectrometerTest.cu
@@ -276,34 +276,34 @@ TEST(GatedSpectrometer, array_sum) {
 }
 
 
-class GatedTestSink{
+class GatedTestSinkSinglePol{
     private:
         size_t fft_length;
         size_t call_count;
         size_t nHeaps;
         size_t naccumulate;
     public:
-        GatedTestSink(size_t fft_length, size_t nHeaps, size_t naccumulate): fft_length(fft_length), nHeaps(nHeaps), naccumulate(naccumulate), call_count(0) {};
+        GatedTestSinkSinglePol(size_t fft_length, size_t nHeaps, size_t naccumulate): fft_length(fft_length), nHeaps(nHeaps), naccumulate(naccumulate), call_count(0) {};
     // Test the correctness of output of the processing test
         void init(psrdada_cpp::RawBytes&){
         };
         bool operator()(psrdada_cpp::RawBytes& buf)
         {
-            const size_t number_of_sc_items = 1;
+            const size_t number_of_sc_items = 2;
             const size_t nchans = fft_length / 2 + 1;
             EXPECT_EQ(buf.used_bytes(), (32 / 8 * nchans + number_of_sc_items* 64 / 8) * 2);
 
-            // the first elements of the spectraa are related to the power
             float *G0 = reinterpret_cast<float*>(buf.ptr());
             float *G1 = reinterpret_cast<float*>(buf.ptr() + nchans * 32 /8);
-            uint64_t *S = reinterpret_cast<uint64_t*>(buf.ptr() + 2 *nchans * 32 /8);
 
             // Expected half number of samples per gate
+            uint64_t *S = reinterpret_cast<uint64_t*>(buf.ptr() + 2 *nchans * 32 /8);
             EXPECT_EQ(S[0], nHeaps * 4096 / 2);
-            EXPECT_EQ(S[1], nHeaps * 4096 / 2);
+            EXPECT_EQ(S[2], nHeaps * 4096 / 2);
 
-            //EXPECT_FLOAT_EQ(G0[0], 12.) << "Call count: " << call_count ;
-            //EXPECT_FLOAT_EQ(G1[1], 23.) << "Call count: " << call_count ;;
+            // Correct number of overflowed samples
+            EXPECT_EQ(S[1], 27);
+            EXPECT_EQ(S[3], 23);  // First heap has 23 and bit set, thus G1
 
             call_count ++;
             return false;
@@ -311,7 +311,7 @@ class GatedTestSink{
 
 };
 
-TEST(GatedSpectrometer, processing)
+TEST(GatedSpectrometer, processingSinglePol)
 {
     const size_t nbits = 8;
     const size_t nHeaps = 1024;
@@ -326,9 +326,9 @@ TEST(GatedSpectrometer, processing)
 
     psrdada_cpp::effelsberg::edd::DadaBufferLayout bufferLayout(idbuffer.key(), heapSize, 1);
 
-    GatedTestSink sink(fft_length, nHeaps, naccumulate);
+    GatedTestSinkSinglePol sink(fft_length, nHeaps, naccumulate);
     psrdada_cpp::effelsberg::edd::GatedSpectrometer<
-        GatedTestSink,
+        GatedTestSinkSinglePol,
         psrdada_cpp::effelsberg::edd::SinglePolarizationInput,
         psrdada_cpp::effelsberg::edd::GatedPowerSpectrumOutput>
             spectrometer(bufferLayout,
@@ -342,35 +342,114 @@ TEST(GatedSpectrometer, processing)
     psrdada_cpp::RawBytes buff(raw_buffer, inputBufferSize, inputBufferSize);
     EXPECT_NO_THROW(spectrometer.init(buff));
 
-
-    for (int i = 0; i < nHeaps / 2; i++)
+    //// fill sci data
+    uint64_t* sc_items = reinterpret_cast<uint64_t*>(raw_buffer + nHeaps * 4096);
+    for (int i = 0; i < nHeaps; i+=2)
     {
-        raw_buffer[i] = 0;
+        sc_items[i] = 0;
+        sc_items[i+1] = 0;
+        SET_BIT(sc_items[i], 0);
     }
+    sc_items[0] |= (23UL) << 32;
+    sc_items[1] |= (27UL) << 32;
 
-    // fill data alternating 2,3 per heap
-    for (int i = 0; i < nHeaps; i+=2)
+    for (int i = 0; i < 12; i++)
     {
-        for (int j = 0; j < 4096; j++)
-        {
-            raw_buffer[i * 4096 + j] = 2;
-        }
-        for (int j = 4096; j < 2 * 4096; j++)
-        {
-            raw_buffer[i * 4096 + j] = 3;
-        }
+        EXPECT_NO_THROW(spectrometer(buff));
     }
+
+    delete [] raw_buffer;
+}
+
+
+class GatedTestSinkFullStokes{
+    private:
+        size_t fft_length;
+        size_t call_count;
+        size_t nHeaps;
+        size_t naccumulate;
+    public:
+        GatedTestSinkFullStokes(size_t fft_length, size_t nHeaps, size_t naccumulate): fft_length(fft_length), nHeaps(nHeaps), naccumulate(naccumulate), call_count(0) {};
+    // Test the correctness of output of the processing test
+        void init(psrdada_cpp::RawBytes&){
+        };
+        bool operator()(psrdada_cpp::RawBytes& buf)
+        {
+            const size_t number_of_sc_items = 2;
+            const size_t nchans = fft_length / 2 + 1;
+            EXPECT_EQ(buf.used_bytes(), (32 / 8 * nchans + number_of_sc_items* 64 / 8) * 2 * 4); // 4 Spectra (IQUV), 2 Gates
+
+            // Expected half number of samples per gate
+            for(int i =0; i <4; i++)
+            {
+                uint64_t *S = reinterpret_cast<uint64_t*>(buf.ptr() + 8 *nchans * 32 /8 + i * 2 *sizeof(size_t));
+                EXPECT_EQ(S[0], nHeaps * 4096); // expect samples from two polarizations
+                EXPECT_EQ(S[2], nHeaps * 4096);
+
+                // Correct number of overflowed samples
+                EXPECT_EQ(S[1], 27 + 7);
+                EXPECT_EQ(S[3], 23 + 3);  // First heap has 23 and bit set, thus G1
+            }
+
+            call_count ++;
+            return false;
+        };
+
+};
+
+
+
+
+TEST(GatedSpectrometer, processingFullStokes)
+{
+    const size_t nbits = 8;
+    const size_t nHeaps = 1024;
+    const size_t fft_length = 1024 * 64;
+    const size_t naccumulate = 4096 * nHeaps / fft_length;
+
+    const size_t heapSize = 4096 * nbits / 8;
+    const size_t inputBufferSize = 2 * nHeaps * (heapSize + 64 / 8) ;
+
+    psrdada_cpp::DadaDB idbuffer(5, inputBufferSize, 1, 4096);
+    idbuffer.create();
+
+    psrdada_cpp::effelsberg::edd::DadaBufferLayout bufferLayout(idbuffer.key(), heapSize, 1);
+
+    GatedTestSinkFullStokes sink(fft_length, nHeaps, naccumulate);
+    psrdada_cpp::effelsberg::edd::GatedSpectrometer<
+        GatedTestSinkFullStokes,
+        psrdada_cpp::effelsberg::edd::DualPolarizationInput,
+        psrdada_cpp::effelsberg::edd::GatedFullStokesOutput>
+            spectrometer(bufferLayout,
+                    0, 0,
+                    fft_length,
+                    naccumulate, nbits,
+                    sink);
+
+    char  *raw_buffer = new char[inputBufferSize];
+
+    psrdada_cpp::RawBytes buff(raw_buffer, inputBufferSize, inputBufferSize);
+    EXPECT_NO_THROW(spectrometer.init(buff));
+
     //// fill sci data
-    uint64_t* sc_items = reinterpret_cast<uint64_t*>(raw_buffer + nHeaps * 4096);
-    for (int i = 0; i < nHeaps; i+=2)
+    uint64_t* sc_items = reinterpret_cast<uint64_t*>(raw_buffer + 2*nHeaps * 4096);
+    for (int i = 0; i < 2 * nHeaps; i+=4)
     {
+        sc_items[i] = 0;
+        sc_items[i+1] = 0;
+        sc_items[i+2] = 0;
+        sc_items[i+3] = 0;
         SET_BIT(sc_items[i], 0);
-        //SET_BIT(sc_items[i+1], 0);
+        SET_BIT(sc_items[i + 1], 0);
     }
+    sc_items[0] |= (23UL) << 32;
+    sc_items[1] |= (3UL) << 32;
 
+    sc_items[2] |= (27UL) << 32;
+    sc_items[3] |= (7UL) << 32;
 
 
-    for (int i = 0; i < 5; i++)
+    for (int i = 0; i < 12; i++)
     {
         EXPECT_NO_THROW(spectrometer(buff));
     }
@@ -386,9 +465,6 @@ TEST(GatedSpectrometer, processing)
 
 
 
-
-
-
 struct gated_params
 {
     std::size_t fft_length;