diff --git a/psrdada_cpp/effelsberg/edd/eddfft.cuh b/psrdada_cpp/effelsberg/edd/eddfft.cuh
index bc18907f7aaffe0fc0f05a88a5973ec2bcfe319a..94dc6229f7429af6ce25fb08784275e393b51db8 100644
--- a/psrdada_cpp/effelsberg/edd/eddfft.cuh
+++ b/psrdada_cpp/effelsberg/edd/eddfft.cuh
@@ -6,6 +6,8 @@
 #include "thrust/host_vector.h"
 #include "cufft.h"
 
+#define NTHREADS_UNPACK 512
+
 namespace psrdada_cpp {
 namespace effelsberg {
 namespace edd {
@@ -15,7 +17,7 @@ namespace kernels {
     void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__ out, int n);
 
     __global__
-    void detect_and_accumulate(cufftComplex* __restrict__ in, float* __restrict__ out, int nchans, int nsamps, int naccumulate)
+    void detect_and_accumulate(cufftComplex* __restrict__ in, float* __restrict__ out, int nchans, int nsamps, int naccumulate);
 
 
 } //kernels
diff --git a/psrdada_cpp/effelsberg/edd/src/eddfft.cu b/psrdada_cpp/effelsberg/edd/src/eddfft.cu
index 0c4ab129c50dcca26fa3d305ec5cd2dc9f0f5bad..4d8ddfba21ce8be73b4ae7cea8bc1ef83e16151b 100644
--- a/psrdada_cpp/effelsberg/edd/src/eddfft.cu
+++ b/psrdada_cpp/effelsberg/edd/src/eddfft.cu
@@ -39,8 +39,8 @@ void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__
      * Note: This kernels will not work with more than 512 threads.
      */
 
-    __shared__ volatile float tmp_out[NTHREADS * 16];
-    __shared__ volatile uint64_t tmp_in[NTHREADS * 3];
+    __shared__ volatile float tmp_out[NTHREADS_UNPACK * 16];
+    __shared__ volatile uint64_t tmp_in[NTHREADS_UNPACK * 3];
     int block_idx = blockIdx.x;
 
     uint64_t val;
@@ -51,10 +51,10 @@ void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__
     {
 
         //Read to shared memeory
-        int block_read_start = block_idx * NTHREADS * 3;
+        int block_read_start = block_idx * NTHREADS_UNPACK * 3;
         tmp_in[threadIdx.x]                = in[block_read_start + threadIdx.x];
-        tmp_in[NTHREADS + threadIdx.x]     = in[block_read_start + NTHREADS + threadIdx.x];
-        tmp_in[NTHREADS * 2 + threadIdx.x] = in[block_read_start + NTHREADS * 2 + threadIdx.x];
+        tmp_in[NTHREADS_UNPACK + threadIdx.x]     = in[block_read_start + NTHREADS_UNPACK + threadIdx.x];
+        tmp_in[NTHREADS_UNPACK * 2 + threadIdx.x] = in[block_read_start + NTHREADS_UNPACK * 2 + threadIdx.x];
 
         __syncthreads();
 
@@ -83,9 +83,9 @@ void unpack_edd_12bit_to_float32(uint64_t* __restrict__ in, float* __restrict__
 
         __syncthreads();
 
-        int block_write_start = block_idx * NTHREADS * 16;
+        int block_write_start = block_idx * NTHREADS_UNPACK * 16;
 
-        for (int ii = threadIdx.x; ii < 16 * NTHREADS; ii+=blockDim.x)
+        for (int ii = threadIdx.x; ii < 16 * NTHREADS_UNPACK; ii+=blockDim.x)
         {
             out[block_write_start+ii] = tmp_out[ii];
         }