__global__ void my_pack_c_kernel_double(const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, double* src, double* dst)
#else
__global__ void my_pack_c_kernel_single(const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, float* src, float* dst)
#endif
{
int b_id, t_id ;
int dst_ind ;
b_id = blockIdx.y;
t_id = threadIdx.x;
dst_ind = b_id * stripe_width + t_id;
if (dst_ind < max_idx)
{
// dimension of dst - lnev, nblk
// dimension of src - stripe_width,a_dim2,stripe_count
__global__ void my_unpack_c_kernel_double( const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, double* src, double* dst)
#else
__global__ void my_unpack_c_kernel_single( const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, float* src, float* dst)
extern "C" void launch_my_pack_c_kernel_double(const int row_count, const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, double* a_dev, double* row_group_dev)
#else
extern "C" void launch_my_pack_c_kernel_single(const int row_count, const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, float* a_dev, float* row_group_dev)
extern "C" void launch_my_unpack_c_kernel_double( const int row_count, const int n_offset, const int max_idx, const int stripe_width,const int a_dim2, const int stripe_count, const int l_nev, double* row_group_dev, double* a_dev)
#else
extern "C" void launch_my_unpack_c_kernel_single( const int row_count, const int n_offset, const int max_idx, const int stripe_width,const int a_dim2, const int stripe_count, const int l_nev, float* row_group_dev, float* a_dev)
__global__ void my_pack_c_kernel_double(const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, double* src, double* dst)
#else
__global__ void my_pack_c_kernel_single(const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, float* src, float* dst)
#endif
#endif
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
__global__ void my_pack_c_kernel_complex_double(const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, cuDoubleComplex* src, cuDoubleComplex* dst)
#else
__global__ void my_pack_c_kernel_complex_single(const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, cuFloatComplex* src, cuFloatComplex* dst)
#endif
#endif
{
int b_id, t_id ;
int dst_ind ;
...
...
@@ -151,19 +229,33 @@ __global__ void my_pack_c_kernel_complex_single(const int n_offset, const int ma
dst_ind = b_id * stripe_width + t_id;
if (dst_ind < max_idx)
{
// dimension of dst - lnev, nblk
// dimension of src - stripe_width,a_dim2,stripe_count
// dimension of dst - lnev, nblk
// dimension of src - stripe_width,a_dim2,stripe_count
__global__ void my_unpack_c_kernel_double( const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, double* src, double* dst)
#else
__global__ void my_unpack_c_kernel_single( const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, float* src, float* dst)
#endif
#endif
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
__global__ void my_unpack_c_kernel_complex_double( const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, cuDoubleComplex* src, cuDoubleComplex* dst)
#else
__global__ void my_unpack_c_kernel_complex_single( const int n_offset, const int max_idx, const int stripe_width, const int a_dim2, const int stripe_count, const int l_nev, cuFloatComplex* src, cuFloatComplex* dst)
#endif
#endif
{
int b_id, t_id ;
int src_ind;
...
...
@@ -173,27 +265,52 @@ __global__ void my_unpack_c_kernel_complex_single( const int n_offset, const in