Commit c204c68e authored by Andreas Marek's avatar Andreas Marek

Fix bug in GPU version

The association of CUDA memory with Fortran arrays was sometimes
done wrongly: the number of array elements times sizeof(datatype)
was used instead of just the number of array elements.
This was a mix-up between the C allocation and the Fortran reshape.

This could lead to memory corruption
parent f760d48e
......@@ -240,7 +240,7 @@
num = (max_local_rows*max_stored_rows) * size_of_datatype
successCUDA = cuda_malloc_host(hvm1_host,num)
check_alloc_cuda("trans_ev: hvm1_host", successCUDA)
call c_f_pointer(hvm1_host,hvm1,(/num/))
call c_f_pointer(hvm1_host,hvm1,(/(max_local_rows*max_stored_rows)/))
num = (max_stored_rows*max_stored_rows) * size_of_datatype
successCUDA = cuda_malloc_host(tmat_host,num)
......@@ -250,12 +250,12 @@
num = (max_local_cols*max_stored_rows) * size_of_datatype
successCUDA = cuda_malloc_host(tmp1_host,num)
check_alloc_cuda("trans_ev: tmp1_host", successCUDA)
call c_f_pointer(tmp1_host,tmp1,(/num/))
call c_f_pointer(tmp1_host,tmp1,(/(max_local_cols*max_stored_rows)/))
num = (max_local_cols*max_stored_rows) * size_of_datatype
successCUDA = cuda_malloc_host(tmp2_host,num)
check_alloc_cuda("trans_ev: tmp2_host", successCUDA)
call c_f_pointer(tmp2_host,tmp2,(/num/))
call c_f_pointer(tmp2_host,tmp2,(/(max_local_cols*max_stored_rows)/))
successCUDA = cuda_malloc(tmat_dev, max_stored_rows * max_stored_rows * size_of_datatype)
check_alloc_cuda("trans_ev", successCUDA)
......
......@@ -288,22 +288,22 @@ call prmat(na, useGpu, a_mat, a_dev, matrixRows, matrixCols, nblk, my_prow, my_p
num = (max_local_rows+1) * size_of_datatype
successCUDA = cuda_malloc_host(v_row_host,num)
check_host_alloc_cuda("tridiag: v_row_host", successCUDA)
call c_f_pointer(v_row_host,v_row,(/num/))
call c_f_pointer(v_row_host,v_row,(/(max_local_rows+1)/))
num = (max_local_cols) * size_of_datatype
successCUDA = cuda_malloc_host(v_col_host,num)
check_host_alloc_cuda("tridiag: v_col_host", successCUDA)
call c_f_pointer(v_col_host,v_col,(/num/))
call c_f_pointer(v_col_host,v_col,(/(max_local_cols)/))
num = (max_local_cols) * size_of_datatype
successCUDA = cuda_malloc_host(u_col_host,num)
check_host_alloc_cuda("tridiag: u_col_host", successCUDA)
call c_f_pointer(u_col_host,u_col,(/num/))
call c_f_pointer(u_col_host,u_col,(/(max_local_cols)/))
num = (max_local_rows) * size_of_datatype
successCUDA = cuda_malloc_host(u_row_host,num)
check_host_alloc_cuda("tridiag: u_row_host", successCUDA)
call c_f_pointer(u_row_host,u_row,(/num/))
call c_f_pointer(u_row_host,u_row,(/(max_local_rows)/))
num = (max_local_rows * 2*max_stored_uv) * size_of_datatype
successCUDA = cuda_host_register(int(loc(vu_stored_rows),kind=c_intptr_t),num,&
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment