Commit 47315fc7 authored by Serhiy Mochalskyy's avatar Serhiy Mochalskyy

Darray version

parent 9ab3d025
......@@ -24,20 +24,31 @@ program MPIIO_helloworld
message = "World!"
endif
write(500+rank,*) arr(:)
!write(500+rank,*) arr(:)
offset = rank*8
if (rank==0) number=arr(1)
if (rank==1) number=arr(2)
if (rank>1) number=arr(3)
!offset = rank*msgsize
call MPI_File_open(MPI_COMM_WORLD, "helloworld.txt", &
ior(MPI_MODE_CREATE,MPI_MODE_WRONLY),&
MPI_INFO_NULL, fileno, ierr)
call MPI_File_seek (fileno, offset, MPI_SEEK_SET, ierr)
call MPI_File_write(fileno, number, 1, MPI_DOUBLE_PRECISION, &
wstatus, ierr)
!call MPI_File_seek (fileno, offset, MPI_SEEK_SET, ierr)
!call MPI_File_write(fileno, number, 1, MPI_DOUBLE_PRECISION, &
! wstatus, ierr)
!call MPI_File_write_at_all(fileno, offset, message, msgsize, &
! MPI_CHARACTER, wstatus, ierr)
call MPI_File_write_at_all(fileno, offset, number, 1, &
MPI_DOUBLE_PRECISION, wstatus, ierr)
call MPI_File_close(fileno, ierr)
call MPI_Finalize(ierr)
end program MPIIO_helloworld
......
No preview for this file type
hexdump -v -e '9/8 "%10f "' -e '"\n"' testfile1.txt
File added
......@@ -13,7 +13,9 @@ module sca
integer :: INFO,INFO_A,INFO_B,INFO_Z,INFO_C
real :: ORFAC
integer :: lwork_cooficient
real, dimension(:),allocatable :: WORK
end module sca
......@@ -25,7 +27,7 @@ implicit none
integer :: rank,numtasks,ERRORCODE,ier
integer :: rank,numtasks,ier,request,ERRORCODE
integer :: sqrtnp,step
integer :: ntri_p_loc_b,ntri_p_loc_e, ntri_w_loc_b,ntri_w_loc_e
real :: time1,time2,time3
......@@ -40,7 +42,10 @@ implicit none
integer, dimension(mpi_status_size) :: wstatus
integer, dimension (MPI_STATUS_SIZE) :: status
integer (kind = MPI_OFFSET_KIND) :: offset, empty
integer size
integer(kind=MPI_OFFSET_KIND) :: disp
integer size,counter
integer, dimension(2) :: pdims, dims, distribs, dargs
integer :: my_darray
integer NUMROC
EXTERNAL NUMROC
......@@ -75,9 +80,6 @@ implicit none
NPROW = numtasks / NPCOL
!Check that the number of process passed as the argument matches the
!number of
!processes in the processor grid
CALL BLACS_PINFO(MYPNUM, NPROCS)
CALL BLACS_GET(-1, 0, CONTEXT)
......@@ -92,21 +94,21 @@ implicit none
CALL BLACS_GRIDINFO (CONTEXT, NPROW, NPCOL, MYROW, MYCOL)
size=9
size=150000
n_matrix_row=size
n_matrix_col=size
allocate(Arr(n_matrix_row,n_matrix_col),arr_glo_print(n_matrix_row,n_matrix_col),stat=ier)
IF (IER /= 0) THEN
WRITE (*,*) "global matrix can not allocate"
STOP
END IF
!allocate(Arr(n_matrix_row,n_matrix_col),arr_glo_print(n_matrix_row,n_matrix_col),stat=ier)
!IF (IER /= 0) THEN
! WRITE (*,*) "global matrix can not allocate"
! STOP
! END IF
do i=1,n_matrix_row
do j=1, n_matrix_col
Arr(i,j)=i*10.0+j
enddo
enddo
! do i=1,n_matrix_row
! do j=1, n_matrix_col
! Arr(i,j)=i*10.0+j
! enddo
! enddo
! if(rank==0) then
......@@ -125,7 +127,7 @@ implicit none
! write(*,*) Arr(:,:)
NB=2
NB=64
MP_A=NUMROC(n_matrix_row, NB, MYROW, 0, NPROW)
NQ_A=NUMROC(n_matrix_col, NB, MYCOL, 0, NPCOL)
......@@ -142,15 +144,22 @@ implicit none
! write(*,*) rank, " ",MP_A, NQ_A
do i=1,size
do j=1, size
call ScaLAPACK_mapping_i(i,i_loc,inside_i)
if (inside_i) then
call ScaLAPACK_mapping_j(j,j_loc,inside_j)
if (inside_j) then
arr_loc(i_loc,j_loc)=Arr(i,j)
endif
endif
!do i=1,size
! do j=1, size
! call ScaLAPACK_mapping_i(i,i_loc,inside_i)
! if (inside_i) then
! call ScaLAPACK_mapping_j(j,j_loc,inside_j)
! if (inside_j) then
! arr_loc(i_loc,j_loc)=Arr(i,j)
! endif
! endif
! enddo
!enddo
do i=1,MP_A
do j=1, NQ_A
Arr_loc(i,j)=rank+0.1123+i+j
enddo
enddo
......@@ -190,37 +199,49 @@ implicit none
!=======================================================
!==========================================================================
!allocate(WORK(LDA_A*10),stat=ier)
!CALL DESCINIT( DESCB, size, size, NB, NB, 0, 0, CONTEXT, LDA_A, INFO_A )
!CALL PDLAPRNT( size, size, arr_loc, 1, 1, DESCB, 0, 0, '', 0, WORK )
!===========================================================================
call MPI_FILE_OPEN(MPI_COMM_WORLD, 'testfile1.txt', &
MPI_MODE_WRONLY + MPI_MODE_CREATE, &
MPI_INFO_NULL, thefile, ier)
call MPI_BARRIER(MPI_COMM_WORLD, ier)
dims=[size, size]
distribs = [MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC]
dargs = [nb, nb]
pdims = [NPROW, NPCOL]
offset=0
!call MPI_Type_create_darray(procs, mpirank, 2, dims, distribs, dargs,pdims, MPI_ORDER_FORTRAN, MPI_REAL, darray,ierr)
time1=MPI_WTIME()
call MPI_Type_create_darray(numtasks, rank, 2, dims, distribs, dargs, pdims,MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, my_darray, ierr)
call MPI_Type_commit(my_darray,ierr)
call MPI_FILE_OPEN(MPI_COMM_WORLD, 'testfile2.txt', &
MPI_MODE_WRONLY + MPI_MODE_CREATE, &
MPI_INFO_NULL, thefile, ier)
call MPI_BARRIER(MPI_COMM_WORLD, ier)
DO i_loc = 1,MP_A
IC= INDXL2G( i_loc, NB, MYROW, 0, NPROW)
DO j_loc = 1,NQ_A
JC= INDXL2G( j_loc, NB, MYCOL, 0, NPCOL)
offset=((IC-1)*n_matrix_col+JC-1)*8
call MPI_File_seek (thefile, offset, MPI_SEEK_SET, ierr)
call MPI_File_write(thefile, arr_loc(i_loc,j_loc), 1, MPI_DOUBLE_PRECISION, &
status, ierr)
END DO
END DO
disp=0
call MPI_File_set_view(thefile, disp, MPI_DOUBLE_PRECISION,my_darray, "native",MPI_INFO_NULL, ierr)
call MPI_File_write_all(thefile, arr_loc, MP_A*NQ_A, MPI_DOUBLE_PRECISION, status, ierr)
call MPI_BARRIER(MPI_COMM_WORLD, ier)
call MPI_FILE_CLOSE(thefile, ier)
time2=MPI_WTIME()
if(rank==0) write(*,*) 'Matrix with size =', ((8.0*dble(size)**2)/1024.0/1024.0/1024.0) , ' GB was prinetd'
if(rank==0) write(*,*) 'Total wall clock time =',(time2-time1) , 'seconds'
if(rank==0) write(*,*) 'Total wall clock time =',(time2-time1)/3600.0, ' hours'
if(rank==0) write(*,*) '======================================================================='
......
No preview for this file type
module sca
implicit none
integer :: MYPNUM,NPROCS,CONTEXT,NPROW,NPCOL,MYCOL,MYROW,NB,MP_A,NQ_A,LDA_A
integer :: ipc,ipr
integer :: LDA_wp,LDA_pwe, LDA_pp,LDA_ep,LDA_ew, LDA_pwe_s,&
LDA_we,LDA_ee,LDA_ww,LDA_rw,LDA_sww, LDA_s_ww_inv
integer :: LDA_ey,LDA_ye,LDA_dee
integer :: DESCA(9),DESCB(9),DESCZ(9),DESCC(9)
integer :: DESC_ye(9)
integer :: INFO,INFO_A,INFO_B,INFO_Z,INFO_C
real :: ORFAC
integer :: lwork_cooficient
real, dimension(:),allocatable :: WORK
end module sca
program parallel_IO
use sca
use mpi
implicit none
integer :: rank,numtasks,ERRORCODE,ier
integer :: sqrtnp,step
integer :: ntri_p_loc_b,ntri_p_loc_e, ntri_w_loc_b,ntri_w_loc_e
real :: time1,time2,time3
integer :: n_matrix_row,n_matrix_col
real*8,dimension(:,:),allocatable :: arr, arr_loc, arr_glo_print
integer :: i,j,ierr
logical inside_i,inside_j
integer i_loc,j_loc
integer ::IC,JC
integer, dimension(mpi_status_size) :: wstatus
integer, dimension (MPI_STATUS_SIZE) :: status
integer (kind = MPI_OFFSET_KIND) :: offset, empty
integer size
integer NUMROC
EXTERNAL NUMROC
integer INDXL2G
EXTERNAL INDXL2G
integer thefile
!=====================================
call MPI_INIT(ier)
if (ier .ne. MPI_SUCCESS) then
print *,'Error starting MPI program. Terminating!!'
call MPI_ABORT(MPI_COMM_WORLD, ERRORCODE, ier)
endif
call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ier)
call MPI_COMM_SIZE(MPI_COMM_WORLD, numtasks, ier)
!In order to measure total wallclock time
call MPI_BARRIER(MPI_COMM_WORLD,ier)
time1=MPI_WTIME()
sqrtnp=int(sqrt(real(numtasks))+1)
do i=1,sqrtnp
if(mod(numtasks,i).eq.0) NPCOL=i
enddo
NPROW = numtasks / NPCOL
!Check that the number of process passed as the argument matches the
!number of
!processes in the processor grid
CALL BLACS_PINFO(MYPNUM, NPROCS)
CALL BLACS_GET(-1, 0, CONTEXT)
IF (NPROCS /= NPROW * NPCOL) THEN
WRITE(*,*) 'Error! Number of processors passed does not match with processors in the grid. &
NPROW=',NPROW,"NPCOL=",NPCOL,"NPROCS=",NPROCS
STOP
END IF
CALL BLACS_GRIDINIT (CONTEXT, 'R', NPROW, NPCOL )
CALL BLACS_GRIDINFO (CONTEXT, NPROW, NPCOL, MYROW, MYCOL)
size=9
n_matrix_row=size
n_matrix_col=size
allocate(Arr(n_matrix_row,n_matrix_col),arr_glo_print(n_matrix_row,n_matrix_col),stat=ier)
IF (IER /= 0) THEN
WRITE (*,*) "global matrix can not allocate"
STOP
END IF
do i=1,n_matrix_row
do j=1, n_matrix_col
Arr(i,j)=i*10.0+j
enddo
enddo
! if(rank==0) then
! do i=1,n_matrix_row
! do j=1, n_matrix_col
! write(*,*) Arr(i,j)
! enddo
! enddo
! endif
! write(*,*) Arr(:,:)
NB=2
MP_A=NUMROC(n_matrix_row, NB, MYROW, 0, NPROW)
NQ_A=NUMROC(n_matrix_col, NB, MYCOL, 0, NPCOL)
LDA_A= MAX(1, MP_A);
allocate(Arr_loc(MP_A,NQ_A), stat=ier)
IF (IER /= 0) THEN
WRITE (*,*) "matrix_pp Can not allocate local matrix a_pp: MY_PROC_NUM=",MYPNUM
STOP
END IF
! write(*,*) rank, " ",MP_A, NQ_A
do i=1,size
do j=1, size
call ScaLAPACK_mapping_i(i,i_loc,inside_i)
if (inside_i) then
call ScaLAPACK_mapping_j(j,j_loc,inside_j)
if (inside_j) then
arr_loc(i_loc,j_loc)=Arr(i,j)
endif
endif
enddo
enddo
! do i=1,MP_A
! do j=1, NQ_A
! write(100+rank,*) i,j,Arr_loc(i,j)
! enddo
! enddo
!==========================================Print
! DO i_loc = 1,MP_A
! IC= INDXL2G( i_loc, NB, MYROW, 0, NPROW)
! DO j_loc = 1,NQ_A
! JC= INDXL2G( j_loc, NB, MYCOL, 0, NPCOL)
! arr_glo_print(IC,JC) = arr_loc(i_loc,j_loc)
! END DO
! END DO
! if(rank==0) then
! call MPI_REDUCE(MPI_IN_PLACE, arr_glo_print, n_matrix_row*n_matrix_col, &
! MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IER)
! else
! call MPI_REDUCE(arr_glo_print, arr_glo_print, n_matrix_row*n_matrix_col, &
! MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IER)
!
! endif
! if(rank==0) then
! write(200,*) arr_glo_print(:,:)
! endif
!=======================================================
!1- global size
allocate(WORK(LDA_A*10),stat=ier)
CALL DESCINIT( DESCB, size, size, NB, NB, 0, 0, CONTEXT, LDA_A, INFO_A )
CALL PDLAPRNT( size, size, arr_loc, 1, 1, DESCB, 0, 0, '', 0, WORK )
call MPI_FILE_OPEN(MPI_COMM_WORLD, 'testfile1.txt', &
MPI_MODE_WRONLY + MPI_MODE_CREATE, &
MPI_INFO_NULL, thefile, ier)
call MPI_BARRIER(MPI_COMM_WORLD, ier)
offset=0
time1=MPI_WTIME()
DO i_loc = 1,MP_A
IC= INDXL2G( i_loc, NB, MYROW, 0, NPROW)
DO j_loc = 1,NQ_A
JC= INDXL2G( j_loc, NB, MYCOL, 0, NPCOL)
offset=((IC-1)*n_matrix_col+JC-1)*8
call MPI_File_seek (thefile, offset, MPI_SEEK_SET, ierr)
call MPI_File_write(thefile, arr_loc(i_loc,j_loc), 1, MPI_DOUBLE_PRECISION, &
status, ierr)
END DO
END DO
call MPI_FILE_CLOSE(thefile, ier)
time2=MPI_WTIME()
if(rank==0) write(*,*) 'Total wall clock time =',(time2-time1)/3600.0, ' hours'
if(rank==0) write(*,*) '======================================================================='
!call MPI_File_seek (thefile, offset, MPI_SEEK_SET, ierr)
!call MPI_File_write(thefile, arr_loc(1,1), 1, MPI_DOUBLE_PRECISION, &
! status, ierr)
!call MPI_FILE_SET_VIEW(thefile, empty, MPI_DOUBLE_PRECISION, MPI_DOUBLE_PRECISION, 'native', MPI_INFO_NULL, ier)
!call MPI_File_write_at(thefile, offset, arr_loc(1,1), 1, MPI_DOUBLE_PRECISION, status, ier)
!call MPI_FILE_WRITE_AT(fh, offset, bucket, region, MPI_REAL4,status, ier)
!call MPI_FILE_CLOSE(thefile, ier)
!time2=MPI_WTIME()
!if(rank==0) write(*,*) 'Total wall clock time including output=',(time2-time1)/3600.0, ' hours'
!if(rank==0) write(*,*) '======================================================================='
call MPI_FINALIZE(ier)
end program parallel_IO
subroutine ScaLAPACK_mapping_i(i,i_loc,inside_row)
! ----------------------------------------------------------------------
! purpose: 20/08/2015
! Check the mapping "i" index for ScaLAPCK local matrix
!! ----------------------------------------------------------------------
use sca ! new module for ScaLAPACK variables
! ----------------------------------------------------------------------
implicit none
integer INDXG2L,INDXG2P
EXTERNAL INDXG2L,INDXG2P
integer i,i_loc
logical inside_row
inside_row = .false.
ipr = INDXG2P(i,NB,0,0,NPROW)
if (ipr .eq. MYROW) then
inside_row = .true.
i_loc = INDXG2L(i,NB,0,0,NPROW)
endif
end subroutine ScaLAPACK_mapping_i
!DIR$ ATTRIBUTES FORCEINLINE :: ScaLAPACK_mapping_j
subroutine ScaLAPACK_mapping_j(j,j_loc,inside_col)
! ----------------------------------------------------------------------
! purpose: 20/08/2015
! Check the mapping "j" index for ScaLAPCK local matrix
!! ----------------------------------------------------------------------
use sca ! new module for ScaLAPACK variables
! ----------------------------------------------------------------------
implicit none
integer INDXG2L,INDXG2P
EXTERNAL INDXG2L,INDXG2P
integer j,j_loc
logical inside_col
inside_col = .false.
ipc = INDXG2P(j,NB,0,0,NPCOL)
if (ipc.eq.MYCOL) then
inside_col = .true.
j_loc = INDXG2L(j,NB,0,0,NPCOL)
endif
end subroutine ScaLAPACK_mapping_j
module sca
implicit none
integer :: MYPNUM,NPROCS,CONTEXT,NPROW,NPCOL,MYCOL,MYROW,NB,MP_A,NQ_A,LDA_A
integer :: ipc,ipr
integer :: LDA_wp,LDA_pwe, LDA_pp,LDA_ep,LDA_ew, LDA_pwe_s,&
LDA_we,LDA_ee,LDA_ww,LDA_rw,LDA_sww, LDA_s_ww_inv
integer :: LDA_ey,LDA_ye,LDA_dee
integer :: DESCA(9),DESCB(9),DESCZ(9),DESCC(9)
integer :: DESC_ye(9)
integer :: INFO,INFO_A,INFO_B,INFO_Z,INFO_C
real :: ORFAC
integer :: lwork_cooficient
real, dimension(:),allocatable :: WORK
end module sca
program parallel_IO
use sca
use mpi
implicit none
integer :: rank,numtasks,ier,request,ERRORCODE
integer :: sqrtnp,step
integer :: ntri_p_loc_b,ntri_p_loc_e, ntri_w_loc_b,ntri_w_loc_e
real :: time1,time2,time3
integer :: n_matrix_row,n_matrix_col
real*8,dimension(:,:),allocatable :: arr, arr_loc, arr_glo_print
integer :: i,j,ierr
logical inside_i,inside_j
integer i_loc,j_loc
integer ::IC,JC
integer, dimension(mpi_status_size) :: wstatus
integer, dimension (MPI_STATUS_SIZE) :: status
integer (kind = MPI_OFFSET_KIND) :: offset, empty
integer size
integer NUMROC
EXTERNAL NUMROC
integer INDXL2G
EXTERNAL INDXL2G
integer thefile
!=====================================
call MPI_INIT(ier)
if (ier .ne. MPI_SUCCESS) then
print *,'Error starting MPI program. Terminating!!'
call MPI_ABORT(MPI_COMM_WORLD, ERRORCODE, ier)
endif
call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ier)
call MPI_COMM_SIZE(MPI_COMM_WORLD, numtasks, ier)
!In order to measure total wallclock time
call MPI_BARRIER(MPI_COMM_WORLD,ier)
time1=MPI_WTIME()
sqrtnp=int(sqrt(real(numtasks))+1)
do i=1,sqrtnp
if(mod(numtasks,i).eq.0) NPCOL=i
enddo
NPROW = numtasks / NPCOL
CALL BLACS_PINFO(MYPNUM, NPROCS)
CALL BLACS_GET(-1, 0, CONTEXT)
IF (NPROCS /= NPROW * NPCOL) THEN