diff --git a/Hello.f90 b/Hello.f90
index 1e13bfec793d069f00bee73c01e46931b222b070..00aa3895037c866e643b1190f62c5bf38637e748 100644
--- a/Hello.f90
+++ b/Hello.f90
@@ -24,20 +24,31 @@ program MPIIO_helloworld
         message = "World!"
     endif
 
-    write(500+rank,*) arr(:)
+    !write(500+rank,*) arr(:)
 
     offset = rank*8
 
+    if (rank==0) number=arr(1)
     if (rank==1) number=arr(2)
     if (rank>1)  number=arr(3)
 
+    !offset = rank*msgsize
+
     call MPI_File_open(MPI_COMM_WORLD, "helloworld.txt", &
                        ior(MPI_MODE_CREATE,MPI_MODE_WRONLY),&
                        MPI_INFO_NULL, fileno, ierr)
-    call MPI_File_seek (fileno, offset, MPI_SEEK_SET, ierr)
-    call MPI_File_write(fileno, number, 1, MPI_DOUBLE_PRECISION, &
-                        wstatus, ierr)
+
+    !call MPI_File_seek (fileno, offset, MPI_SEEK_SET, ierr)
+    !call MPI_File_write(fileno, number, 1, MPI_DOUBLE_PRECISION, &
+    !                    wstatus, ierr)
+
+    !call MPI_File_write_at_all(fileno, offset, message, msgsize, &
+    !                           MPI_CHARACTER, wstatus, ierr)
+
+    call MPI_File_write_at_all(fileno, offset, number, 1, &
+                               MPI_DOUBLE_PRECISION, wstatus, ierr)
+
     call MPI_File_close(fileno, ierr)
     call MPI_Finalize(ierr)
 
 end program MPIIO_helloworld
diff --git a/Hello.out b/Hello.out
index f212edc54dbe3126d71e1b0f7981dbc9a5dd7002..08823842937cbfbab5146a1cf46640b783e97bcb 100755
Binary files a/Hello.out and b/Hello.out differ
diff --git a/commands.dat b/commands.dat
new file mode 100644
index 0000000000000000000000000000000000000000..fc53744beb90f9a766c7c545ce880607e3983e29
--- /dev/null
+++ b/commands.dat
@@ -0,0 +1 @@
+hexdump -v -e '9/8 "%10f "' -e '"\n"' testfile1.txt
diff --git a/helloworld.txt b/helloworld.txt
new file mode 100644
index 0000000000000000000000000000000000000000..42fed14ac8730bc6f1f3cc60f4271611e721f7e3
Binary files /dev/null and b/helloworld.txt differ
diff --git a/parallel_IO.f90 b/parallel_IO.f90
index 021c8aac2975d1cf11e7025cce77b6c858b42798..550cad6bc78612d93762abc95ed6154de6515129 100644
--- a/parallel_IO.f90
+++ b/parallel_IO.f90
@@ -13,7 +13,9 @@ module sca
   integer :: INFO,INFO_A,INFO_B,INFO_Z,INFO_C
   real    :: ORFAC
   integer :: lwork_cooficient
-
+  real, dimension(:),allocatable :: WORK
+
+
 end module sca
@@ -25,7 +27,7 @@ implicit none
 
 
-  integer :: rank,numtasks,ERRORCODE,ier
+  integer :: rank,numtasks,ier,request,ERRORCODE
   integer :: sqrtnp,step
   integer :: ntri_p_loc_b,ntri_p_loc_e, ntri_w_loc_b,ntri_w_loc_e
   real :: time1,time2,time3
@@ -40,7 +42,10 @@ implicit none
   integer, dimension(mpi_status_size) :: wstatus
   integer, dimension (MPI_STATUS_SIZE) :: status
   integer (kind = MPI_OFFSET_KIND) :: offset, empty
-  integer size
+  integer(kind=MPI_OFFSET_KIND) :: disp
+  integer size,counter
+  integer, dimension(2) :: pdims, dims, distribs, dargs
+  integer :: my_darray
 
   integer NUMROC
   EXTERNAL NUMROC
@@ -75,9 +80,6 @@ implicit none
 
   NPROW = numtasks / NPCOL
 
-!Check that the number of process passed as the argument matches the
-!number of
-!processes in the processor grid
 
   CALL BLACS_PINFO(MYPNUM, NPROCS)
   CALL BLACS_GET(-1, 0, CONTEXT)
@@ -92,21 +94,21 @@ implicit none
   CALL BLACS_GRIDINFO (CONTEXT, NPROW, NPCOL, MYROW, MYCOL)
 
-  size=9
+  size=150000
   n_matrix_row=size
   n_matrix_col=size
 
-  allocate(Arr(n_matrix_row,n_matrix_col),arr_glo_print(n_matrix_row,n_matrix_col),stat=ier)
-  IF (IER /= 0) THEN
-     WRITE (*,*) "global matrix can not allocate"
-     STOP
-  END IF
+  !allocate(Arr(n_matrix_row,n_matrix_col),arr_glo_print(n_matrix_row,n_matrix_col),stat=ier)
+  !IF (IER /= 0) THEN
+  !   WRITE (*,*) "global matrix can not allocate"
+  !   STOP
+  ! END IF
 
-  do i=1,n_matrix_row
-   do j=1, n_matrix_col
-     Arr(i,j)=i*10.0+j
-   enddo
-  enddo
+  ! do i=1,n_matrix_row
+  !  do j=1, n_matrix_col
+  !    Arr(i,j)=i*10.0+j
+  !  enddo
+  ! enddo
 
 
 ! if(rank==0) then
@@ -125,7 +127,7 @@ implicit none
 ! write(*,*) Arr(:,:)
 
 
-  NB=2
+  NB=64
   MP_A=NUMROC(n_matrix_row, NB, MYROW, 0, NPROW)
   NQ_A=NUMROC(n_matrix_col, NB, MYCOL, 0, NPCOL)
@@ -142,15 +144,22 @@ implicit none
 ! write(*,*) rank, " ",MP_A, NQ_A
 
 
-  do i=1,size
-   do j=1, size
-     call ScaLAPACK_mapping_i(i,i_loc,inside_i)
-     if (inside_i) then
-        call ScaLAPACK_mapping_j(j,j_loc,inside_j)
-        if (inside_j) then
-           arr_loc(i_loc,j_loc)=Arr(i,j)
-        endif
-     endif
+  !do i=1,size
+  ! do j=1, size
+  !   call ScaLAPACK_mapping_i(i,i_loc,inside_i)
+  !   if (inside_i) then
+  !      call ScaLAPACK_mapping_j(j,j_loc,inside_j)
+  !      if (inside_j) then
+  !         arr_loc(i_loc,j_loc)=Arr(i,j)
+  !      endif
+  !   endif
+  ! enddo
+  !enddo
+
+
+  do i=1,MP_A
+   do j=1, NQ_A
+     Arr_loc(i,j)=rank+0.1123+i+j
   enddo
   enddo
@@ -190,37 +199,49 @@ implicit none
 
 
-!=======================================================
+!==========================================================================
+  !allocate(WORK(LDA_A*10),stat=ier)
+  !CALL DESCINIT( DESCB, size, size, NB, NB, 0, 0, CONTEXT, LDA_A, INFO_A )
+  !CALL PDLAPRNT( size, size, arr_loc, 1, 1, DESCB, 0, 0, '', 0, WORK )
+!===========================================================================
+
+
+
-call MPI_FILE_OPEN(MPI_COMM_WORLD, 'testfile1.txt', &
-                   MPI_MODE_WRONLY + MPI_MODE_CREATE, &
-                   MPI_INFO_NULL, thefile, ier)
-
-call MPI_BARRIER(MPI_COMM_WORLD, ier)
+dims=[size, size]
+distribs = [MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC]
+dargs = [nb, nb]
+pdims = [NPROW, NPCOL]
 
-offset=0
+
+!call MPI_Type_create_darray(procs, mpirank, 2, dims, distribs, dargs,pdims, MPI_ORDER_FORTRAN, MPI_REAL, darray,ierr)
 
 time1=MPI_WTIME()
 
+call MPI_Type_create_darray(numtasks, rank, 2, dims, distribs, dargs, pdims,MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, my_darray, ierr)
+call MPI_Type_commit(my_darray,ierr)
+
+
+call MPI_FILE_OPEN(MPI_COMM_WORLD, 'testfile2.txt', &
+                   MPI_MODE_WRONLY + MPI_MODE_CREATE, &
+                   MPI_INFO_NULL, thefile, ier)
+call MPI_BARRIER(MPI_COMM_WORLD, ier)
 
-DO i_loc = 1,MP_A
-   IC= INDXL2G( i_loc, NB, MYROW, 0, NPROW)
-   DO j_loc = 1,NQ_A
-      JC= INDXL2G( j_loc, NB, MYCOL, 0, NPCOL)
-      offset=((IC-1)*n_matrix_col+JC-1)*8
-      call MPI_File_seek (thefile, offset, MPI_SEEK_SET, ierr)
-      call MPI_File_write(thefile, arr_loc(i_loc,j_loc), 1, MPI_DOUBLE_PRECISION, &
-                          status, ierr)
-
-   END DO
- END DO
+disp=0
+call MPI_File_set_view(thefile, disp, MPI_DOUBLE_PRECISION,my_darray, "native",MPI_INFO_NULL, ierr)
+call MPI_File_write_all(thefile, arr_loc, MP_A*NQ_A, MPI_DOUBLE_PRECISION, status, ierr)
+call MPI_BARRIER(MPI_COMM_WORLD, ier)
 
 call MPI_FILE_CLOSE(thefile, ier)
 time2=MPI_WTIME()
 
+
+if(rank==0) write(*,*) 'Matrix with size =', ((8.0*dble(size)**2)/1024.0/1024.0/1024.0) , ' GB was printed'
+if(rank==0) write(*,*) 'Total wall clock time =',(time2-time1) , 'seconds'
 if(rank==0) write(*,*) 'Total wall clock time =',(time2-time1)/3600.0, ' hours'
 if(rank==0) write(*,*) '======================================================================='
diff --git a/parallel_IO.out b/parallel_IO.out
index 95a3d14876f3a20e02fa1c64612054927bb8cdb9..8ad20060518a0734603e39c8a883ee30a36d1f39 100755
Binary files a/parallel_IO.out and b/parallel_IO.out differ
diff --git a/parallel_IO_2.f90 b/parallel_IO_2.f90
new file mode 100644
index 0000000000000000000000000000000000000000..a5820e4ddf12c861316b0b5255eb13d0d2abc5db
--- /dev/null
+++ b/parallel_IO_2.f90
@@ -0,0 +1,337 @@
+module sca
+
+
+  implicit none
+
+  integer :: MYPNUM,NPROCS,CONTEXT,NPROW,NPCOL,MYCOL,MYROW,NB,MP_A,NQ_A,LDA_A
+  integer :: ipc,ipr
+  integer :: LDA_wp,LDA_pwe, LDA_pp,LDA_ep,LDA_ew, LDA_pwe_s,&
+             LDA_we,LDA_ee,LDA_ww,LDA_rw,LDA_sww, LDA_s_ww_inv
+  integer :: LDA_ey,LDA_ye,LDA_dee
+  integer :: DESCA(9),DESCB(9),DESCZ(9),DESCC(9)
+  integer :: DESC_ye(9)
+  integer :: INFO,INFO_A,INFO_B,INFO_Z,INFO_C
+  real    :: ORFAC
+  integer :: lwork_cooficient
+  real, dimension(:),allocatable :: WORK
+
+
+end module sca
+
+
+program parallel_IO
+
+use sca
+use mpi
+implicit none
+
+
+
+  integer :: rank,numtasks,ERRORCODE,ier
+  integer :: sqrtnp,step
+  integer :: ntri_p_loc_b,ntri_p_loc_e, ntri_w_loc_b,ntri_w_loc_e
+  real :: time1,time2,time3
+
+  integer :: n_matrix_row,n_matrix_col
+  real*8,dimension(:,:),allocatable :: arr, arr_loc, arr_glo_print
+  integer :: i,j,ierr
+  logical inside_i,inside_j
+  integer i_loc,j_loc
+  integer ::IC,JC
+
+  integer, dimension(mpi_status_size) :: wstatus
+  integer, dimension (MPI_STATUS_SIZE) :: status
+  integer (kind = MPI_OFFSET_KIND) :: offset, empty
+  integer size
+
+  integer NUMROC
+  EXTERNAL NUMROC
+  integer INDXL2G
+  EXTERNAL INDXL2G
+
+
+  integer thefile
+
+!=====================================
+
+
+
+
+  call MPI_INIT(ier)
+  if (ier .ne. MPI_SUCCESS) then
+     print *,'Error starting MPI program. Terminating!!'
+     call MPI_ABORT(MPI_COMM_WORLD, ERRORCODE, ier)
+  endif
+
+  call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ier)
+  call MPI_COMM_SIZE(MPI_COMM_WORLD, numtasks, ier)
+
+  !In order to measure total wallclock time
+  call MPI_BARRIER(MPI_COMM_WORLD,ier)
+  time1=MPI_WTIME()
+
+  sqrtnp=int(sqrt(real(numtasks))+1)
+  do i=1,sqrtnp
+     if(mod(numtasks,i).eq.0) NPCOL=i
+  enddo
+
+  NPROW = numtasks / NPCOL
+
+!Check that the number of process passed as the argument matches the
+!number of
+!processes in the processor grid
+
+  CALL BLACS_PINFO(MYPNUM, NPROCS)
+  CALL BLACS_GET(-1, 0, CONTEXT)
+
+  IF (NPROCS /= NPROW * NPCOL) THEN
+     WRITE(*,*) 'Error! Number of processors passed does not match with processors in the grid. &
+       NPROW=',NPROW,"NPCOL=",NPCOL,"NPROCS=",NPROCS
+     STOP
+  END IF
+
+  CALL BLACS_GRIDINIT (CONTEXT, 'R', NPROW, NPCOL )
+  CALL BLACS_GRIDINFO (CONTEXT, NPROW, NPCOL, MYROW, MYCOL)
+
+
+  size=9
+  n_matrix_row=size
+  n_matrix_col=size
+
+  allocate(Arr(n_matrix_row,n_matrix_col),arr_glo_print(n_matrix_row,n_matrix_col),stat=ier)
+  IF (IER /= 0) THEN
+     WRITE (*,*) "global matrix can not allocate"
+     STOP
+  END IF
+
+  do i=1,n_matrix_row
+   do j=1, n_matrix_col
+     Arr(i,j)=i*10.0+j
+   enddo
+  enddo
+
+
+ ! if(rank==0) then
+
+ ! do i=1,n_matrix_row
+ !  do j=1, n_matrix_col
+ !    write(*,*) Arr(i,j)
+ !  enddo
+ ! enddo
+
+
+ ! endif
+
+
+
+ ! write(*,*) Arr(:,:)
+
+
+  NB=2
+  MP_A=NUMROC(n_matrix_row, NB, MYROW, 0, NPROW)
+  NQ_A=NUMROC(n_matrix_col, NB, MYCOL, 0, NPCOL)
+
+  LDA_A= MAX(1, MP_A);
+
+
+
+  allocate(Arr_loc(MP_A,NQ_A), stat=ier)
+  IF (IER /= 0) THEN
+     WRITE (*,*) "matrix_pp Can not allocate local matrix a_pp: MY_PROC_NUM=",MYPNUM
+     STOP
+  END IF
+
+ ! write(*,*) rank, " ",MP_A, NQ_A
+
+
+  do i=1,size
+   do j=1, size
+     call ScaLAPACK_mapping_i(i,i_loc,inside_i)
+     if (inside_i) then
+        call ScaLAPACK_mapping_j(j,j_loc,inside_j)
+        if (inside_j) then
+           arr_loc(i_loc,j_loc)=Arr(i,j)
+        endif
+     endif
+   enddo
+  enddo
+
+
+! do i=1,MP_A
+!  do j=1, NQ_A
+  !   write(100+rank,*) i,j,Arr_loc(i,j)
+!  enddo
+! enddo
+
+
+
+!==========================================Print
+
+
+ ! DO i_loc = 1,MP_A
+ !    IC= INDXL2G( i_loc, NB, MYROW, 0, NPROW)
+ !    DO j_loc = 1,NQ_A
+ !       JC= INDXL2G( j_loc, NB, MYCOL, 0, NPCOL)
+ !       arr_glo_print(IC,JC) = arr_loc(i_loc,j_loc)
+ !    END DO
+ ! END DO
+
+! if(rank==0) then
+!    call MPI_REDUCE(MPI_IN_PLACE, arr_glo_print, n_matrix_row*n_matrix_col, &
+!                    MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IER)
+! else
+!    call MPI_REDUCE(arr_glo_print, arr_glo_print, n_matrix_row*n_matrix_col, &
+!                    MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IER)
+!
+! endif
+
+! if(rank==0) then
+!    write(200,*) arr_glo_print(:,:)
+! endif
+
+
+
+
+!=======================================================
+
+
+!1- global size
+
+  allocate(WORK(LDA_A*10),stat=ier)
+
+  CALL DESCINIT( DESCB, size, size, NB, NB, 0, 0, CONTEXT, LDA_A, INFO_A )
+
+  CALL PDLAPRNT( size, size, arr_loc, 1, 1, DESCB, 0, 0, '', 0, WORK )
+
+
+
+
+
+call MPI_FILE_OPEN(MPI_COMM_WORLD, 'testfile1.txt', &
+                   MPI_MODE_WRONLY + MPI_MODE_CREATE, &
+                   MPI_INFO_NULL, thefile, ier)
+
+call MPI_BARRIER(MPI_COMM_WORLD, ier)
+
+
+offset=0
+
+time1=MPI_WTIME()
+
+
+
+DO i_loc = 1,MP_A
+   IC= INDXL2G( i_loc, NB, MYROW, 0, NPROW)
+   DO j_loc = 1,NQ_A
+      JC= INDXL2G( j_loc, NB, MYCOL, 0, NPCOL)
+      offset=((IC-1)*n_matrix_col+JC-1)*8
+
+      call MPI_File_seek (thefile, offset, MPI_SEEK_SET, ierr)
+      call MPI_File_write(thefile, arr_loc(i_loc,j_loc), 1, MPI_DOUBLE_PRECISION, &
+                          status, ierr)
+
+   END DO
+ END DO
+
+
+call MPI_FILE_CLOSE(thefile, ier)
+time2=MPI_WTIME()
+
+if(rank==0) write(*,*) 'Total wall clock time =',(time2-time1)/3600.0, ' hours'
+if(rank==0) write(*,*) '======================================================================='
+
+
+
+
+
+
+!call MPI_File_seek (thefile, offset, MPI_SEEK_SET, ierr)
+!call MPI_File_write(thefile, arr_loc(1,1), 1, MPI_DOUBLE_PRECISION, &
+!                    status, ierr)
+
+
+
+
+
+!call MPI_FILE_SET_VIEW(thefile, empty, MPI_DOUBLE_PRECISION, MPI_DOUBLE_PRECISION, 'native', MPI_INFO_NULL, ier)
+
+!call MPI_File_write_at(thefile, offset, arr_loc(1,1), 1, MPI_DOUBLE_PRECISION, status, ier)
+
+!call MPI_FILE_WRITE_AT(fh, offset, bucket, region, MPI_REAL4,status, ier)
+
+
+
+
+
+!call MPI_FILE_CLOSE(thefile, ier)
+!time2=MPI_WTIME()
+
+!if(rank==0) write(*,*) 'Total wall clock time including output=',(time2-time1)/3600.0, ' hours'
+!if(rank==0) write(*,*) '======================================================================='
+
+
+
+
+call MPI_FINALIZE(ier)
+
+
+end program parallel_IO
+
+
+
+subroutine ScaLAPACK_mapping_i(i,i_loc,inside_row)
+! ----------------------------------------------------------------------
+! purpose: 20/08/2015
+!  Check the mapping "i" index for ScaLAPACK local matrix
+!! ----------------------------------------------------------------------
+
+  use sca     ! new module for ScaLAPACK variables
+! ----------------------------------------------------------------------
+  implicit none
+
+  integer INDXG2L,INDXG2P
+  EXTERNAL INDXG2L,INDXG2P
+
+  integer i,i_loc
+  logical inside_row
+
+  inside_row = .false.
+  ipr = INDXG2P(i,NB,0,0,NPROW)
+  if (ipr .eq. MYROW) then
+     inside_row = .true.
+     i_loc = INDXG2L(i,NB,0,0,NPROW)
+  endif
+
+end subroutine ScaLAPACK_mapping_i
+
+
+!DIR$ ATTRIBUTES FORCEINLINE :: ScaLAPACK_mapping_j
+subroutine ScaLAPACK_mapping_j(j,j_loc,inside_col)
+! ----------------------------------------------------------------------
+! purpose: 20/08/2015
+!  Check the mapping "j" index for ScaLAPACK local matrix
+!! ----------------------------------------------------------------------
+
+  use sca     ! new module for ScaLAPACK variables
+! ----------------------------------------------------------------------
+  implicit none
+
+
+  integer INDXG2L,INDXG2P
+  EXTERNAL INDXG2L,INDXG2P
+
+  integer j,j_loc
+  logical inside_col
+
+  inside_col = .false.
+
+  ipc = INDXG2P(j,NB,0,0,NPCOL)
+  if (ipc.eq.MYCOL) then
+
+     inside_col = .true.
+     j_loc = INDXG2L(j,NB,0,0,NPCOL)
+
+  endif
+
+end subroutine ScaLAPACK_mapping_j
+
diff --git a/parallel_IO_3.f90 b/parallel_IO_3.f90
new file mode 100644
index 0000000000000000000000000000000000000000..a81e75294108f74cd4852005fbf05a647b39f65e
--- /dev/null
+++ b/parallel_IO_3.f90
@@ -0,0 +1,338 @@
+module sca
+
+
+  implicit none
+
+  integer :: MYPNUM,NPROCS,CONTEXT,NPROW,NPCOL,MYCOL,MYROW,NB,MP_A,NQ_A,LDA_A
+  integer :: ipc,ipr
+  integer :: LDA_wp,LDA_pwe, LDA_pp,LDA_ep,LDA_ew, LDA_pwe_s,&
+             LDA_we,LDA_ee,LDA_ww,LDA_rw,LDA_sww, LDA_s_ww_inv
+  integer :: LDA_ey,LDA_ye,LDA_dee
+  integer :: DESCA(9),DESCB(9),DESCZ(9),DESCC(9)
+  integer :: DESC_ye(9)
+  integer :: INFO,INFO_A,INFO_B,INFO_Z,INFO_C
+  real    :: ORFAC
+  integer :: lwork_cooficient
+  real, dimension(:),allocatable :: WORK
+
+
+end module sca
+
+
+program parallel_IO
+
+use sca
+use mpi
+implicit none
+
+
+
+  integer :: rank,numtasks,ier,request,ERRORCODE
+  integer :: sqrtnp,step
+  integer :: ntri_p_loc_b,ntri_p_loc_e, ntri_w_loc_b,ntri_w_loc_e
+  real :: time1,time2,time3
+
+  integer :: n_matrix_row,n_matrix_col
+  real*8,dimension(:,:),allocatable :: arr, arr_loc, arr_glo_print
+  integer :: i,j,ierr
+  logical inside_i,inside_j
+  integer i_loc,j_loc
+  integer ::IC,JC
+
+  integer, dimension(mpi_status_size) :: wstatus
+  integer, dimension (MPI_STATUS_SIZE) :: status
+  integer (kind = MPI_OFFSET_KIND) :: offset, empty
+  integer size
+
+  integer NUMROC
+  EXTERNAL NUMROC
+  integer INDXL2G
+  EXTERNAL INDXL2G
+
+
+  integer thefile
+
+!=====================================
+
+
+
+
+  call MPI_INIT(ier)
+  if (ier .ne. MPI_SUCCESS) then
+     print *,'Error starting MPI program. Terminating!!'
+     call MPI_ABORT(MPI_COMM_WORLD, ERRORCODE, ier)
+  endif
+
+  call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ier)
+  call MPI_COMM_SIZE(MPI_COMM_WORLD, numtasks, ier)
+
+  !In order to measure total wallclock time
+  call MPI_BARRIER(MPI_COMM_WORLD,ier)
+  time1=MPI_WTIME()
+
+  sqrtnp=int(sqrt(real(numtasks))+1)
+  do i=1,sqrtnp
+     if(mod(numtasks,i).eq.0) NPCOL=i
+  enddo
+
+  NPROW = numtasks / NPCOL
+
+
+  CALL BLACS_PINFO(MYPNUM, NPROCS)
+  CALL BLACS_GET(-1, 0, CONTEXT)
+
+  IF (NPROCS /= NPROW * NPCOL) THEN
+     WRITE(*,*) 'Error! Number of processors passed does not match with processors in the grid. &
+       NPROW=',NPROW,"NPCOL=",NPCOL,"NPROCS=",NPROCS
+     STOP
+  END IF
+
+  CALL BLACS_GRIDINIT (CONTEXT, 'R', NPROW, NPCOL )
+  CALL BLACS_GRIDINFO (CONTEXT, NPROW, NPCOL, MYROW, MYCOL)
+
+
+  size=9
+  n_matrix_row=size
+  n_matrix_col=size
+
+  allocate(Arr(n_matrix_row,n_matrix_col),arr_glo_print(n_matrix_row,n_matrix_col),stat=ier)
+  IF (IER /= 0) THEN
+     WRITE (*,*) "global matrix can not allocate"
+     STOP
+  END IF
+
+  do i=1,n_matrix_row
+   do j=1, n_matrix_col
+     Arr(i,j)=i*10.0+j
+   enddo
+  enddo
+
+
+ ! if(rank==0) then
+
+ ! do i=1,n_matrix_row
+ !  do j=1, n_matrix_col
+ !    write(*,*) Arr(i,j)
+ !  enddo
+ ! enddo
+
+
+ ! endif
+
+
+
+ ! write(*,*) Arr(:,:)
+
+
+  NB=2
+  MP_A=NUMROC(n_matrix_row, NB, MYROW, 0, NPROW)
+  NQ_A=NUMROC(n_matrix_col, NB, MYCOL, 0, NPCOL)
+
+  LDA_A= MAX(1, MP_A);
+
+
+
+  allocate(Arr_loc(MP_A,NQ_A), stat=ier)
+  IF (IER /= 0) THEN
+     WRITE (*,*) "matrix_pp Can not allocate local matrix a_pp: MY_PROC_NUM=",MYPNUM
+     STOP
+  END IF
+
+ ! write(*,*) rank, " ",MP_A, NQ_A
+
+
+  do i=1,size
+   do j=1, size
+     call ScaLAPACK_mapping_i(i,i_loc,inside_i)
+     if (inside_i) then
+        call ScaLAPACK_mapping_j(j,j_loc,inside_j)
+        if (inside_j) then
+           arr_loc(i_loc,j_loc)=Arr(i,j)
+        endif
+     endif
+   enddo
+  enddo
+
+
+  do i=1,MP_A
+   do j=1, NQ_A
+  !   Arr_loc(i,j)=rank*1000+i+j+0.1123*rank
+   enddo
+  enddo
+
+
+! do i=1,MP_A
+!  do j=1, NQ_A
+  !   write(100+rank,*) i,j,Arr_loc(i,j)
+!  enddo
+! enddo
+
+
+
+!==========================================Print
+
+
+ ! DO i_loc = 1,MP_A
+ !    IC= INDXL2G( i_loc, NB, MYROW, 0, NPROW)
+ !    DO j_loc = 1,NQ_A
+ !       JC= INDXL2G( j_loc, NB, MYCOL, 0, NPCOL)
+ !       arr_glo_print(IC,JC) = arr_loc(i_loc,j_loc)
+ !    END DO
+ ! END DO
+
+! if(rank==0) then
+!    call MPI_REDUCE(MPI_IN_PLACE, arr_glo_print, n_matrix_row*n_matrix_col, &
+!                    MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IER)
+! else
+!    call MPI_REDUCE(arr_glo_print, arr_glo_print, n_matrix_row*n_matrix_col, &
+!                    MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IER)
+!
+! endif
+
+! if(rank==0) then
+!    write(200,*) arr_glo_print(:,:)
+! endif
+
+
+
+
+!==========================================================================
+  !allocate(WORK(LDA_A*10),stat=ier)
+  !CALL DESCINIT( DESCB, size, size, NB, NB, 0, 0, CONTEXT, LDA_A, INFO_A )
+  !CALL PDLAPRNT( size, size, arr_loc, 1, 1, DESCB, 0, 0, '', 0, WORK )
+!===========================================================================
+
+
+
+call MPI_FILE_OPEN(MPI_COMM_WORLD, 'testfile1.txt', &
+                   MPI_MODE_RDWR + MPI_MODE_CREATE, &
+                   MPI_INFO_NULL, thefile, ier)
+
+call MPI_BARRIER(MPI_COMM_WORLD, ier)
+
+
+offset=0
+
+time1=MPI_WTIME()
+
+
+
+DO i_loc = 1,MP_A
+   IC= INDXL2G( i_loc, NB, MYROW, 0, NPROW)
+   DO j_loc = 1,NQ_A
+      JC= INDXL2G( j_loc, NB, MYCOL, 0, NPCOL)
+      offset=((IC-1)*n_matrix_col+JC-1)*8
+
+      !call MPI_File_iwrite_at_all(thefile, offset, arr_loc(i_loc,j_loc), 1, &
+      !MPI_DOUBLE_PRECISION, request, ierr)
+
+      call MPI_File_write_at_all(thefile, offset, arr_loc(i_loc,j_loc), 1, &
+                                 MPI_DOUBLE_PRECISION, status, ierr)
+
+   END DO
+ END DO
+
+
+call MPI_BARRIER(MPI_COMM_WORLD, ier)
+
+call MPI_FILE_CLOSE(thefile, ier)
+time2=MPI_WTIME()
+
+if(rank==0) write(*,*) 'Total wall clock time =',(time2-time1)/3600.0, ' hours'
+if(rank==0) write(*,*) '======================================================================='
+
+
+
+
+
+
+!call MPI_File_seek (thefile, offset, MPI_SEEK_SET, ierr)
+!call MPI_File_write(thefile, arr_loc(1,1), 1, MPI_DOUBLE_PRECISION, &
+!                    status, ierr)
+
+
+
+
+
+!call MPI_FILE_SET_VIEW(thefile, empty, MPI_DOUBLE_PRECISION, MPI_DOUBLE_PRECISION, 'native', MPI_INFO_NULL, ier)
+
+!call MPI_File_write_at(thefile, offset, arr_loc(1,1), 1, MPI_DOUBLE_PRECISION, status, ier)
+
+!call MPI_FILE_WRITE_AT(fh, offset, bucket, region, MPI_REAL4,status, ier)
+
+
+
+
+
+!call MPI_FILE_CLOSE(thefile, ier)
+!time2=MPI_WTIME()
+
+!if(rank==0) write(*,*) 'Total wall clock time including output=',(time2-time1)/3600.0, ' hours'
+!if(rank==0) write(*,*) '======================================================================='
+
+
+
+
+call MPI_FINALIZE(ier)
+
+
+end program parallel_IO
+
+
+
+subroutine ScaLAPACK_mapping_i(i,i_loc,inside_row)
+! ----------------------------------------------------------------------
+! purpose: 20/08/2015
+!  Check the mapping "i" index for ScaLAPACK local matrix
+!! ----------------------------------------------------------------------
+
+  use sca     ! new module for ScaLAPACK variables
+! ----------------------------------------------------------------------
+  implicit none
+
+  integer INDXG2L,INDXG2P
+  EXTERNAL INDXG2L,INDXG2P
+
+  integer i,i_loc
+  logical inside_row
+
+  inside_row = .false.
+  ipr = INDXG2P(i,NB,0,0,NPROW)
+  if (ipr .eq. MYROW) then
+     inside_row = .true.
+     i_loc = INDXG2L(i,NB,0,0,NPROW)
+  endif
+
+end subroutine ScaLAPACK_mapping_i
+
+
+!DIR$ ATTRIBUTES FORCEINLINE :: ScaLAPACK_mapping_j
+subroutine ScaLAPACK_mapping_j(j,j_loc,inside_col)
+! ----------------------------------------------------------------------
+! purpose: 20/08/2015
+!  Check the mapping "j" index for ScaLAPACK local matrix
+!! ----------------------------------------------------------------------
+
+  use sca     ! new module for ScaLAPACK variables
+! ----------------------------------------------------------------------
+  implicit none
+
+
+  integer INDXG2L,INDXG2P
+  EXTERNAL INDXG2L,INDXG2P
+
+  integer j,j_loc
+  logical inside_col
+
+  inside_col = .false.
+
+  ipc = INDXG2P(j,NB,0,0,NPCOL)
+  if (ipc.eq.MYCOL) then
+
+     inside_col = .true.
+     j_loc = INDXG2L(j,NB,0,0,NPCOL)
+
+  endif
+
+end subroutine ScaLAPACK_mapping_j
+
diff --git a/sca.mod b/sca.mod
index f19af21e6a3def5f4445e45f14c69b88e9942b5c..1becd8e0a8e09d91e62ab0b6651c7a3eac394623 100644
Binary files a/sca.mod and b/sca.mod differ
diff --git a/submit_script_marconi_STARWALL b/submit_script_marconi_STARWALL
index cea31151d234bb107c3b593bc5ec032dc60e393f..25ff8edb33bed7142f44b2ef642cb2701b2718fe 100644
--- a/submit_script_marconi_STARWALL
+++ b/submit_script_marconi_STARWALL
@@ -1,7 +1,7 @@
 #!/bin/bash
 #PBS -q xfuaprod
-#PBS -l walltime=01:55:00
-#PBS -l select=1:ncpus=36:mpiprocs=6:mem=100GB
+#PBS -l walltime=00:55:00
+#PBS -l select=2:ncpus=36:mpiprocs=36:mem=100GB
 #PBS -j oe
 #PBS -A FUSIO_HLST
 #PBS -N MPI_IO
@@ -15,7 +15,7 @@
 FOLDER_NAME=${PBS_JOBNAME}_${PBS_JOBID}
 BINARY=parallel_IO.out
 
 mkdir $FOLDER_NAME
-cp {boundary.txt,coil.txt,input,submit_script_marconi_STARWALL,$BINARY} $FOLDER_NAME
+cp {submit_script_marconi_STARWALL,$BINARY} $FOLDER_NAME
 #cp {input,submit_script_marconi,$BINARY} $FOLDER_NAME
 
 cd $FOLDER_NAME
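
Note (not part of the patch): the central change in parallel_IO.f90 above replaces the per-element MPI_File_seek/MPI_File_write loop with a distributed-array file view plus a single collective write. The standalone Fortran sketch below shows that pattern in isolation. It is a minimal illustration under stated assumptions, not the project code: it uses a 1 x nprocs process grid instead of the NPROW x NPCOL BLACS grid, toy values gsize=8 and nb=2, a hypothetical output file name 'darray_sketch.bin', and a small numroc_like helper as a stand-in for ScaLAPACK's NUMROC so that only MPI is required.

! Minimal sketch of the darray + set_view + write_all pattern (assumptions noted above).
program darray_write_sketch
  use mpi
  implicit none
  integer, parameter :: gsize = 8, nb = 2      ! toy global size and block size
  integer :: rank, nprocs, ierr, fh, darray
  integer :: nprow, npcol, myrow, mycol, mloc, nloc
  integer :: dims(2), distribs(2), dargs(2), pdims(2)
  integer(kind=MPI_OFFSET_KIND) :: disp
  integer :: status(MPI_STATUS_SIZE)
  real(kind=8), allocatable :: aloc(:,:)

  call MPI_Init(ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
  call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)

  ! Assume a 1 x nprocs grid for simplicity; MPI_Type_create_darray assumes a
  ! row-major rank-to-grid mapping, which this choice (and BLACS 'R' order) satisfies.
  nprow = 1
  npcol = nprocs
  myrow = 0
  mycol = rank

  mloc = numroc_like(gsize, nb, myrow, nprow)
  nloc = numroc_like(gsize, nb, mycol, npcol)
  allocate(aloc(max(mloc,1), max(nloc,1)))
  aloc = real(rank, kind=8)                    ! fill the local block with the owner's rank

  dims     = [gsize, gsize]
  distribs = [MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC]
  dargs    = [nb, nb]
  pdims    = [nprow, npcol]

  call MPI_Type_create_darray(nprocs, rank, 2, dims, distribs, dargs, pdims, &
                              MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, darray, ierr)
  call MPI_Type_commit(darray, ierr)

  call MPI_File_open(MPI_COMM_WORLD, 'darray_sketch.bin', &
                     MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, fh, ierr)
  disp = 0
  ! The file view maps each rank's block-cyclic pieces to their global offsets,
  ! so one collective write replaces the per-element seek/write loop.
  call MPI_File_set_view(fh, disp, MPI_DOUBLE_PRECISION, darray, 'native', MPI_INFO_NULL, ierr)
  call MPI_File_write_all(fh, aloc, mloc*nloc, MPI_DOUBLE_PRECISION, status, ierr)
  call MPI_File_close(fh, ierr)

  call MPI_Type_free(darray, ierr)
  call MPI_Finalize(ierr)

contains

  ! Stand-in for ScaLAPACK's NUMROC: local extent of a block-cyclically
  ! distributed dimension owned by process iproc (source process 0).
  integer function numroc_like(n, nb_, iproc, nprocs_) result(nloc_)
    integer, intent(in) :: n, nb_, iproc, nprocs_
    integer :: nblocks, extra
    nblocks = n / nb_
    nloc_ = (nblocks / nprocs_) * nb_
    extra = mod(nblocks, nprocs_)
    if (iproc < extra) then
      nloc_ = nloc_ + nb_
    else if (iproc == extra) then
      nloc_ = nloc_ + mod(n, nb_)
    end if
  end function numroc_like

end program darray_write_sketch

With the darray file view in place, MPI-IO can aggregate the scattered block-cyclic pieces into large contiguous file accesses, which is why the patched parallel_IO.f90 reports the total write time for a size=150000 matrix instead of issuing one 8-byte write per element.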