elpa / elpa: Commit db110dfc

Authored May 15, 2012 by auckenth

    rank-2 QR-decomposition replaced by fully blocked QR-decomposition; makefile adapted accordingly

Parent: 009323b9
Changes: 6 files
ELPA_development_version/src/elpa2.f90
...
...
@@ -10,7 +10,6 @@ module ELPA2
  ! Version 1.1.2, 2011-02-21

  USE ELPA1
  USE blockedQR

  implicit none
...
...
@@ -38,7 +37,7 @@ module ELPA2
  integer, public :: which_qr_decomposition = 1     ! defines, which QR-decomposition algorithm will be used
                                                     ! 0 for unblocked
-                                                    ! 1 for rank-2
+                                                    ! 1 for blocked (maxrank: nblk)

!-------------------------------------------------------------------------------
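Note (illustrative, not part of the commit): callers select the QR code path through this public module flag. The sketch below shows how a hypothetical driver program might set it before invoking the ELPA2 solver; the program itself and the omitted solver call are assumptions, only which_qr_decomposition is taken from the module above.

    ! Sketch only: choosing the QR code path via the public ELPA2 flag.
    program qr_flag_example
        use ELPA2, only: which_qr_decomposition
        implicit none

        which_qr_decomposition = 1   ! 1 = blocked QR (maxrank: nblk), 0 = unblocked Householder
        ! ... set up the distributed matrix and call the ELPA2 two-stage solver here ...
    end program qr_flag_example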
...
...
@@ -364,14 +363,16 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, tma
   integer i, j, lcs, lce, lre, lc, lr, cur_pcol, n_cols, nrow
   integer istep, ncol, lch, lcx, nlc
   integer tile_size, l_rows_tile, l_cols_tile
-  integer work_size
-  real*8 eps
   real*8 vnorm2, xf, aux1(nbw), aux2(nbw), vrl, tau, vav(nbw,nbw)
   real*8, allocatable :: tmp(:,:), vr(:), vmr(:,:), umc(:,:)
-  real*8, allocatable :: work(:)
-  ! needed for blocked QR
+  ! needed for blocked QR decomposition
+  integer PQRPARAM(11), work_size
+  real*8 dwork_size(1)
+  real*8, allocatable :: work_blocked(:), tauvector(:), blockheuristic(:)
   integer pcol, prow
...
...
@@ -403,15 +404,16 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, tma
   l_cols_tile = tile_size/np_cols ! local cols of a tile

   if (which_qr_decomposition == 1) then
-    l_rows = local_index(na, my_prow, np_rows, nblk, -1)
-    work_size = max(4*np_rows, 2*nbw)
-    work_size = max(l_rows+1, work_size)
-    work_size = max(16, work_size)
-    work_size = max(2*(l_rows+1), work_size)
-    work_size = max(2+4*(nbw+1), work_size)
-    allocate(work(work_size))
-    work = 0
-    eps = 1.0d0
+    call tum_pqrparam_init(pqrparam, nblk, 'M', 0, nblk, 'M', 0, nblk, 'M', 1, 's')
+    allocate(tauvector(na))
+    allocate(blockheuristic(nblk))
+    l_rows = local_index(na, my_prow, np_rows, nblk, -1)
+    call tum_pdgeqrf_2dcomm(a, lda, vmr, max(l_rows,1), tauvector(1), tmat(1,1,1), nbw, dwork_size(1), -1, na, nbw, nblk, nblk, na, na, 1, 0, PQRPARAM, mpi_comm_rows, mpi_comm_cols, blockheuristic)
+    work_size = dwork_size(1)
+    allocate(work_blocked(work_size))
+    work_blocked = 0.0d0
   endif

   do istep = (na-1)/nbw, 1, -1
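Note (illustrative, not part of the commit): the call above with -1 in place of a workspace length is a LAPACK-style workspace query; tum_pdgeqrf_2dcomm only reports the required size in dwork_size(1), and the actual factorization later passes work_blocked of that size. A self-contained sketch of the same two-pass idiom, with a hypothetical routine my_factor standing in for the ELPA routine:

    ! Sketch of the lwork = -1 workspace-query idiom; my_factor is hypothetical.
    program workspace_query_demo
        implicit none
        double precision :: dwork_size(1)
        double precision, allocatable :: work(:)
        integer :: lwork

        call my_factor(dwork_size, -1)      ! query pass: only the required size is reported
        lwork = int(dwork_size(1))
        allocate(work(lwork))
        call my_factor(work, lwork)         ! real pass: workspace of the reported size is supplied
        deallocate(work)

    contains

        subroutine my_factor(work, lwork)
            double precision :: work(*)
            integer :: lwork
            if (lwork == -1) then
                work(1) = 64.0d0            ! pretend 64 doubles of workspace are needed
                return
            end if
            work(1:lwork) = 0.0d0           ! placeholder for the actual factorization work
        end subroutine my_factor

    end program workspace_query_demo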
...
...
@@ -435,7 +437,7 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, tma
      ! Reduce current block to lower triangular form

      if (which_qr_decomposition == 1) then
-       call qr_rank2_real(a, lda, vmr, max(l_rows,1), tmat, nbw, istep, n_cols, nblk, mpi_comm_rows, mpi_comm_cols, work, eps)
+       call tum_pdgeqrf_2dcomm(a, lda, vmr, max(l_rows,1), tauvector(1), tmat(1,1,istep), nbw, work_blocked, work_size, na, n_cols, nblk, nblk, istep*nbw+n_cols-nbw, istep*nbw+n_cols, 1, 0, PQRPARAM, mpi_comm_rows, mpi_comm_cols, blockheuristic)
      else
        do lc = n_cols, 1, -1
...
...
@@ -528,8 +530,6 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, tma
        enddo
      endif

      ! Calculate scalar products of stored Householder vectors.
      ! This can be done in different ways, we use dsyrk
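Note (illustrative, not part of the commit): forming all pairwise scalar products of the stored Householder vectors amounts to the symmetric product V**T * V, which a single dsyrk call provides. A standalone sketch with arbitrary test data (BLAS assumed to be linked):

    ! V holds k column vectors of length n; dsyrk fills the upper
    ! triangle of vav with V**T * V in one call.
    program dsyrk_scalar_products
        implicit none
        integer, parameter :: n = 6, k = 3
        double precision :: v(n,k), vav(k,k)
        integer :: i, j

        do j = 1, k
            do i = 1, n
                v(i,j) = dble(i + j)        ! arbitrary test data
            end do
        end do

        vav = 0.0d0
        call dsyrk('U', 'T', k, n, 1.0d0, v, n, 0.0d0, vav, k)

        ! only the upper triangle of vav is referenced by dsyrk
        print '(3f8.1)', ((vav(i,j), j = 1, k), i = 1, k)
    end program dsyrk_scalar_products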
...
...
@@ -547,6 +547,8 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, tma
            tmat(lc,lc+1:n_cols,istep) = -tau*vav(lc+1:n_cols,lc)
          endif
        enddo
      endif

      ! Transpose vmr -> vmc (stored in umc, second half)
...
...
@@ -634,7 +636,8 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, tma
  enddo

  if (which_qr_decomposition == 1) then
-   deallocate(work)
+   deallocate(work_blocked)
+   deallocate(tauvector)
  endif

end subroutine bandred_real
...
...
ELPA_development_version/src/elpa_qr/elpa_pdgeqrf.f90  (new file, 0 → 100644; diff collapsed, not shown)
ELPA_development_version/src/elpa_qr/elpa_pdlarfb.f90  (new file, 0 → 100644; diff collapsed, not shown)
ELPA_development_version/src/elpa_qr/elpa_qrkernels.f90  (new file, 0 → 100644; diff collapsed, not shown)
ELPA_development_version/src/elpa_qr/tum_utils.f90  (new file, 0 → 100644)
module tum_utils

    implicit none

    PRIVATE

    public :: local_size_offset_1d
    public :: reverse_vector_local
    public :: reverse_matrix_local
    public :: reverse_matrix_1dcomm
    public :: reverse_matrix_2dcomm_ref
    public :: tsqr_groups_size
    public :: tsqr_groups_initialize
    public :: tsqr_groups_finalize

contains

    ! rev parameter is critical, even in rev only mode!
    ! pdgeqrf_2dcomm uses rev=0 version to determine the process columns
    ! involved in the qr decomposition
    subroutine local_size_offset_1d(n, nb, baseidx, idx, rev, rank, nprocs, &
                                    lsize, baseoffset, offset)

        use ELPA1

        implicit none

        ! input
        integer n, nb, baseidx, idx, rev, rank, nprocs

        ! output
        integer lsize, baseoffset, offset

        ! local scalars
        integer rank_idx

        rank_idx = MOD((idx-1)/nb, nprocs)

        ! calculate local size and offsets
        if (rev .eq. 1) then
            if (idx > 0) then
                lsize = local_index(idx, rank, nprocs, nb, -1)
            else
                lsize = 0
            end if

            baseoffset = 1
            offset = 1
        else
            offset = local_index(idx, rank, nprocs, nb, 1)
            baseoffset = local_index(baseidx, rank, nprocs, nb, 1)

            lsize = local_index(n, rank, nprocs, nb, -1)
            !print *,'baseidx,idx',baseidx,idx,lsize,n

            lsize = lsize - offset + 1

            baseoffset = offset - baseoffset + 1
        end if

    end subroutine
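Note (illustrative, not part of the commit): a usage sketch for local_size_offset_1d in the forward (rev = 0) case, mirroring how reverse_matrix_2dcomm_ref calls it further down; it assumes the module is compiled together with ELPA1, which supplies local_index, and the printed values are for illustration only.

    ! Hypothetical helper: report this rank's share of an n-element,
    ! block-cyclically distributed index range (block size nb).
    subroutine show_local_share(n, nb, myrank, nprocs)
        use tum_utils, only: local_size_offset_1d
        implicit none
        integer, intent(in) :: n, nb, myrank, nprocs
        integer :: lsize, baseoffset, offset

        ! full range starting at global index 1; rev = 0 selects the forward variant
        call local_size_offset_1d(n, nb, 1, 1, 0, myrank, nprocs, lsize, baseoffset, offset)
        print '(a,i0,a,i0,a,i0)', 'local size = ', lsize, ', offset = ', offset, ', baseoffset = ', baseoffset
    end subroutine show_local_share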
    subroutine reverse_vector_local(n, x, incx, work, lwork)
        implicit none

        ! input
        integer incx, n, lwork
        double precision x(*), work(*)

        ! local scalars
        double precision temp
        integer srcoffset, destoffset, ientry

        if (lwork .eq. -1) then
            work(1) = 0.0d0
            return
        end if

        do ientry = 1, n/2
            srcoffset = 1 + (ientry-1)*incx
            destoffset = 1 + (n-ientry)*incx

            temp = x(srcoffset)
            x(srcoffset) = x(destoffset)
            x(destoffset) = temp
        end do

    end subroutine
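Note (illustrative, not part of the commit): because the stride incx is explicit, the same routine reverses either a contiguous column (incx = 1) or a row of a matrix with leading dimension lda (incx = lda). A small standalone driver:

    ! Reverse one column and one row of a 4x4 matrix in place.
    program reverse_demo
        use tum_utils, only: reverse_vector_local
        implicit none
        integer, parameter :: lda = 4, n = 4
        double precision :: a(lda,n), work(1)
        integer :: i, j

        do j = 1, n
            do i = 1, lda
                a(i,j) = dble(10*i + j)
            end do
        end do

        call reverse_vector_local(lda, a(1,2), 1,   work, 1)   ! column 2, unit stride
        call reverse_vector_local(n,   a(3,1), lda, work, 1)   ! row 3, stride lda

        print '(4f7.1)', (a(i,1:n), i = 1, lda)
    end program reverse_demo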
    subroutine reverse_matrix_local(trans, m, n, a, lda, work, lwork)
        implicit none

        ! input
        integer lda, m, n, lwork, trans
        double precision a(lda,*), work(*)

        ! local scalars
        double precision temp, dworksize(1)
        integer srcoffset, destoffset, ientry
        integer incx
        integer dimsize
        integer i

        if (trans .eq. 1) then
            incx = lda
            dimsize = n
        else
            incx = 1
            dimsize = m
        end if

        if (lwork .eq. -1) then
            call reverse_vector_local(dimsize, a, incx, dworksize, -1)
            work(1) = dworksize(1)
            return
        end if

        if (trans .eq. 1) then
            do i = 1, m
                call reverse_vector_local(dimsize, a(i,1), incx, work, lwork)
            end do
        else
            do i = 1, n
                call reverse_vector_local(dimsize, a(1,i), incx, work, lwork)
            end do
        end if

    end subroutine
    subroutine reverse_matrix_2dcomm_ref(m, n, mb, nb, a, lda, work, lwork, mpicomm_cols, mpicomm_rows)
        implicit none

        ! input
        integer m, n, lda, lwork, mpicomm_cols, mpicomm_rows, mb, nb
        double precision a(lda,*), work(*)

        ! local scalars
        double precision reverse_column_size(1)
        double precision reverse_row_size(1)

        integer mpirank_cols, mpirank_rows
        integer mpiprocs_cols, mpiprocs_rows
        integer mpierr
        integer lrows, lcols, offset, baseoffset

        call MPI_Comm_rank(mpicomm_cols, mpirank_cols, mpierr)
        call MPI_Comm_rank(mpicomm_rows, mpirank_rows, mpierr)
        call MPI_Comm_size(mpicomm_cols, mpiprocs_cols, mpierr)
        call MPI_Comm_size(mpicomm_rows, mpiprocs_rows, mpierr)

        call local_size_offset_1d(m, mb, 1, 1, 0, mpirank_cols, mpiprocs_cols, &
                                  lrows, baseoffset, offset)

        call local_size_offset_1d(n, nb, 1, 1, 0, mpirank_rows, mpiprocs_rows, &
                                  lcols, baseoffset, offset)

        if (lwork .eq. -1) then
            call reverse_matrix_1dcomm(0, m, lcols, mb, a, lda, reverse_column_size, -1, mpicomm_cols)
            call reverse_matrix_1dcomm(1, lrows, n, nb, a, lda, reverse_row_size, -1, mpicomm_rows)
            work(1) = max(reverse_column_size(1), reverse_row_size(1))
            return
        end if

        call reverse_matrix_1dcomm(0, m, lcols, mb, a, lda, work, lwork, mpicomm_cols)
        call reverse_matrix_1dcomm(1, lrows, n, nb, a, lda, work, lwork, mpicomm_rows)
    end subroutine
    ! b: if trans = 'N': b is size of block distribution between rows
    ! b: if trans = 'T': b is size of block distribution between columns
    subroutine reverse_matrix_1dcomm(trans, m, n, b, a, lda, work, lwork, mpicomm)
        use mpi
        implicit none

        ! input
        integer trans
        integer m, n, b, lda, lwork, mpicomm
        double precision a(lda,*), work(*)

        ! local scalars
        integer mpirank, mpiprocs, mpierr, mpistatus(MPI_STATUS_SIZE)
        integer nr_blocks, dest_process, src_process, step
        integer src_block, dest_block
        integer lsize, baseoffset, offset
        integer current_index, destblk, srcblk, icol, next_index
        integer sendcount, recvcount
        integer sendoffset, recvoffset
        integer newmatrix_offset, work_offset
        integer lcols, lrows, lroffset, lcoffset, dimsize, fixedsize
        double precision dworksize(1)

        call MPI_Comm_rank(mpicomm, mpirank, mpierr)
        call MPI_Comm_size(mpicomm, mpiprocs, mpierr)

        if (trans .eq. 1) then
            call local_size_offset_1d(n, b, 1, 1, 0, mpirank, mpiprocs, &
                                      lcols, baseoffset, lcoffset)
            lrows = m
        else
            call local_size_offset_1d(m, b, 1, 1, 0, mpirank, mpiprocs, &
                                      lrows, baseoffset, lroffset)
            lcols = n
        end if

        if (lwork .eq. -1) then
            call reverse_matrix_local(trans, lrows, lcols, a, max(lrows,lcols), dworksize, -1)
            work(1) = DBLE(3*lrows*lcols) + dworksize(1)
            return
        end if

        sendoffset = 1
        recvoffset = sendoffset + lrows*lcols
        newmatrix_offset = recvoffset + lrows*lcols
        work_offset = newmatrix_offset + lrows*lcols

        if (trans .eq. 1) then
            dimsize = n
            fixedsize = m
        else
            dimsize = m
            fixedsize = n
        end if

        if (dimsize .le. 1) then
            return ! nothing to do
        end if

        ! 1. adjust step size to remainder size
        nr_blocks = dimsize / b
        nr_blocks = nr_blocks * b
        step = dimsize - nr_blocks
        if (step .eq. 0) step = b

        ! 2. iterate over destination blocks starting with process 0
        current_index = 1
        do while (current_index .le. dimsize)
            destblk = (current_index-1) / b
            dest_process = mod(destblk, mpiprocs)
            srcblk = (dimsize - current_index) / b
            src_process = mod(srcblk, mpiprocs)

            next_index = current_index + step

            ! block for dest_process is located on mpirank if lsize > 0
            call local_size_offset_1d(dimsize-current_index+1, b, dimsize-next_index+2, dimsize-next_index+2, 0, &
                                      src_process, mpiprocs, lsize, baseoffset, offset)

            sendcount = lsize * fixedsize
            recvcount = sendcount

            ! TODO: this send/recv stuff seems to blow up on BlueGene/P
            ! TODO: is there actually room for the requested matrix part? the target
            ! process might not have any parts at all (thus no room)
            if ((src_process .eq. mpirank) .and. (dest_process .eq. src_process)) then
                ! 5. pack data
                if (trans .eq. 1) then
                    do icol = offset, offset+lsize-1
                        work(sendoffset+(icol-offset)*lrows:sendoffset+(icol-offset+1)*lrows-1) = &
                            a(1:lrows,icol)
                    end do
                else
                    do icol = 1, lcols
                        work(sendoffset+(icol-1)*lsize:sendoffset+icol*lsize-1) = &
                            a(offset:offset+lsize-1,icol)
                    end do
                end if

                ! 7. reverse data
                if (trans .eq. 1) then
                    call reverse_matrix_local(1, lrows, lsize, work(sendoffset), lrows, work(work_offset), lwork)
                else
                    call reverse_matrix_local(0, lsize, lcols, work(sendoffset), lsize, work(work_offset), lwork)
                end if

                ! 8. store in temp matrix
                if (trans .eq. 1) then
                    do icol = 1, lsize
                        work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+icol*lrows-1) = &
                            work(sendoffset+(icol-1)*lrows:sendoffset+icol*lrows-1)
                    end do

                    newmatrix_offset = newmatrix_offset + lsize*lrows
                else
                    do icol = 1, lcols
                        work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+(icol-1)*lrows+lsize-1) = &
                            work(sendoffset+(icol-1)*lsize:sendoffset+icol*lsize-1)
                    end do

                    newmatrix_offset = newmatrix_offset + lsize
                end if
            else
                if (dest_process .eq. mpirank) then
                    ! 6b. call MPI_Recv
                    call MPI_Recv(work(recvoffset), recvcount, mpi_real8, &
                                  src_process, current_index, mpicomm, mpistatus, mpierr)

                    ! 7. reverse data
                    if (trans .eq. 1) then
                        call reverse_matrix_local(1, lrows, lsize, work(recvoffset), lrows, work(work_offset), lwork)
                    else
                        call reverse_matrix_local(0, lsize, lcols, work(recvoffset), lsize, work(work_offset), lwork)
                    end if

                    ! 8. store in temp matrix
                    if (trans .eq. 1) then
                        do icol = 1, lsize
                            work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+icol*lrows-1) = &
                                work(recvoffset+(icol-1)*lrows:recvoffset+icol*lrows-1)
                        end do

                        newmatrix_offset = newmatrix_offset + lsize*lrows
                    else
                        do icol = 1, lcols
                            work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+(icol-1)*lrows+lsize-1) = &
                                work(recvoffset+(icol-1)*lsize:recvoffset+icol*lsize-1)
                        end do

                        newmatrix_offset = newmatrix_offset + lsize
                    end if
                end if

                if (src_process .eq. mpirank) then
                    ! 5. pack data
                    if (trans .eq. 1) then
                        do icol = offset, offset+lsize-1
                            work(sendoffset+(icol-offset)*lrows:sendoffset+(icol-offset+1)*lrows-1) = &
                                a(1:lrows,icol)
                        end do
                    else
                        do icol = 1, lcols
                            work(sendoffset+(icol-1)*lsize:sendoffset+icol*lsize-1) = &
                                a(offset:offset+lsize-1,icol)
                        end do
                    end if

                    ! 6a. call MPI_Send
                    call MPI_Send(work(sendoffset), sendcount, mpi_real8, &
                                  dest_process, current_index, mpicomm, mpierr)
                end if
            end if

            current_index = next_index
        end do

        ! 9. copy temp matrix to real matrix
        newmatrix_offset = recvoffset + lrows*lcols
        do icol = 1, lcols
            a(1:lrows,icol) = &
                work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+icol*lrows-1)
        end do
    end subroutine
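Note (illustrative, not part of the commit): reverse_matrix_1dcomm follows the same lwork = -1 query convention as the factorization call in bandred_real. A sketch of the two-pass pattern for reversing the distributed row dimension (trans = 0), assuming MPI is initialized and the module is available:

    ! Query the workspace size, then reverse the block-cyclically
    ! distributed row dimension of the local panel a(1:lda, 1:n).
    subroutine reverse_rows_1d(m, n, nb, a, lda, mpicomm)
        use tum_utils, only: reverse_matrix_1dcomm
        implicit none
        integer, intent(in) :: m, n, nb, lda, mpicomm
        double precision, intent(inout) :: a(lda,*)
        double precision :: dworksize(1)
        double precision, allocatable :: work(:)

        call reverse_matrix_1dcomm(0, m, n, nb, a, lda, dworksize, -1, mpicomm)   ! query pass
        allocate(work(int(dworksize(1))))
        call reverse_matrix_1dcomm(0, m, n, nb, a, lda, work, size(work), mpicomm)
        deallocate(work)
    end subroutine reverse_rows_1d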
    integer function tsqr_groups_size(comm, treeorder)
        use mpi
        implicit none

        ! input
        integer comm, treeorder

        ! local scalars
        integer mpiprocs, mpierr
        integer nr_groups, depth, treeprocs

        call MPI_Comm_size(comm, mpiprocs, mpierr)

        ! integer logarithm with base treeorder
        depth = 1
        treeprocs = treeorder
        do while (treeprocs .lt. mpiprocs)
            treeprocs = treeprocs * treeorder
            depth = depth + 1
        end do
        nr_groups = depth + 1

        tsqr_groups_size = nr_groups
    end function
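Note (illustrative, not part of the commit): the do-while loop above is an integer logarithm, finding the smallest depth with treeorder**depth >= mpiprocs without floating-point arithmetic. A standalone check with sample numbers:

    ! Integer logarithm as used in tsqr_groups_size.
    program tree_depth_demo
        implicit none
        integer :: mpiprocs, treeorder, depth, treeprocs

        mpiprocs = 13     ! illustrative process count
        treeorder = 2     ! binary reduction tree

        depth = 1
        treeprocs = treeorder
        do while (treeprocs .lt. mpiprocs)
            treeprocs = treeprocs * treeorder
            depth = depth + 1
        end do

        print '(a,i0)', 'tree depth = ', depth   ! prints 4, since 2**4 = 16 >= 13
    end program tree_depth_demo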
    subroutine tsqr_groups_initialize(comm, treeorder, groups)
        use mpi
        implicit none

        ! input
        integer comm, treeorder

        ! output
        integer, allocatable :: groups(:)

        ! local scalars
        integer nr_groups, igroup, mpierr, mpirank
        integer split_color, split_key
        integer prev_treeorder, temp_treeorder

        nr_groups = tsqr_groups_size(comm, treeorder)
        allocate(groups(nr_groups))

        groups(1) = comm

        call MPI_Comm_rank(comm, mpirank, mpierr)

        prev_treeorder = 1
        temp_treeorder = treeorder

        do igroup = 2, nr_groups
            if (mod(mpirank, prev_treeorder) .eq. 0) then
                split_color = mpirank / temp_treeorder
                split_key = mod(mpirank / prev_treeorder, treeorder)
            else
                split_color = MPI_UNDEFINED
                split_key = 0 ! ignored due to MPI_UNDEFINED color
            end if

            call MPI_Comm_split(comm, split_color, split_key, groups(igroup), mpierr)

            prev_treeorder = temp_treeorder
            temp_treeorder = temp_treeorder * treeorder
        end do

    end subroutine
    subroutine tsqr_groups_finalize(groups, treeorder)
        use mpi
        implicit none

        ! input
        integer, allocatable :: groups(:)
        integer treeorder

        ! local scalars
        integer nr_groups, igroup, mpierr

        nr_groups = tsqr_groups_size(groups(1), treeorder)

        do igroup = 2, nr_groups
            call MPI_Comm_free(groups(igroup), mpierr)
        end do

        deallocate(groups)

    end subroutine

end module
ELPA_development_version/test/Makefile
...
...
@@ -3,10 +3,10 @@
  # ------------------------------------------------------------------------------
  # Settings for Intel Fortran (Linux):
  #
- F90=mpif90 -O3 -traceback -g -fpe0
- F90OPT=$(F90) -xSSE4.2
- LIBS = -L/opt/intel/Compiler/11.0/069/mkl/lib/em64t -lmkl_lapack -lmkl -lguide -lpthread \
-        -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
+ #F90=mpif90 -O3 -traceback -g -fpe0
+ #F90OPT=$(F90) -xSSE4.2
+ #LIBS = -L/opt/intel/Compiler/11.0/069/mkl/lib/em64t -lmkl_lapack -lmkl -lguide -lpthread \
+ #       -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
  #
  # ------------------------------------------------------------------------------
  # Settings for Intel Fortran on MacOSX (home-built BLACS and scalapack):
...
...
@@ -27,10 +27,10 @@ LIBS = -L/opt/intel/Compiler/11.0/069/mkl/lib/em64t -lmkl_lapack -lmkl -lguide -
  # ------------------------------------------------------------------------------
  # Settings for IBM BlueGene/P
  #
- #F90 = mpixlf95_r -O3 -g -qarch=auto -qtune=auto
- #F90OPT = mpixlf95_r -O4 -g -qarch=auto -qtune=auto
- #LIBS = -L/usr/local/lib -lscalapack -llapack -lblacsF77init -lblacs -lblacsF77init -lblacs \
- #       -L/opt/ibmmath/essl/4.4/lib -lesslbg -lc
+ F90 = mpixlf95_r -O3 -g -qarch=auto -qtune=auto
+ F90OPT = mpixlf95_r -O4 -g -qarch=auto -qtune=auto
+ LIBS = -L/usr/local/lib -lscalapack -llapack -lblacsF77init -lblacs -lblacsF77init -lblacs \
+        -L/opt/ibmmath/essl/4.4/lib -lesslbg -lc
  #
  # ------------------------------------------------------------------------------
...
...
@@ -54,11 +54,11 @@ read_real_gen: read_real_gen.o elpa1.o
  test_complex_gen: test_complex_gen.o read_test_parameters.o elpa1.o
	$(F90) -o $@ test_complex_gen.o read_test_parameters.o elpa1.o $(LIBS)