# ------------------------------------------------------------------------------
# Please set the variables below according to your system!
# ------------------------------------------------------------------------------
# Settings for Intel Fortran (Linux), Intel Composer XE 2011 (ifort 12.1) with AVX for Sandy Bridge:
#
# X86=1 selects the SSE/AVX kernel objects in the conditional rules further
# down; any other value selects the Fortran kernels.
X86=1
# F90 compiles and links the regular Fortran sources.
F90=mpiifort -O3 -traceback -fpe0
# F90OPT compiles the Fortran kernel sources.
F90OPT=$(F90) -mavx
# Alternative C toolchain (Intel):
#CC=mpiicc -O3
#CCOPT=$(CC) -mavx
CC=gcc -O3
# CCOPT compiles the C and C++ kernel sources.
CCOPT=$(CC) -mavx -funsafe-loop-optimizations -funsafe-math-optimizations -ftree-vect-loop-version -ftree-vectorize
MKL_HOME=/opt/intel/mkl/lib/intel64
# NOTE(review): -lstdc++ is presumably required because the complex SSE/AVX
# kernels are C++ while the link step is driven by $(F90) -- confirm.
LIBS = -mkl=sequential -L$(MKL_HOME) -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lstdc++
#
# ------------------------------------------------------------------------------
# Settings for Intel Fortran (Linux), Intel Composer XE 2011 (ifort 12.1) with SSE3:
#
#X86=1
#F90=mpiifort -O3 -traceback -fpe0 -g
#F90OPT=$(F90) -msse3
#CC=gcc -O3
#CCOPT=$(CC) -msse3 -funsafe-loop-optimizations -funsafe-math-optimizations -ftree-vect-loop-version -ftree-vectorize
#CC=icc -O3
#CCOPT=$(CC) -msse3
#MKL_HOME=/opt/intel/mkl/lib/intel64
#LIBS = -mkl=sequential -L$(MKL_HOME) -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
#
# ------------------------------------------------------------------------------
# Settings for Intel Fortran (Linux), Intel Composer XE 2011 (ifort 12.1) and GCC 4.6 with FMA4 for AMD Bulldozer:
#
#X86=1
#F90=mpiifort -O3 -traceback -fpe0 -g
#CC=gcc -O3
#F90OPT=$(F90) -msse3
#CCOPT=$(CC) -funsafe-loop-optimizations -funsafe-math-optimizations -ftree-vect-loop-version -ftree-vectorize -mfma4 -mxop -march=bdver1 -D__USE_AVX128__
#LIBS = -L/opt/acml5.0.0/gfortran64_fma4/lib/ -lacml -lgfortran libscalapack.a
#
# ------------------------------------------------------------------------------
# Settings for Intel Fortran (Linux) old 11.x Toolchain, !!!!!!! do not use !!!!!!:
#
#X86=0
#F90=mpif90 -O3 -traceback -g -fpe0
#F90OPT=$(F90) -xSSE4.2
#LIBS = -L/opt/intel/Compiler/11.0/069/mkl/lib/em64t -lmkl_lapack -lmkl -lguide -lpthread \
#-lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
#
# ------------------------------------------------------------------------------
# Settings for Intel Fortran on MacOSX (home-built BLACS and scalapack):
#
#X86=0
#F90=mpif90 -O3 -traceback -g -fpe0
#F90OPT=$(F90) # -xSSE4.2 ### on Mac OSX, the -xSSE4.2 option is possibly buggy in ifort!
#LIBS = -L/opt/intel/mkl/lib -I/opt/intel/mkl/include -lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
#   /usr/local/BLACS/LIB/blacs_MPI-OSX-0.a /usr/local/BLACS/LIB/blacsF77init_MPI-OSX-0.a \
#   /usr/local/SCALAPACK-1.8.0/libscalapack.a
#
# ------------------------------------------------------------------------------
# Settings for IBM AIX Power6
#
#X86=0
#F90 = mpxlf95_r -q64 -O2 -g -qarch=auto -qtune=auto
#F90OPT = mpxlf95_r -q64 -O4 -g -qarch=auto -qtune=auto
#LIBS = -L/usr/local/lib -lscalapack -llapack-essl -lessl -lblacsF77init -lblacs -lblacsF77init -lblacs -lc
#
# ------------------------------------------------------------------------------
# Settings for IBM AIX BlueGene
#
#X86=0
#F90 = mpixlf95_r -O3 -g -qarch=auto -qtune=auto
#F90OPT = mpixlf95_r -O4 -g -qarch=auto -qtune=auto
#LIBS = -L/usr/local/lib -lscalapack -llapack -lblacsF77init -lblacs -lblacsF77init -lblacs \
#   -L/opt/ibmmath/essl/4.4/lib -lesslbg -lc
#
# ------------------------------------------------------------------------------

# Build all eight executables. Declared phony so that a file named "all" in
# this directory cannot make the goal look up to date.
.PHONY: all
all: test_real read_real test_complex test_real_gen read_real_gen test_complex_gen test_real2 test_complex2

# Programs that only need ELPA1: each one is linked from its own driver object
# (<name>.o) plus elpa1.o. A static pattern rule replaces six identical
# hand-written rules; $^ expands to "<name>.o elpa1.o" in that order.
ELPA1_TESTS = test_real read_real test_complex test_real_gen read_real_gen test_complex_gen

$(ELPA1_TESTS): %: %.o elpa1.o
	$(F90) -o $@ $^ $(LIBS)

# Programs that need ELPA2. The set of kernel objects linked in depends on X86:
#   X86=1      -> the SSE/AVX kernels (elpa2_tum_kernels_*_sse-avx_*)
#   otherwise  -> the Fortran kernels (elpa2_kernels_real / elpa2_kernels_complex)
ifeq ($(X86),1)
ELPA2_KERNEL_OBJS = elpa2_tum_kernels_complex_sse-avx_1hv.o \
                    elpa2_tum_kernels_complex_sse-avx_2hv.o \
                    elpa2_tum_kernels_real_sse-avx_2hv.o \
                    elpa2_tum_kernels_real_sse-avx_4hv.o \
                    elpa2_tum_kernels_real_sse-avx_6hv.o
# Alternative for X86=1: keep the three real SSE/AVX kernels but use the
# Fortran complex kernel instead of the two complex SSE/AVX kernels:
#ELPA2_KERNEL_OBJS = elpa2_kernels_complex.o \
#                    elpa2_tum_kernels_real_sse-avx_2hv.o \
#                    elpa2_tum_kernels_real_sse-avx_4hv.o \
#                    elpa2_tum_kernels_real_sse-avx_6hv.o
else
ELPA2_KERNEL_OBJS = elpa2_kernels_real.o elpa2_kernels_complex.o
endif

# $^ expands to the full prerequisite list in the order written here:
# driver object, elpa1.o, elpa2.o, then the kernel objects.
test_real2: test_real2.o elpa1.o elpa2.o $(ELPA2_KERNEL_OBJS)
	$(F90) -o $@ $^ $(LIBS)

test_complex2: test_complex2.o elpa1.o elpa2.o $(ELPA2_KERNEL_OBJS)
	$(F90) -o $@ $^ $(LIBS)

# Driver objects for the ELPA1-only programs. elpa1.o is a prerequisite so that
# elpa1 is compiled before any driver (presumably because the drivers use the
# module file produced by that compilation -- confirm against the sources).
# Only the driver source ($<) is passed to the compiler.
ELPA1_TEST_OBJS = test_real.o read_real.o test_complex.o \
                  test_real_gen.o read_real_gen.o test_complex_gen.o

$(ELPA1_TEST_OBJS): %.o: %.f90 elpa1.o
	$(F90) -c $<

# Driver objects for the ELPA2 programs; these additionally wait for elpa2.o.
test_real2.o test_complex2.o: %.o: %.f90 elpa1.o elpa2.o
	$(F90) -c $<

# ELPA library sources are taken from ../src.
elpa1.o: ../src/elpa1.f90
	$(F90) -c $<

# elpa2 is compiled only after elpa1.
elpa2.o: ../src/elpa2.f90 elpa1.o
	$(F90) -c $<

# Kernel objects, built from ../src/elpa2_kernels. Which kernels get rules
# depends on X86 (same switch as the link rules for test_real2/test_complex2).
ifeq ($(X86),1)
# Real SSE/AVX kernels (C sources), compiled with the optimizing C command line.
elpa2_tum_kernels_real_sse-avx_2hv.o elpa2_tum_kernels_real_sse-avx_4hv.o elpa2_tum_kernels_real_sse-avx_6hv.o: %.o: ../src/elpa2_kernels/%.c
	$(CCOPT) -c $<

# Complex SSE/AVX kernels (C++ sources); they are compiled with $(CCOPT) too.
elpa2_tum_kernels_complex_sse-avx_1hv.o elpa2_tum_kernels_complex_sse-avx_2hv.o: %.o: ../src/elpa2_kernels/%.cpp
	$(CCOPT) -c $<
else
# Fortran real kernel, only needed when the SSE/AVX kernels are not used.
elpa2_kernels_real.o: ../src/elpa2_kernels/elpa2_kernels_real.f90
	$(F90OPT) -c $<
endif

# Fortran complex kernel. The rule was identical in both branches of the
# conditional, so it is defined once here, independent of X86.
elpa2_kernels_complex.o: ../src/elpa2_kernels/elpa2_kernels_complex.f90
	$(F90OPT) -c $<


# Remove objects, Fortran module files and all eight executables.
# Declared phony so that a file named "clean" cannot disable the target.
.PHONY: clean
clean:
	rm -f *.o *.mod test_real test_complex test_real_gen test_complex_gen test_real2 test_complex2 read_real read_real_gen