#!/bin/bash
# Driver that calls the steps of an ELPA CI test; see usage() for the
# command-line options that override the defaults below.

# abort as soon as a command fails and trace every command that is run
set -e
set -x

#some defaults
makeTasks=1          # -j: number of processes make uses during the build
mpiTasks=2           # -t: number of MPI processes for the test runs
matrixSize=150       # -m: size of the matrix used in the test runs
nrEV=$matrixSize     # -n: number of eigenvectors to compute
blockSize=16         # -b: block size of the block-cyclic distribution
ompThreads=1         # -o: number of OpenMP threads for the test runs
configureArgs=""     # -c: arguments passed to the configure call
skipStep=0           # -s: skip the test run if 1
batchCommand=""      # -q: command used to submit job steps (empty: none)
interactiveRun="yes" # -i: "yes" means no batch command is triggered
slurmBatch="no"      # -S: "yes" submits a SLURM batch job

# Print the help text for this script to stderr.
# The here-doc delimiter is quoted, so the text is emitted literally.
function usage() {
  cat >&2 <<'EOF'

Call all the necessary steps to perform an ELPA CI test

Usage:
  run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run] [-S submit to Slurm]

Options:
 -c configure arguments
    Line of arguments passed to configure call

 -t MPI Tasks
    Number of MPI processes used during test runs of ELPA tests

 -m Matrix size
    Size of the mxm matrix used during runs of ELPA tests

 -n Number of eigenvectors
    Number of eigenvectors to be computed during runs of ELPA tests

 -b block size
    Block size of block-cyclic distribution during runs of ELPA tests

 -o OpenMP threads
    Number of OpenMP threads used during runs of ELPA tests

 -j makeTasks
    Number of processes make should use during build (default 1)

 -s skipStep
    Skip the test run if 1 (default 0)

 -q submit command
    Job steps will be submitted via command to a batch system (default no submission)

 -i interactive_run
    if "yes" no batch command will be triggered

 -S submit to slurm
    if "yes" a SLURM batch job will be submitted

 -h
    Print this help text
EOF
}


# Parse the command-line options and store their values in the global
# settings variables (makeTasks, mpiTasks, matrixSize, nrEV, blockSize,
# ompThreads, configureArgs, skipStep, batchCommand, interactiveRun,
# slurmBatch).
# Arguments: the script's positional parameters ("$@")
# Exits:     1 on -h (after printing the usage text), on an unknown option
#            and on an option that lacks its argument
parse_args() {
  local opt
  OPTIND=1

  # The leading ':' selects getopts' silent error reporting; without it the
  # ':' (missing argument) case below can never be reached.
  while getopts ":c:t:j:m:n:b:o:s:q:i:S:h" opt; do
    case "$opt" in
      j) makeTasks=$OPTARG ;;
      t) mpiTasks=$OPTARG ;;
      m) matrixSize=$OPTARG ;;
      n) nrEV=$OPTARG ;;
      b) blockSize=$OPTARG ;;
      o) ompThreads=$OPTARG ;;
      c) configureArgs=$OPTARG ;;
      s) skipStep=$OPTARG ;;
      q) batchCommand=$OPTARG ;;
      i) interactiveRun=$OPTARG ;;
      S) slurmBatch=$OPTARG ;;
      h)
        usage
        exit 1
        ;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        exit 1
        ;;
      *)
        echo "Unknown option -$OPTARG" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Leave the script successfully, before any work is done, when the step is
# to be skipped (skipStep equal to 1).
# Globals: skipStep (read); an empty or unset value counts as 0, so the
#          numeric test never sees a missing operand.
skip_if_requested() {
  if [ "${skipStep:-0}" -eq 1 ]; then
    echo "Skipping the test since option -s has been specified"
    exit 0
  fi
}

skip_if_requested
# Copy a job-script template into the current directory, append the ELPA
# configure/build/check commands to it, print the result and submit it with
# "sbatch -W".
# Globals:   HOME, configureArgs, ompThreads, mpiTasks, matrixSize, nrEV,
#            blockSize (read)
#            exitCode (written: 0 if sbatch succeeded, otherwise the exit
#            status of sbatch)
# Arguments: $1 - file name of the template found in
#                 $HOME/runners/job_script_templates
submit_slurm_job() {
  local jobScript=$1

  cp "$HOME/runners/job_script_templates/$jobScript" . || exit 1
  {
    echo "./configure " "$configureArgs"
    echo " "
    echo "make -j 16"
    echo " "
    echo "export OMP_NUM_THREADS=$ompThreads"
    echo "export TASKS=$mpiTasks"
    echo "make check TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" "
  } >> "./$jobScript"

  echo " "
  echo "Job script for the run"
  cat "./$jobScript"
  echo " "
  echo "Submitting to SLURM"
  if sbatch -W "./$jobScript"; then
    exitCode=0
  else
    exitCode=$?
    echo "Submission exited with exitCode $exitCode"
  fi
}

if [ "$slurmBatch" == "yes" ]
then

  # default exit code
  exitCode=1

  # Derive the cluster from the host name; if several names occur in HOST
  # the last one in this list wins.
  # NOTE(review): bash itself sets HOSTNAME, not HOST - presumably the CI
  # environment exports HOST; confirm.
  CLUSTER=""
  for clusterName in cobra talos freya draco
  do
    if [[ "$HOST" == *"$clusterName"* ]]
    then
      CLUSTER="$clusterName"
    fi
  done

  echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS on $mpiTasks tasks"

  # GPU runners
  if [ "$CI_RUNNER_TAGS" == "gpu" ]
  then
    submit_slurm_job "run_${CLUSTER}_1node_2GPU.sh"

    # Showing the log is best effort: a missing file must not replace the
    # exit code of the job under "set -e".
    cat ./ELPA_CI_2gpu.err.* || true
  fi

  #SSE, AVX, AVX2, and AVX-512 runners
  # ("avx" as a substring also covers the avx2 and avx512 tags)
  if [[ "$CI_RUNNER_TAGS" == *sse* || "$CI_RUNNER_TAGS" == *avx* ]]
  then
    submit_slurm_job "run_${CLUSTER}_1node.sh"

    echo " "
    echo "Exit Code of sbatch: $exitCode"
    echo " "
    # best effort, see the GPU branch
    cat ./ELPA_CI.out.* || true
    cat ./ELPA_CI.err.* || true
  fi

  if [ -f ./test-suite.log ]
  then
    cat ./test-suite.log
  fi

  # exitCode is still 1 here if the runner tag matched none of the branches
  exit $exitCode

fi

# Exit with status 1 if test-suite.log contains "Expected %stop" (matched
# case-insensitively); grep prints the matching lines. A missing log file
# or a log without a match is not treated as an error.
check_test_suite_log() {
  if grep -i "Expected %stop" test-suite.log
  then
    exit 1
  fi
}

# Configure, build and run "make check" directly, without a batch command.
# Every step is checked explicitly: under "set -e" a separate "$?" test
# after the command would never be reached.
# Globals: configureArgs, makeTasks, ompThreads, mpiTasks, matrixSize, nrEV,
#          blockSize (read)
# Exits:   1 if a step fails; config.log is shown after a failed configure
#          step and test-suite.log after a failed "make check"
run_local_build_and_test() {
  ./ci_test_scripts/configure_step.sh "$configureArgs" || { cat config.log; exit 1; }

  make -j "$makeTasks" || exit 1

  OMP_NUM_THREADS=$ompThreads make check TASKS="$mpiTasks" TEST_FLAGS="$matrixSize $nrEV $blockSize" || { cat test-suite.log; exit 1; }

  check_test_suite_log
}

# Run the configure, build and test step scripts through the batch command.
# Globals: batchCommand, SRUN_COMMANDLINE_CONFIGURE, SRUN_COMMANDLINE_BUILD,
#          SRUN_COMMANDLINE_RUN, configureArgs, makeTasks, mpiTasks,
#          ompThreads, matrixSize, nrEV, blockSize (read)
# Exits:   1 if a step fails; config.log is shown after a failed configure
#          step
run_batch_build_and_test() {
  echo "Running with $batchCommand with $SRUN_COMMANDLINE_CONFIGURE"

  # The SRUN_COMMANDLINE_* variables stay unquoted on purpose so that they
  # can supply several separate options.
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE ./ci_test_scripts/configure_step.sh "$configureArgs" || { cat config.log; exit 1; }

  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=8 $SRUN_COMMANDLINE_BUILD ./ci_test_scripts/build_step.sh "$makeTasks" || exit 1

  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=2 $SRUN_COMMANDLINE_RUN ./ci_test_scripts/test_step.sh "$mpiTasks" "$ompThreads" "TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " || exit 1

  check_test_suite_log
}

# not skipped then proceed
if [ "$slurmBatch" == "no" ]
then
  # this is the old, messy implementation for
  # - appdev
  # - freya-interactive
  # - draco
  # - buildtest
  # - virtual machine runners
  # hopefully this can be removed soon
  echo "Using old CI logic for appdev"

  # srun starts the steps only when interactive runs are not possible;
  # in every other case the steps are run directly
  if [ "$batchCommand" == "srun" ] && [ "$interactiveRun" == "no" ]
  then
    run_batch_build_and_test
  else
    run_local_build_and_test
  fi
else
  # NOTE(review): the "yes" case exits earlier in the script, so this branch
  # is only reached for a -S value other than "yes"/"no"; nothing is
  # submitted or tested here and the script ends successfully - confirm
  # whether this should be an error instead.
  echo "Submitting to a SLURM batch system"
fi