run_ci_tests.sh 8.1 KB
Newer Older
Andreas Marek's avatar
Andreas Marek committed
1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/bash
# Driver for one ELPA CI test: configure, build and "make check", run either
# directly, through srun, or via a generated SLURM batch script.
set -e   # abort on the first failing command
set -x   # trace every command in the CI log

# Defaults; each one can be overridden by the command-line option noted.
makeTasks=1            # -j  processes used by make
mpiTasks=2             # -t  MPI tasks used by the tests
matrixSize=150         # -m  matrix size
nrEV=$matrixSize       # -n  number of eigenvectors (defaults to the matrix size)
blockSize=16           # -b  block size of the block-cyclic distribution
ompThreads=1           # -o  OpenMP threads
# Was misspelled "configueArg", which left the variable actually used by the
# rest of the script (configureArgs) without a default.
configureArgs=""       # -c  arguments passed to configure
skipStep=0             # -s  1 skips the whole test
batchCommand=""        # -q  command used to launch the steps (e.g. srun)
interactiveRun="yes"   # -i  "no" runs the steps through $batchCommand
slurmBatch="no"        # -S  "yes" submits a SLURM batch job
gpuJob="no"            # -g  "yes" uses the GPU job script template
Andreas Marek's avatar
Andreas Marek committed
18
19
20
21
22
23
24

# Print the help text to stderr.
# Takes no arguments and does not exit; the caller decides what happens next.
function usage() {
  # Quoted delimiter: the text is printed literally, nothing is expanded.
  cat >&2 <<'EOF'

Call all the necessary steps to perform an ELPA CI test

Usage:
  run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run] [-S submit to Slurm] [-g GPU job]

Options:
 -c configure arguments
    Line of arguments passed to configure call

 -t MPI Tasks
    Number of MPI processes used during test runs of ELPA tests

 -m Matrix size
    Size of the mxm matrix used during runs of ELPA tests

 -n Number of eigenvectors
    Number of eigenvectors to be computed during runs of ELPA tests

 -b block size
    Block size of block-cyclic distribution during runs of ELPA tests

 -o OpenMP threads
    Number of OpenMP threads used during runs of ELPA tests

 -j makeTasks
    Number of processes make should use during build (default 1)

 -s skipStep
    Skip the test run if 1 (default 0)

 -q submit command
    Job steps will be submitted via command to a batch system (default no submission)

 -i interactive_run
    if "yes" no batch command will be triggered (default "yes")

 -S submit to slurm
    if "yes" a SLURM batch job will be submitted (default "no")

 -g gpu job
    if "yes" a GPU job is assumed (default "no")

 -h
    Print this help text
EOF
}


Andreas Marek's avatar
Andreas Marek committed
68
# Parse the command-line options into the global settings variables
# (makeTasks, mpiTasks, matrixSize, nrEV, blockSize, ompThreads,
# configureArgs, skipStep, batchCommand, interactiveRun, slurmBatch, gpuJob).
# Arguments: the script's command line ("$@")
# Exits with status 1 on -h, on an unknown option, or on a missing argument.
parse_args() {
  local opt
  local OPTIND=1 OPTARG
  # The leading ':' selects getopts' silent error reporting. Without it the
  # ':' case below can never be reached, because a missing argument is
  # reported as '?' instead.
  while getopts ":c:t:j:m:n:b:o:s:q:i:S:g:h" opt; do
    case "$opt" in
      j) makeTasks=$OPTARG ;;
      t) mpiTasks=$OPTARG ;;
      m) matrixSize=$OPTARG ;;
      n) nrEV=$OPTARG ;;
      b) blockSize=$OPTARG ;;
      o) ompThreads=$OPTARG ;;
      c) configureArgs=$OPTARG ;;
      s) skipStep=$OPTARG ;;
      q) batchCommand=$OPTARG ;;
      i) interactiveRun=$OPTARG ;;
      S) slurmBatch=$OPTARG ;;
      g) gpuJob=$OPTARG ;;
      h)
        usage
        # exit status 1 kept from the original implementation
        exit 1
        ;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        exit 1
        ;;
      *)
        echo "Unknown option -$OPTARG" >&2
        usage
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

Andreas Marek's avatar
Andreas Marek committed
104

Andreas Marek's avatar
Andreas Marek committed
105
# Honour -s: a skipStep of 1 skips the whole test and reports success.
# The value is quoted and defaulted so that an empty or unset skipStep no
# longer produces a malformed test expression.
if [ "${skipStep:-0}" -eq 1 ]; then
  echo "Skipping the test since option -s has been specified"
  exit 0
fi
Andreas Marek's avatar
Andreas Marek committed
110
111
# Print the name of the cluster a host belongs to (empty line if unknown).
# Arguments: $1 - host name
# When several names occur in the host name the last one in the list wins,
# as in the original chain of tests.
detect_cluster() {
  local host=$1
  local known cluster=""
  for known in cobra talos freya draco; do
    if [[ "$host" == *"$known"* ]]; then
      cluster=$known
    fi
  done
  printf '%s\n' "$cluster"
}

# Succeed if the runner tags name an SSE, AVX, AVX2 or AVX-512 runner.
# Arguments: $1 - runner tags
# The substring "avx" also covers "avx2" and "avx512".
has_simd_tag() {
  [[ "$1" == *sse* || "$1" == *avx* ]]
}

# Append the configure / build / check / clean-up steps to a job script.
# Arguments: $1 - path of the job script to extend
# Globals read: configureArgs, ompThreads, mpiTasks, matrixSize, nrEV, blockSize
append_job_steps() {
  local jobScript=$1
  {
    echo "./configure " "$configureArgs"
    echo " "
    echo "make -j 16"
    echo " "
    echo "export OMP_NUM_THREADS=$ompThreads"
    echo "export TASKS=$mpiTasks"
    echo "make check TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" "
    echo " "
    echo "#copy everything back from /tmp/elpa to runner directory"
    # \$ keeps these variables unexpanded; they are resolved inside the job
    echo "cp -r * \$runner_path"
    echo "cd .. && rm -rf /tmp/elpa_\$SLURM_JOB_ID"
  } >> "$jobScript"
}

# Copy a job script template, complete it, show it and submit it with
# "sbatch -W".
# Arguments: $1 - file name of the template below
#                 $HOME/runners/job_script_templates
# Globals written: exitCode (exit status of sbatch)
submit_job_script() {
  local jobScript=$1
  cp "$HOME/runners/job_script_templates/$jobScript" .
  append_job_steps "./$jobScript"

  echo " "
  echo "Job script for the run"
  cat "./$jobScript"
  echo " "
  echo "Submitting to SLURM"
  if sbatch -W "./$jobScript"; then
    exitCode=0
  else
    exitCode=$?
    echo "Submission exited with exitCode $exitCode"
  fi
}

if [ "$slurmBatch" == "yes" ]; then

  # default exit code
  exitCode=1

  CLUSTER=$(detect_cluster "$HOST")
  if [ -z "$CLUSTER" ]; then
    echo "Warning: could not determine the cluster from HOST='$HOST'" >&2
  fi

  echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS on $mpiTasks tasks"

  # The job logs are always printed. "|| true" keeps a missing log file from
  # aborting the script under "set -e", which would lose sbatch's exit code.
  if [ "$gpuJob" == "yes" ]; then
    # GPU runners
    submit_job_script "run_${CLUSTER}_1node_2GPU.sh"
    cat ./ELPA_CI_2gpu.err.* || true
  elif has_simd_tag "$CI_RUNNER_TAGS"; then
    # SSE, AVX, AVX2, and AVX-512 runners
    submit_job_script "run_${CLUSTER}_1node.sh"

    echo " "
    echo "Exit Code of sbatch: $exitCode"
    echo " "
    cat ./ELPA_CI.out.* || true
    cat ./ELPA_CI.err.* || true
  else
    # nothing was submitted; exitCode keeps its default of 1
    echo "No job submitted: runner tags '$CI_RUNNER_TAGS' match no known runner type" >&2
  fi

  if [ -f ./test-suite.log ]; then
    cat ./test-suite.log
  fi

  exit "$exitCode"

fi

Andreas Marek's avatar
Andreas Marek committed
221
222
223
224
225
226
227
228
229
230

# not skipped then proceed

# Succeed if the given log contains an "Expected %stop" line (matched case
# insensitively); the matching lines are printed. A missing log counts as
# "not found".
# Arguments: $1 - path of the log file
log_has_expected_stop() {
  grep -i "Expected %stop" "$1"
}

# Run configure, build and "make check" directly on this machine.
# Exits with status 1 as soon as one step fails. The failures are handled
# with "||" because under "set -e" a separate "$?" test after the command is
# never reached, so config.log / test-suite.log would never be shown.
run_steps_directly() {
  ./ci_test_scripts/configure_step.sh "$configureArgs" || {
    cat config.log || true
    exit 1
  }

  make -j "$makeTasks" || exit 1

  OMP_NUM_THREADS=$ompThreads make check TASKS="$mpiTasks" TEST_FLAGS="$matrixSize $nrEV $blockSize" || {
    cat test-suite.log || true
    exit 1
  }

  if log_has_expected_stop test-suite.log; then
    exit 1
  fi
}

# Run configure, build and test through $batchCommand (srun).
# Exits with status 1 as soon as one step fails.
# SRUN_COMMANDLINE_CONFIGURE/BUILD/RUN are left unquoted as in the original,
# so that they are word-split (presumably they carry extra srun options;
# confirm against the runner setup).
run_steps_with_srun() {
  echo "Running with $batchCommand with $SRUN_COMMANDLINE_CONFIGURE"

  # shellcheck disable=SC2086
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE \
    ./ci_test_scripts/configure_step.sh "$configureArgs" || {
    cat config.log || true
    exit 1
  }

  # shellcheck disable=SC2086
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=8 $SRUN_COMMANDLINE_BUILD \
    ./ci_test_scripts/build_step.sh "$makeTasks" || exit 1

  # shellcheck disable=SC2086
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=2 $SRUN_COMMANDLINE_RUN \
    ./ci_test_scripts/test_step.sh "$mpiTasks" "$ompThreads" "TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " || exit 1

  if log_has_expected_stop test-suite.log; then
    exit 1
  fi
}

if [ "$slurmBatch" == "no" ]; then
  # this is the old, messy implementation for
  # - appdev
  # - freya-interactive
  # - draco
  # - buildtest
  # - virtual machine runners
  # hopefully this can be removed soon
  echo "Using old CI logic for appdev"

  if [ "$batchCommand" == "srun" ] && [ "$interactiveRun" == "no" ]; then
    # interactive runs are not possible: use srun to start the steps
    run_steps_with_srun
  else
    # interactive runs are possible, or srun is not used to start
    # mpi applications: run the steps directly
    run_steps_directly
  fi
else
  # a submission to SLURM via a batch script will be done
  # NOTE(review): this branch only prints the message; it is reached when
  # slurmBatch is neither "no" nor "yes" - confirm that is intended.
  echo "Submitting to a SLURM batch system"
fi
Andreas Marek's avatar
Andreas Marek committed
290