run_ci_tests.sh 7.16 KB
Newer Older
Andreas Marek's avatar
Andreas Marek committed
1 2 3 4 5 6 7 8 9 10 11 12
#!/bin/bash
# Driver for an ELPA CI test: configures, builds and runs the test suite,
# either directly, via a batch command, or as a SLURM batch job.
set -e   # abort on the first unhandled command failure
set -x   # trace every command into the CI log

#some defaults
makeTasks=1            # -j: number of processes make uses during the build
mpiTasks=2             # -t: number of MPI processes used for the test runs
matrixSize=150         # -m: size of the matrix used in the tests
nrEV=$matrixSize       # -n: number of eigenvectors (defaults to the matrix size)
blockSize=16           # -b: block size of the block-cyclic distribution
ompThreads=1           # -o: number of OpenMP threads used for the test runs
configureArgs=""       # -c: arguments handed to the configure step
skipStep=0             # -s: 1 skips the test run
batchCommand=""        # -q: command used to submit job steps (empty: no submission)
interactiveRun="yes"   # -i: "no" routes the steps through the batch command
slurmBatch="no"        # -S: "yes" submits a SLURM batch job

# Print the help text for this script to stderr.
# Takes no arguments; the caller decides whether to exit afterwards.
usage() {
  # Quoted delimiter: the text is emitted literally, without any expansion.
  cat >&2 <<'EOF'

Call all the necessary steps to perform an ELPA CI test

Usage:
  run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run] [-S submit to Slurm]

Options:
 -c configure arguments
    Line of arguments passed to configure call

 -t MPI Tasks
    Number of MPI processes used during test runs of ELPA tests

 -m Matrix size
    Size of the mxm matrix used during runs of ELPA tests

 -n Number of eigenvectors
    Number of eigenvectors to be computed during runs of ELPA tests

 -b block size
    Block size of block-cyclic distribution during runs of ELPA tests

 -o OpenMP threads
    Number of OpenMP threads used during runs of ELPA tests

 -j makeTasks
    Number of processes make should use during build (default 1)

 -s skipStep
    Skip the test run if 1 (default 0)

 -q submit command
    Job steps will be submitted via command to a batch system (default no submission)

 -i interactive_run
    if "yes" no batch command will be triggered

 -S submit to slurm
    if "yes" a SLURM batch job will be submitted

 -h
    Print this help text
EOF
}


# Parse the command-line options and store their values in the script's
# global settings (makeTasks, mpiTasks, matrixSize, nrEV, blockSize,
# ompThreads, configureArgs, skipStep, batchCommand, interactiveRun,
# slurmBatch).
# Arguments: the script's positional parameters ("$@")
# Exits with status 1 on -h (after printing the help text), on an unknown
# option, and on an option that is missing its argument.
parse_cli_options() {
  local opt OPTIND=1
  # The leading ':' selects silent error reporting, so the ':' and '?' cases
  # below are reached and can print their own diagnostics.
  while getopts ":c:t:j:m:n:b:o:s:q:i:S:h" opt; do
    case "$opt" in
      j) makeTasks=$OPTARG ;;
      t) mpiTasks=$OPTARG ;;
      m) matrixSize=$OPTARG ;;
      n) nrEV=$OPTARG ;;
      b) blockSize=$OPTARG ;;
      o) ompThreads=$OPTARG ;;
      c) configureArgs=$OPTARG ;;
      s) skipStep=$OPTARG ;;
      q) batchCommand=$OPTARG ;;
      i) interactiveRun=$OPTARG ;;
      S) slurmBatch=$OPTARG ;;
      h)
        usage
        # Help has always terminated the script with status 1; kept as is.
        exit 1
        ;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        exit 1
        ;;
      *)
        echo "Unknown option -$OPTARG" >&2
        exit 1
        ;;
    esac
  done
}

parse_cli_options "$@"


# Terminate the script successfully when the skip flag equals 1.
# Arguments: $1 - value given to -s (an empty or missing value counts as 0)
# Any value that is not an integer is treated as "do not skip".
exit_if_skip_requested() {
  local flag=${1:-0}
  # stderr is silenced so a non-numeric flag does not emit a test(1) error.
  if [ "$flag" -eq 1 ] 2>/dev/null; then
    echo "Skipping the test since option -s has been specified"
    exit 0
  fi
}

exit_if_skip_requested "$skipStep"

# Print the name of the known cluster (cobra, talos, freya, draco) contained
# in the host name.
# Arguments: $1 - host name to inspect
# Outputs:   the cluster name on stdout, or nothing if no name is contained;
#            if several names occur, the last one in the list wins.
detect_cluster() {
  local host=$1 candidate found=""
  for candidate in cobra talos freya draco; do
    if [[ "$host" == *"$candidate"* ]]; then
      found=$candidate
    fi
  done
  printf '%s' "$found"
}

# Copy a job-script template into the current directory, append the
# configure/build/test commands, submit it with "sbatch -W" and print the logs.
# Globals:   reads configureArgs, ompThreads, mpiTasks, matrixSize, nrEV,
#            blockSize and HOME; writes exitCode (status of sbatch)
# Arguments: $1 - file name of the template in
#                 $HOME/runners/job_script_templates
#            $2 - prefix of the job's log files (<prefix>.out.* / <prefix>.err.*)
#            $3 - "yes" to always print the job's stdout log
submit_slurm_job() {
  local script=$1 log_prefix=$2 show_stdout=$3
  local rc=0

  cp "$HOME/runners/job_script_templates/$script" .
  {
    echo "./configure " "$configureArgs"
    echo " "
    echo "make -j 16"
    echo " "
    echo "export OMP_NUM_THREADS=$ompThreads"
    echo "export TASKS=$mpiTasks"
    echo "make check TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" "
  } >> "./$script"

  echo " "
  echo "Job script for the run"
  cat "./$script"
  echo " "
  echo "Submitting to SLURM"
  # Capture the status with '||': under "set -e" a bare failing sbatch would
  # terminate the script before the status could be stored and the logs shown.
  sbatch -W "./$script" || rc=$?

  echo " "
  echo "Exit Code of sbatch: $rc"
  echo " "
  # Dumping logs is best effort: a missing log must not mask the sbatch status.
  if [[ "$show_stdout" == "yes" ]]; then
    cat ./"$log_prefix".out.* || true
  fi
  if (( rc != 0 )); then
    cat ./"$log_prefix".err.* || true
  fi
  exitCode=$rc
}

if [ "$slurmBatch" == "yes" ]; then

  # default exit code; stays 1 if no runner tag below matches
  exitCode=1
  # NOTE(review): bash itself sets HOSTNAME, not HOST - presumably the CI
  # environment exports HOST; confirm.
  CLUSTER=$(detect_cluster "$HOST")

  echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS"

  # GPU runners
  if [ "$CI_RUNNER_TAGS" == "gpu" ]; then
    submit_slurm_job "run_${CLUSTER}_1node_2GPU.sh" ELPA_CI_2gpu no
  fi

  #SSE, AVX, AVX2, and AVX-512 runners
  # (the substring "avx" also covers the tags "avx2" and "avx512")
  if [[ "$CI_RUNNER_TAGS" == *sse* || "$CI_RUNNER_TAGS" == *avx* ]]; then
    submit_slurm_job "run_${CLUSTER}_1node.sh" ELPA_CI yes
  fi

  if [ "$exitCode" -ne 0 ]; then
    # best effort: the log may not exist if the job never reached "make check"
    cat ./test-suite.log || true
  fi

  exit "$exitCode"

fi


# not skipped then proceed

# Exit with status 1 if test-suite.log contains the text "Expected %stop"
# (case-insensitive); the matching lines are printed. A missing log file does
# not abort the run.
fail_on_expected_stop() {
  if grep -i "Expected %stop" test-suite.log; then
    exit 1
  fi
}

# Configure, build and test directly on this machine.
# Globals: reads configureArgs, makeTasks, ompThreads, mpiTasks, matrixSize,
#          nrEV, blockSize
# Exits with status 1 if a step fails. Failures are handled with '||' because
# under "set -e" a separate "$?" check after the command is never reached.
run_local_build_and_test() {
  ./ci_test_scripts/configure_step.sh "$configureArgs" || { cat config.log; exit 1; }

  make -j "$makeTasks" || exit 1

  OMP_NUM_THREADS=$ompThreads make check TASKS="$mpiTasks" TEST_FLAGS="$matrixSize $nrEV $blockSize" || { cat test-suite.log; exit 1; }

  fail_on_expected_stop
}

# Configure, build and test with every step launched through $batchCommand.
# Globals: reads batchCommand, SRUN_COMMANDLINE_CONFIGURE,
#          SRUN_COMMANDLINE_BUILD, SRUN_COMMANDLINE_RUN, configureArgs,
#          makeTasks, mpiTasks, ompThreads, matrixSize, nrEV, blockSize
# Exits with status 1 if a step fails.
run_srun_build_and_test() {
  echo "Running with $batchCommand with $SRUN_COMMANDLINE_CONFIGURE"

  # The SRUN_COMMANDLINE_* variables are left unquoted on purpose so they are
  # word-split (presumably each holds several launcher options - confirm).
  # shellcheck disable=SC2086
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE ./ci_test_scripts/configure_step.sh "$configureArgs" || { cat config.log; exit 1; }

  # shellcheck disable=SC2086
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=8 $SRUN_COMMANDLINE_BUILD ./ci_test_scripts/build_step.sh "$makeTasks" || exit 1

  # shellcheck disable=SC2086
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=2 $SRUN_COMMANDLINE_RUN ./ci_test_scripts/test_step.sh "$mpiTasks" "$ompThreads" "TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " || exit 1

  fail_on_expected_stop
}

if [ "$slurmBatch" == "no" ]; then
  # this is the old, messy implementation for
  # - appdev
  # - freya-interactive
  # - draco
  # - buildtest
  # - virtual machine runners
  # hopefully this can be removed soon
  echo "Using old CI logic for appdev"
  if [ "$batchCommand" == "srun" ] && [ "$interactiveRun" == "no" ]; then
    # interactive runs are not possible: use srun to start the job steps
    run_srun_build_and_test
  else
    # interactive runs are possible, or srun is not the batch command
    run_local_build_and_test
  fi
else
  # a submission to SLURM via a batch script will be done
  echo "Submitting to a SLURM batch system"
fi