#!/bin/bash
#
# run_ci_tests.sh
#
# Performs the steps of one ELPA CI test (configure, build, "make check"),
# either directly on the runner or through a batch system.
# Run with -h for the list of options.

# Abort on the first failing command and trace every command into the log.
set -e
set -x

#some defaults
makeTasks=1            # -j: number of processes make should use during build
mpiTasks=2             # -t: number of MPI processes used during test runs
matrixSize=150         # -m: size of the matrix used during test runs
nrEV=$matrixSize       # -n: number of eigenvectors; starts out equal to matrixSize
blockSize=16           # -b: block size of the block-cyclic distribution
ompThreads=1           # -o: number of OpenMP threads used during test runs
configureArgs=""       # -c: line of arguments passed to the configure call
skipStep=0             # -s: 1 skips the test run
batchCommand=""        # -q: command used to submit job steps (empty: no submission)
interactiveRun="yes"   # -i: "yes" means no batch command is triggered
slurmBatch="no"        # -S: "yes" submits a SLURM batch job
gpuJob="no"            # -g: "yes" means a GPU job is assumed

# Print the help text to stderr.
# Takes no arguments and does not exit; the caller decides what happens next.
function usage() {
  # The here-document is deliberately left unindented and uses a quoted
  # delimiter: the text is emitted verbatim, with no expansion and no
  # dependence on tab indentation.
  cat >&2 <<'EOF'

Call all the necessary steps to perform an ELPA CI test

Usage:
  run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run] [-S submit to Slurm] [-g GPU job]

Options:
 -c configure arguments
    Line of arguments passed to configure call

 -t MPI Tasks
    Number of MPI processes used during test runs of ELPA tests

 -m Matrix size
    Size of the mxm matrix used during runs of ELPA tests

 -n Number of eigenvectors
    Number of eigenvectors to be computed during runs of ELPA tests

 -b block size
    Block size of block-cyclic distribution during runs of ELPA tests

 -o OpenMP threads
    Number of OpenMP threads used during runs of ELPA tests

 -j makeTasks
    Number of processes make should use during build (default 1)

 -s skipStep
    Skip the test run if 1 (default 0)

 -q submit command
    Job steps will be submitted via command to a batch system (default no submission)

 -i interactive_run
    if "yes" no batch command will be triggered

 -S submit to slurm
    if "yes" a SLURM batch job will be submitted

 -g gpu job
    if "yes" a GPU job is assumed

 -h
    Print this help text
EOF
}

# Parse the command-line options into the script's global settings.
#
# Arguments: the script's positional parameters ("$@").
# Globals written: makeTasks, mpiTasks, matrixSize, nrEV, blockSize,
#                  ompThreads, configureArgs, skipStep, batchCommand,
#                  interactiveRun, slurmBatch, gpuJob
# Exits with status 1 for -h (after printing the help text), for an unknown
# option, and for an option that is missing its argument.
parse_args() {
  # A local OPTIND lets getopts start from the first argument on every call.
  local OPTIND=1 opt
  local matrixSizeGiven="no" nrEVGiven="no"

  # The leading ':' selects getopts' silent error reporting; without it the
  # ':' case below can never be reached.
  while getopts ":c:t:j:m:n:b:o:s:q:i:S:g:h" opt; do
    case $opt in
      j)
        makeTasks=$OPTARG;;
      t)
        mpiTasks=$OPTARG;;
      m)
        matrixSize=$OPTARG
        matrixSizeGiven="yes";;
      n)
        nrEV=$OPTARG
        nrEVGiven="yes";;
      b)
        blockSize=$OPTARG;;
      o)
        ompThreads=$OPTARG;;
      c)
        configureArgs=$OPTARG;;
      s)
        skipStep=$OPTARG;;
      q)
        batchCommand=$OPTARG;;
      i)
        interactiveRun=$OPTARG;;
      S)
        slurmBatch=$OPTARG;;
      g)
        gpuJob=$OPTARG;;
      h)
        usage
        # Exit status 1 for -h is kept as it was.
        exit 1;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        exit 1;;
      \?)
        echo "Unknown option -$OPTARG (use -h for help)" >&2
        exit 1;;
      *)
        exit 1;;
    esac
  done

  # The default number of eigenvectors is the matrix size, so follow an
  # explicitly given -m unless -n was given as well.
  if [ "$matrixSizeGiven" = "yes" ] && [ "$nrEVGiven" = "no" ]; then
    nrEV=$matrixSize
  fi
}

parse_args "$@"


# -s 1 turns the whole run into a no-op that reports success.
# The expansion is quoted and defaulted so the test stays well-formed when
# skipStep is empty or unset.
if [ "${skipStep:-0}" -eq 1 ]; then
  echo "Skipping the test since option -s has been specified"
  exit 0
fi

# Print the cluster name contained in the given host name, or an empty line
# if none of the known names occurs in it.
# Arguments: $1 - host name
# When several names occur, the one latest in the list is printed.
detect_cluster() {
  local host=$1 candidate cluster=""
  for candidate in cobra talos freya draco; do
    if [[ "$host" == *"$candidate"* ]]; then
      cluster=$candidate
    fi
  done
  printf '%s\n' "$cluster"
}

# Succeed if the runner tag string names an SSE / AVX / AVX2 / AVX-512 runner.
# Arguments: $1 - runner tags
# Substring matching is used, so "avx" also covers "avx2" and "avx512".
runner_is_cpu_simd() {
  local tags=$1
  [[ "$tags" == *sse* || "$tags" == *avx* ]]
}

# Append the configure / build / check / copy-back / clean-up steps to a job
# script.
# Arguments: $1 - path of the job script to extend
# Globals read: configureArgs, ompThreads, mpiTasks, matrixSize, nrEV, blockSize
# \$runner_path and \$SLURM_JOB_ID are written literally, so they are only
# expanded when the job script itself runs.
append_job_steps() {
  local target=$1
  cat >> "$target" <<EOF
./configure  $configureArgs

make -j 16

export OMP_NUM_THREADS=$ompThreads
export TASKS=$mpiTasks
make check TEST_FLAGS=" $matrixSize $nrEV $blockSize "

#copy everything back from /tmp/elpa to runner directory
cp -r * \$runner_path
cd .. && rm -rf /tmp/elpa_\$SLURM_JOB_ID
EOF
}

if [ "$slurmBatch" == "yes" ]
then

  # default exit code
  exitCode=1

  # NOTE(review): HOST is not set by bash itself (bash provides HOSTNAME);
  # presumably the runner environment exports it - confirm.
  CLUSTER=$(detect_cluster "$HOST")

  echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS on $mpiTasks tasks"

  if [ -z "$CLUSTER" ]
  then
    # Without a cluster name no job-script template can be selected.
    echo "Cannot derive a known cluster (cobra, talos, freya, draco) from HOST='$HOST'" >&2
    exit 1
  fi

  # Select the job-script template: GPU runners first, then the
  # SSE, AVX, AVX2, and AVX-512 runners.
  jobScript=""
  if [ "$gpuJob" == "yes" ]
  then
    jobScript="run_${CLUSTER}_1node_2GPU.sh"
  elif runner_is_cpu_simd "$CI_RUNNER_TAGS"
  then
    jobScript="run_${CLUSTER}_1node.sh"
  else
    # Nothing is submitted; the default exitCode of 1 is reported below.
    echo "No job script known for runner tags '$CI_RUNNER_TAGS'" >&2
  fi

  if [ -n "$jobScript" ]
  then
    cp "$HOME/runners/job_script_templates/$jobScript" .
    append_job_steps "./$jobScript"

    echo " "
    echo "Job script for the run"
    cat "./$jobScript"
    echo " "
    echo "Submitting to SLURM"

    # "|| exitCode=$?" records a failure without tripping "set -e".
    exitCode=0
    sbatch -W "./$jobScript" || exitCode=$?
    if [ "$exitCode" -ne 0 ]
    then
      echo "Submission exited with exitCode $exitCode"
    fi

    # Show the job's log files regardless of the outcome.
    if [ "$gpuJob" == "yes" ]
    then
      cat ./ELPA_CI_2gpu.err.*
    else
      echo " "
      echo "Exit Code of sbatch: $exitCode"
      echo " "
      cat ./ELPA_CI.out.*
      cat ./ELPA_CI.err.*
    fi
  fi

  if [ -f ./test-suite.log ]
  then
    cat ./test-suite.log
  fi

  exit "$exitCode"

fi

# not skipped then proceed

# Succeed if the given log contains the marker "Expected %stop" (matched
# case-insensitively); matching lines are printed. A log that cannot be read
# counts as "marker not found".
# Arguments: $1 - path of the log file
log_has_expected_stop() {
  grep -i "Expected %stop" "$1"
}

# Configure, build and run "make check" directly on this machine.
# Globals read: configureArgs, makeTasks, ompThreads, mpiTasks, matrixSize,
#               nrEV, blockSize
# Exits the script with status 1 as soon as one step fails.
run_steps_locally() {
  # Failures are handled with "||" because, under "set -e", a separate
  # "$?" check after the command would never be reached.
  ./ci_test_scripts/configure_step.sh "$configureArgs" \
    || { cat config.log || true; exit 1; }

  make -j "$makeTasks" || exit 1

  OMP_NUM_THREADS=$ompThreads make check TASKS="$mpiTasks" TEST_FLAGS="$matrixSize $nrEV $blockSize" \
    || { cat test-suite.log || true; exit 1; }

  if log_has_expected_stop test-suite.log; then
    exit 1
  fi
}

# Run the configure, build and test steps through $batchCommand (srun).
# Globals read: batchCommand, SRUN_COMMANDLINE_CONFIGURE,
#               SRUN_COMMANDLINE_BUILD, SRUN_COMMANDLINE_RUN, configureArgs,
#               makeTasks, mpiTasks, ompThreads, matrixSize, nrEV, blockSize
# Exits the script with status 1 as soon as one step fails.
run_steps_via_srun() {
  echo "Running with $batchCommand with $SRUN_COMMANDLINE_CONFIGURE"

  # $batchCommand and the SRUN_COMMANDLINE_* variables are left unquoted on
  # purpose: each may carry several words that must become separate arguments.
  # shellcheck disable=SC2086
  $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE ./ci_test_scripts/configure_step.sh "$configureArgs" \
    || { cat config.log || true; exit 1; }

  # shellcheck disable=SC2086
  $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=8 $SRUN_COMMANDLINE_BUILD ./ci_test_scripts/build_step.sh $makeTasks \
    || exit 1

  # shellcheck disable=SC2086
  $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=2 $SRUN_COMMANDLINE_RUN ./ci_test_scripts/test_step.sh $mpiTasks $ompThreads "TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " \
    || exit 1

  if log_has_expected_stop test-suite.log; then
    exit 1
  fi
}

if [ "$slurmBatch" == "no" ]
then
  # this is the old, messy implementation for
  # - appdev
  # - freya-interactive
  # - draco
  # - buildtest
  # - virtual machine runners
  # hopefully this can be removed soon
  echo "Using old CI logic for appdev"

  if [ "$batchCommand" == "srun" ] && [ "$interactiveRun" == "no" ]
  then
    # interactive runs are not possible: use srun to start the steps
    run_steps_via_srun
  else
    # either srun is not the submit command, or interactive runs are
    # possible: run the steps directly
    run_steps_locally
  fi
else
  # a submission to SLURM via a batch script will be done
  # NOTE(review): the -S yes branch earlier in this file ends with "exit", so
  # this message appears to be reachable only for -S values other than
  # "yes"/"no" - confirm whether that is intended.
  echo "Submitting to a SLURM batch system"
fi