#!/bin/bash
#
# run_ci_tests.sh - perform the steps of an ELPA CI test (configure, build,
# "make check"), either directly, through srun, or through a SLURM batch job.
# See usage() for the command line options.

# Abort on the first unhandled failing command and trace every command.
set -e
set -x

# Defaults; each one can be overridden by a command line option.
makeTasks=1            # -j
mpiTasks=2             # -t
matrixSize=150         # -m
# NOTE(review): this is evaluated once, here. Passing -m without -n later
# leaves nrEV at 150 - confirm whether nrEV should follow the new matrix size.
nrEV=$matrixSize       # -n
blockSize=16           # -b
ompThreads=1           # -o
configureArgs=""       # -c
skipStep=0             # -s
batchCommand=""        # -q
interactiveRun="yes"   # -i
slurmBatch="no"        # -S

#######################################
# Print the help text of this script.
# Arguments: none
# Outputs:   writes the help text to stderr; nothing is written to stdout
#######################################
usage() {
  # The delimiter is quoted, so the text is emitted literally (no expansion).
  # The text is deliberately not indented: what is written here is exactly
  # what is printed.
  cat >&2 <<'EOF'

Call all the necessary steps to perform an ELPA CI test

Usage:
  run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run] [-S submit to Slurm]

Options:
 -c configure arguments
    Line of arguments passed to configure call
 -t MPI Tasks
    Number of MPI processes used during test runs of ELPA tests

 -m Matrix size
    Size of the mxm matrix used during runs of ELPA tests

 -n Number of eigenvectors
    Number of eigenvectors to be computed during runs of ELPA tests

 -b block size
    Block size of block-cyclic distribution during runs of ELPA tests

 -o OpenMP threads
    Number of OpenMP threads used during runs of ELPA tests

 -j makeTasks
    Number of processes make should use during build (default 1)

 -s skipStep
    Skip the test run if 1 (default 0)

 -q submit command
    Job steps will be submitted via command to a batch system (default no submission)

 -i interactive_run
    if "yes" NO no batch command will be triggered

 -S submit to slurm
    if "yes" a SLURM batch job will be submitted

 -h
    Print this help text
EOF
}
#######################################
# Parse the command line options and store their values in the global
# configuration variables.
# Globals (written): makeTasks, mpiTasks, matrixSize, nrEV, blockSize,
#                    ompThreads, configureArgs, skipStep, batchCommand,
#                    interactiveRun, slurmBatch, OPTIND
# Arguments: the command line arguments of the script
# Exits:     with status 1 after -h, on an unknown option, or when an option
#            is missing its argument
#######################################
parse_args() {
  local opt
  OPTIND=1
  # The leading ':' selects getopts' silent error reporting; only in that
  # mode is a missing option argument reported as ':' (with the option
  # letter in OPTARG), which makes the ':)' arm below reachable.
  while getopts ":c:t:j:m:n:b:o:s:q:i:S:h" opt; do
    case $opt in
      j) makeTasks=$OPTARG ;;
      t) mpiTasks=$OPTARG ;;
      m) matrixSize=$OPTARG ;;
      n) nrEV=$OPTARG ;;
      b) blockSize=$OPTARG ;;
      o) ompThreads=$OPTARG ;;
      c) configureArgs=$OPTARG ;;
      s) skipStep=$OPTARG ;;
      q) batchCommand=$OPTARG ;;
      i) interactiveRun=$OPTARG ;;
      S) slurmBatch=$OPTARG ;;
      h)
        usage
        # Exit status 1 after printing the help is kept as it was.
        exit 1
        ;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        exit 1
        ;;
      *)
        echo "Unknown option -$OPTARG" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# End the script successfully, without running anything, when skipStep is 1.
# Globals (read): skipStep
# Outputs: a notice on stdout when the test is skipped
#######################################
skip_if_requested() {
  # Quoted and defaulted so that an empty or unset skipStep means "do not
  # skip" instead of producing a malformed test expression.
  if [ "${skipStep:-0}" -eq 1 ]
  then
    echo "Skipping the test since option -s has been specified"
    exit 0
  fi
}

skip_if_requested
#######################################
# Copy a job script template into the current directory, append the
# configure / build / test commands to it, submit it with "sbatch -W"
# (which waits for the job to finish) and print the job's log files.
# Globals (read): HOME, configureArgs, ompThreads, mpiTasks, matrixSize,
#                 nrEV, blockSize
# Arguments:
#   $1 - file name of the template in $HOME/runners/job_script_templates
#   $2 - prefix of the log files to print (<prefix>.out.*, <prefix>.err.*)
# Outputs:  the .out logs on stdout, and the .err logs too if sbatch failed
# Returns:  the exit status of sbatch, or 1 if the job script could not be
#           prepared
#######################################
submit_slurm_job() {
  local jobScript=$1
  local logPrefix=$2
  local rc=0

  # This function is called on the left of '||', where "set -e" is not in
  # effect, so failures are checked explicitly.
  cp "$HOME/runners/job_script_templates/$jobScript" . || return 1
  {
    echo "./configure " "$configureArgs"
    echo " "
    echo "make -j 16"
    echo " "
    echo "export OMP_NUM_THREADS=$ompThreads"
    echo "export TASKS=$mpiTasks"
    echo "make check TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" "
  } >> "./$jobScript" || return 1

  # Remember the status instead of letting "set -e" end the script, so the
  # logs below are still printed for a failed job.
  sbatch -W "./$jobScript" || rc=$?

  # Printing the logs is best effort; a missing log must not hide the status.
  cat "./$logPrefix".out.* || true
  if (( rc > 0 ))
  then
    cat "./$logPrefix".err.* || true
  fi
  return "$rc"
}

if [ "$slurmBatch" == "yes" ]
then
  # Derive the cluster from the host name. All names are checked in this
  # order and the last match wins.
  # NOTE(review): bash itself provides HOSTNAME, not HOST; this relies on HOST
  # being set by the environment - confirm.
  CLUSTER=""
  for clusterName in cobra talos freya draco
  do
    if [[ "$HOST" == *"$clusterName"* ]]
    then
      CLUSTER="$clusterName"
    fi
  done

  if [ "$CLUSTER" == "cobra" ]
  then
    echo "Running on cobra with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS"

    # Defined up front so the checks below are valid even if no tag matches.
    exitCode=0
    case "$CI_RUNNER_TAGS" in
      gpu)
        # GPU runners
        submit_slurm_job "run_${CLUSTER}_1node_2GPU.sh" "ELPA_CI_2gpu" || exitCode=$?
        ;;
      sse|avx|avx2|avx512)
        # SSE, AVX, AVX2, and AVX-512 runners. The script that is submitted
        # is the one that was just generated (run_cobra_1node.sh).
        submit_slurm_job "run_${CLUSTER}_1node.sh" "ELPA_CI" || exitCode=$?
        ;;
      *)
        echo "No SLURM job defined for runner tag '$CI_RUNNER_TAGS'" >&2
        ;;
    esac

    if (( exitCode > 0 ))
    then
      cat ./test-suite.log || true
    fi

    exit "$exitCode"
  else
    # Only cobra is handled; the other clusters fall through as before.
    echo "No SLURM batch logic for cluster '$CLUSTER'; nothing submitted" >&2
  fi
fi

#######################################
# Configure, build and test directly or through srun.
# This is the old implementation for
# - appdev
# - freya-interactive
# - draco
# - buildtest
# - virtual machine runners
# hopefully this can be removed soon
#
# Every step is checked with '||' because under "set -e" a separate test of
# $? on the following line is never reached when the step fails.
# Globals (read): batchCommand, interactiveRun, configureArgs, makeTasks,
#                 mpiTasks, ompThreads, matrixSize, nrEV, blockSize,
#                 SRUN_COMMANDLINE_CONFIGURE, SRUN_COMMANDLINE_BUILD,
#                 SRUN_COMMANDLINE_RUN
# Exits: with status 1 if a step fails or if test-suite.log contains
#        "Expected %stop" (case-insensitive); returns 0 otherwise
#######################################
run_legacy_ci() {
  echo "Using old CI logic for appdev"

  if [ "$batchCommand" == "srun" ] && [ "$interactiveRun" == "no" ]
  then
    # interactive runs are not possible: use srun to start every step
    echo "Running with $batchCommand with $SRUN_COMMANDLINE_CONFIGURE"

    # $batchCommand and the SRUN_COMMANDLINE_* variables are left unquoted
    # on purpose: they hold several words that must be split into arguments.
    # shellcheck disable=SC2086
    $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE \
      ./ci_test_scripts/configure_step.sh "$configureArgs" \
      || { cat config.log || true; exit 1; }

    # shellcheck disable=SC2086
    $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=8 $SRUN_COMMANDLINE_BUILD \
      ./ci_test_scripts/build_step.sh "$makeTasks" \
      || exit 1

    # shellcheck disable=SC2086
    $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=2 $SRUN_COMMANDLINE_RUN \
      ./ci_test_scripts/test_step.sh "$mpiTasks" "$ompThreads" "TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " \
      || { cat test-suite.log || true; exit 1; }
  else
    # interactive runs are possible, or srun is not used to start mpi
    # applications: run every step directly
    ./ci_test_scripts/configure_step.sh "$configureArgs" \
      || { cat config.log || true; exit 1; }

    make -j "$makeTasks" || exit 1

    OMP_NUM_THREADS=$ompThreads make check TASKS="$mpiTasks" TEST_FLAGS="$matrixSize $nrEV $blockSize" \
      || { cat test-suite.log || true; exit 1; }
  fi

  # The run counts as failed if the log contains this marker, even when all
  # steps returned success. The matching lines are printed.
  if grep -i "Expected %stop" test-suite.log
  then
    exit 1
  fi
}

# not skipped then proceed
if [ "$slurmBatch" == "no" ]
then
  run_legacy_ci
else
  # a submission to SLURM via a batch script will be done
  echo "Submitting to a SLURM batch system"
fi