run_ci_tests.sh 6.82 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
#!/bin/bash
# Driver for one ELPA CI job: configure, build and run the test suite,
# either directly, through srun, or via a SLURM batch job.
set -e  # abort as soon as an unchecked command fails
set -x  # trace every command into the CI log

# Defaults; each one can be overridden by a command-line option.
makeTasks=1           # -j
mpiTasks=2            # -t
matrixSize=150        # -m
# NOTE: evaluated once, here. Passing -m without -n leaves nrEV at 150.
nrEV=$matrixSize      # -n
blockSize=16          # -b
ompThreads=1          # -o
# Was misspelled "configueArg", so the variable read later never got a default.
configureArgs=""      # -c
skipStep=0            # -s
batchCommand=""       # -q
interactiveRun="yes"  # -i
slurmBatch="no"       # -S
17 18 19 20 21 22 23

# Print the command-line help for this script on stderr.
# Takes no arguments and does not exit; the caller decides what happens next.
function usage() {
  # Quoted delimiter: the text is emitted literally, without expansion.
  # The heredoc body sits at column 0 so the output does not depend on the
  # file being indented with tabs (as the "<<-" form would require).
  cat >&2 <<'EOF'

Call all the necessary steps to perform an ELPA CI test

Usage:
  run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run] [-S submit to Slurm]

Options:
 -c configure arguments
    Line of arguments passed to configure call

 -t MPI Tasks
    Number of MPI processes used during test runs of ELPA tests

 -m Matrix size
    Size of the mxm matrix used during runs of ELPA tests

 -n Number of eigenvectors
    Number of eigenvectors to be computed during runs of ELPA tests

 -b block size
    Block size of block-cyclic distribution during runs of ELPA tests

 -o OpenMP threads
    Number of OpenMP threads used during runs of ELPA tests

 -j makeTasks
    Number of processes make should use during build (default 1)

 -s skipStep
    Skip the test run if 1 (default 0)

 -q submit command
    Job steps will be submitted via command to a batch system (default no submission)

 -i interactive_run
    if "yes" no batch command will be triggered

 -S submit to slurm
    if "yes" a SLURM batch job will be submitted

 -h
    Print this help text
EOF
}


65
# Parse the command-line options into the global settings variables
# (makeTasks, mpiTasks, matrixSize, nrEV, blockSize, ompThreads,
# configureArgs, skipStep, batchCommand, interactiveRun, slurmBatch).
# Arguments: the script's command line ("$@").
# Exits with status 1 on -h (after printing the help text), on an unknown
# option, and on an option that is missing its argument.
parse_options() {
  local opt
  OPTIND=1  # restart option scanning for this argument list

  # The leading ':' selects getopts' silent mode. Without it a missing
  # argument is reported as '?', so the ':' arm below could never match.
  while getopts ":c:t:j:m:n:b:o:s:q:i:S:h" opt; do
    case "$opt" in
      j) makeTasks=$OPTARG ;;
      t) mpiTasks=$OPTARG ;;
      m) matrixSize=$OPTARG ;;
      n) nrEV=$OPTARG ;;
      b) blockSize=$OPTARG ;;
      o) ompThreads=$OPTARG ;;
      c) configureArgs=$OPTARG ;;
      s) skipStep=$OPTARG ;;
      q) batchCommand=$OPTARG ;;
      i) interactiveRun=$OPTARG ;;
      S) slurmBatch=$OPTARG ;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        exit 1
        ;;
      h)
        usage
        # Exit status 1 after printing help is the script's existing behaviour.
        exit 1
        ;;
      \?)
        # In silent mode getopts no longer prints its own diagnostic.
        echo "Unknown option -$OPTARG" >&2
        exit 1
        ;;
      *)
        exit 1
        ;;
    esac
  done
}

parse_options "$@"

Andreas Marek's avatar
Andreas Marek committed
99

Andreas Marek's avatar
Andreas Marek committed
100
# Honour -s 1: leave successfully without configuring, building or testing.
# The value is quoted and defaulted so an empty or unset skipStep is treated
# as 0 instead of producing a malformed test expression.
if [ "${skipStep:-0}" -eq 1 ]; then
  echo "Skipping the test since option -s has been specified"
  exit 0
fi
Andreas Marek's avatar
Andreas Marek committed
105 106 107 108 109 110 111
# SLURM batch path (-S yes): build a job script from a per-cluster template,
# submit it, wait for it to finish and print its log files.

# Prepare and submit one SLURM job, then print its logs.
# Arguments: $1 - file name of the job-script template, looked up in
#                 $HOME/runners/job_script_templates and copied to the
#                 current directory
#            $2 - prefix of the log files; <prefix>.out.* is always printed,
#                 <prefix>.err.* only when the submission failed
# Globals:   reads configureArgs, ompThreads, mpiTasks, matrixSize, nrEV,
#            blockSize; writes exitCode (exit status of "sbatch -W")
_run_slurm_job() {
  local jobScript=$1
  local logPrefix=$2

  cp "$HOME/runners/job_script_templates/$jobScript" .

  # Append the configure / build / test steps to the copied template.
  {
    echo "./configure " "$configureArgs"
    echo " "
    echo "make -j 16"
    echo " "
    echo "export OMP_NUM_THREADS=$ompThreads"
    echo "export TASKS=$mpiTasks"
    echo "make check TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" "
  } >> "./$jobScript"

  # Capture the status through "||": under "set -e" a bare failing sbatch
  # would end the script before any log could be printed.
  exitCode=0
  sbatch -W "./$jobScript" || exitCode=$?

  # Missing log files must not hide the real exit status.
  cat ./"$logPrefix".out.* || true
  if (( exitCode > 0 )); then
    cat ./"$logPrefix".err.* || true
  fi
}

if [ "$slurmBatch" == "yes" ]; then
  # bash itself provides HOSTNAME, not HOST; use it when HOST is not set.
  clusterHost=${HOST:-${HOSTNAME:-}}

  # Substring match on the host name; a later name in the list wins.
  CLUSTER=""
  for knownCluster in cobra talos freya; do
    if [[ "$clusterHost" == *"$knownCluster"* ]]; then
      CLUSTER=$knownCluster
    fi
  done

  # Only cobra has a batch implementation in this script.
  if [ "$CLUSTER" == "cobra" ]; then
    echo "Running on cobra with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS"

    # Defined up front so the checks below are valid for every runner tag.
    exitCode=0
    case "$CI_RUNNER_TAGS" in
      gpu)
        # GPU runners
        _run_slurm_job "run_${CLUSTER}_1node_2GPU.sh" "ELPA_CI_2gpu"
        ;;
      sse | avx | avx2 | avx512)
        # SSE, AVX, AVX2, and AVX-512 runners.
        # Submit the script that was just prepared; the previous hard-coded
        # "run_COBRA_1node.sh" did not match the copied file's name.
        _run_slurm_job "run_${CLUSTER}_1node.sh" "ELPA_CI"
        ;;
      *)
        echo "No SLURM job defined for runner tag '$CI_RUNNER_TAGS'; nothing was submitted" >&2
        ;;
    esac

    if (( exitCode > 0 )); then
      cat ./test-suite.log || true
    fi

    exit "$exitCode"
  fi
fi

183 184 185 186 187 188 189 190 191 192

# Legacy (non-batch-script) path, taken when -S is "no": the configure, build
# and test steps are run either through srun or directly on this host.
#
# Every step is checked with "||" because the script runs under "set -e":
# a separate "if [ $? -ne 0 ]" after the command would never be reached on
# failure, so config.log / test-suite.log would not be shown.

# Run the three steps as srun job steps.
# Globals: reads batchCommand, SRUN_COMMANDLINE_CONFIGURE,
#          SRUN_COMMANDLINE_BUILD, SRUN_COMMANDLINE_RUN, configureArgs,
#          makeTasks, mpiTasks, ompThreads, matrixSize, nrEV, blockSize.
# Exits with status 1 if a step fails or test-suite.log contains
# "Expected %stop" (case-insensitive).
_run_steps_with_srun() {
  echo "Running with $batchCommand with $SRUN_COMMANDLINE_CONFIGURE"

  # The SRUN_COMMANDLINE_* variables are left unquoted on purpose so that
  # several options stored in one variable are split into separate words.
  # shellcheck disable=SC2086
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE \
    ./ci_test_scripts/configure_step.sh "$configureArgs" \
    || { cat config.log || true; exit 1; }

  # shellcheck disable=SC2086
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=8 $SRUN_COMMANDLINE_BUILD \
    ./ci_test_scripts/build_step.sh "$makeTasks" \
    || exit 1

  # shellcheck disable=SC2086
  "$batchCommand" --ntasks-per-core=1 --ntasks=1 --cpus-per-task=2 $SRUN_COMMANDLINE_RUN \
    ./ci_test_scripts/test_step.sh "$mpiTasks" "$ompThreads" "TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " \
    || exit 1

  if grep -i "Expected %stop" test-suite.log; then
    exit 1
  fi
}

# Run the three steps directly on the current host.
# Globals: reads configureArgs, makeTasks, mpiTasks, ompThreads, matrixSize,
#          nrEV, blockSize.
# Exits with status 1 if a step fails or test-suite.log contains
# "Expected %stop" (case-insensitive).
_run_steps_locally() {
  ./ci_test_scripts/configure_step.sh "$configureArgs" \
    || { cat config.log || true; exit 1; }

  make -j "$makeTasks" || exit 1

  OMP_NUM_THREADS=$ompThreads make check TASKS="$mpiTasks" TEST_FLAGS="$matrixSize $nrEV $blockSize" \
    || { cat test-suite.log || true; exit 1; }

  if grep -i "Expected %stop" test-suite.log; then
    exit 1
  fi
}

if [ "$slurmBatch" == "no" ]; then
  # this is the old, messy implementation for
  # - appdev
  # - freya-interactive
  # - draco
  # - buildtest
  # - virtual machine runners
  # hopefully this can be removed soon
  echo "Using old CI logic for appdev"

  # srun is only used when it was requested with -q srun AND interactive
  # runs are switched off with -i no; every other combination runs locally.
  if [ "$batchCommand" == "srun" ] && [ "$interactiveRun" == "no" ]; then
    _run_steps_with_srun
  else
    _run_steps_locally
  fi
else
  # a submission to SLURM via a batch script will be done
  echo "Submitting to a SLURM batch system"
fi
252