diff --git a/BioEM_labbook.org b/BioEM_labbook.org index d9eae88c1a4abfa526e0b1c2c8c22276cbb3fa0e..a8cb57d72a5c14ce631efa1f124fd4dd977fb3ec 100644 --- a/BioEM_labbook.org +++ b/BioEM_labbook.org @@ -112,7 +112,7 @@ mpiexec -n 2 $BUILD_DIR/bioEM --Inputfile $TUTORIAL_DIR/Param_Input --Modelfile # Loading necessary modules if needed (check installation gcc/Intel) # Environment variables -export OMP_NUM_THREADS=5 +export OMP_NUM_THREADS=20 export OMP_PLACES=cores export FFTALGO=1 export GPU=1 @@ -123,13 +123,44 @@ export GPUWORKLOAD=100 INPUT_DIR="$HOME/BioEM_project/inputs" BUILD_DIR="$HOME/BioEM_project/build" -# Running only few iterations, reading already generated maps (only if they are avaialble in the same folder -BIOEM_DEBUG_BREAK=4 BIOEM_DEBUG_OUTPUT=2 mpiexec -n 2 $BUILD_DIR/bioEM --Inputfile $INPUT_DIR/INPUT_FRH_Sep2016 --Modelfile $INPUT_DIR/Mod_X-ray_PDB --Particlesfile $INPUT_DIR/2000FRH_Part --LoadMapDump +# Running only few iterations, reading already generated maps (only if they are available in the same folder +BIOEM_DEBUG_BREAK=20 BIOEM_DEBUG_OUTPUT=2 mpiexec -n 2 $BUILD_DIR/bioEM --Inputfile $INPUT_DIR/INPUT_FRH_Sep2016 --Modelfile $INPUT_DIR/Mod_X-ray_PDB --Particlesfile $INPUT_DIR/2000FRH_Part --LoadMapDump + +# To check if the execution provided correct results +/afs/ipp-garching.mpg.de/u/sluka/BioEM_fork/Tutorial_BioEM/MODEL_COMPARISION/subtract_LogP.sh $INPUT_DIR/Output_Probabilities_20_ref Output_Probabilities | tail # Running full example BIOEM_DEBUG_OUTPUT=0 mpiexec -n 2 $BUILD_DIR/bioEM --Inputfile $INPUT_DIR/INPUT_FRH_Sep2016 --Modelfile $INPUT_DIR/Mod_X-ray_PDB --Particlesfile $INPUT_DIR/2000FRH_Part #+END_SRC +**** Running multiple experiments example + +#+BEGIN_SRC +# Loading necessary modules if needed (check installation gcc/Intel) + +# Environment variables +export OMP_NUM_THREADS=20 +export OMP_PLACES=cores +export FFTALGO=1 +export GPU=1 +export GPUDEVICE=-1 +export GPUWORKLOAD=100 + +# Paths 
+INPUT_DIR="$HOME/BioEM_project/inputs" +BUILD_DIR="$HOME/BioEM_project/build" + +echo "Time, Workload, GPUs, OMP_THREADS" > results.csv + +# Running multiple experiments +res=$(BIOEM_DEBUG_BREAK=50 BIOEM_DEBUG_OUTPUT=0 BIOEM_AUTOTUNING=0 GPUWORKLOAD=100 OMP_NUM_THREADS=5 mpiexec -n 2 $BUILD_DIR/bioEM --Inputfile $INPUT_DIR/INPUT_FRH_Sep2016 --Modelfile $INPUT_DIR/Mod_X-ray_PDB --Particlesfile $INPUT_DIR/2000FRH_Part --LoadMapDump | tail -1 | cut -d ' ' -f 5) + +# Write results into file +echo "$res, 100, 1, 5" >> results.csv + +#+END_SRC + + *** [error] miy (Minsky) machine - problems at CUDA runtime **** Machine description - 3 identical machine with IBM Power 8+ architecture @@ -1129,3 +1160,51 @@ bioEM: tpp.c:63: __pthread_tpp_change_priority: Assertion `new_prio == -1 || (ne + Just measure the timings on GPUs and CPUs for an iteration, and then from that derive the optimal balance + Possibly rebalance every X iterations (or projections/orientations) if the balance changed +** 2017-06-16 +*** TODO Developing autotuning on dvl machine [2/4] + :LOGBOOK: + - State "TODO" from [2017-06-16 Fri 16:58] + :END: + - Added nicer error detection and printing for the CUDA driver errors + - Problem on dvl device was related to the part that tests all CUDA devices, searching for the fastest one. Commenting out this part made both MPI and non-MPI executions possible. + - [ ] It still seems that initializing device 1 and then device 0 causes a problem (initializing 0 and then 1 seems to be fine). Need to inspect this problem in more detail + - [X] When changing the code, constantly check that the results are still numerically correct. Possibly rely on the subtract_LogP.sh script available in Tutorial_BioEM/MODEL_COMPARISION + + When comparing only the first 20 models, there is a significant difference between the obtained Output_Probabilities and the ones sent by Markus together with the inputs. 
Hence another reference file, Output_Probabilities_20_ref, was created + - [X] When changing the workload during the execution, the best performance for that workload was never reached (compared to when the same workload is tested without autotuning). Actually, this was solved by doing deviceFinishRun() -> deviceStartRun(). These functions introduce overhead, but it seems that they are necessary + - [ ] Create a script for typical runs + +*** Simple analysis of the results + +#+begin_src R :results output graphics :file :file (org-babel-temp-file "figure" ".png") :exports both :width 600 :height 400 :session org-R +library(ggplot2) + +df <- read.csv("data/results1.csv") + +df$Workload <- factor(df$Workload, levels=c(" 60%", " 80%", " 100%", " Auto")) +df$GPUs <- as.factor(as.character(paste("#GPUs =", df$GPUs))) +df$OMP_THREADS <- as.factor(as.character(paste("#OMPs =", df$OMP_THREADS))) +df$OMP_THREADS <- factor(df$OMP_THREADS, levels=c("#OMPs = 5", "#OMPs = 20")) + +ggplot(df, aes(x=Workload, y=Time, fill=Workload)) + geom_bar(stat="identity") + facet_grid(GPUs ~ OMP_THREADS) + theme_bw() + ylab("Overall execution time [s]") + xlab("Workload processed on GPUs") +#+end_src + +#+RESULTS: +[[file:/tmp/babel-2425YP4/figure2425NIt.png]] + + - Maybe it would be good to save these results + +#+begin_src R :results output graphics :file analysis/results1_analysis.pdf :exports both :width 6 :height 4 :session org-R +library(ggplot2) + +df <- read.csv("data/results1.csv") + +df$Workload <- factor(df$Workload, levels=c(" 60%", " 80%", " 100%", " Auto")) +df$GPUs <- as.factor(as.character(paste("#GPUs =", df$GPUs))) +df$OMP_THREADS <- as.factor(as.character(paste("#OMPs =", df$OMP_THREADS))) +df$OMP_THREADS <- factor(df$OMP_THREADS, levels=c("#OMPs = 5", "#OMPs = 20")) + +ggplot(df, aes(x=Workload, y=Time, fill=Workload)) + geom_bar(stat="identity") + facet_grid(GPUs ~ OMP_THREADS) + theme_bw() + ylab("Overall execution time [s]") + xlab("Workload processed on GPUs") +#+end_src + 
+#+RESULTS: +[[file:analysis/results1_analysis.pdf]] diff --git a/analysis/results1_analysis.pdf b/analysis/results1_analysis.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9b072a9983d5f52bfde1d2e7869fb0af632371eb Binary files /dev/null and b/analysis/results1_analysis.pdf differ