Commit fffcad08 authored by Andreas Marek

Merge branch 'master_pre_stage' of https://gitlab.mpcdf.mpg.de/elpa/elpa into master_pre_stage

parents a490a3f4 ec8bc696
......@@ -111,42 +111,19 @@
use precision
use elpa_abstract_impl
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: na, lda, nblk, nbw, matrixCols, numBlocks, mpi_comm_rows, mpi_comm_cols
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
real(kind=REAL_DATATYPE) :: a(lda,*), tmat(nbw,nbw,*)
#else
real(kind=REAL_DATATYPE) :: a(lda,matrixCols), tmat(nbw,nbw,numBlocks)
#endif
#endif
#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE) :: a(lda,*), tmat(nbw,nbw,*)
MATH_DATATYPE(kind=rck) :: a(lda,*), tmat(nbw,nbw,*)
#else
complex(kind=COMPLEX_DATATYPE) :: a(lda,matrixCols), tmat(nbw,nbw,numBlocks)
MATH_DATATYPE(kind=rck) :: a(lda,matrixCols), tmat(nbw,nbw,numBlocks)
#endif
#endif /* COMPLEXCASE */
#if REALCASE == 1
#ifdef DOUBLE_PRECISION_REAL
real(kind=REAL_DATATYPE), parameter :: ZERO = 0.0_rk8, ONE = 1.0_rk8
#else
real(kind=REAL_DATATYPE), parameter :: ZERO = 0.0_rk4, ONE = 1.0_rk4
#endif
#endif
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
complex(kind=COMPLEX_DATATYPE), parameter :: ZERO = (0.0_rk8, 0.0_rk8), ONE = (1.0_rk8, 0.0_rk8)
#else
complex(kind=COMPLEX_DATATYPE), parameter :: ZERO = (0.0_rk4, 0.0_rk4), ONE = (1.0_rk4, 0.0_rk4)
real(kind=rk) :: eps
#endif
#endif /* COMPLEXCASE == 1 */
logical, intent(in) :: useGPU
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
......@@ -161,32 +138,19 @@
integer(kind=ik) :: istep, ncol, lch, lcx, nlc
integer(kind=ik) :: tile_size, l_rows_tile, l_cols_tile
real(kind=REAL_DATATYPE) :: vnorm2
#if REALCASE == 1
real(kind=REAL_DATATYPE) :: xf, aux1(nbw), aux2(nbw), vrl, tau, vav(nbw,nbw)
#endif
#if COMPLEXCASE == 1
complex(kind=COMPLEX_DATATYPE) :: xf, aux1(nbw), aux2(nbw), vrl, tau, vav(nbw,nbw)
#endif
real(kind=rk) :: vnorm2
MATH_DATATYPE(kind=rck) :: xf, aux1(nbw), aux2(nbw), vrl, tau, vav(nbw,nbw)
#if COMPLEXCASE == 1
! complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCUDA(:,:), vmrCUDA(:,:), umcCUDA(:,:) ! note the different dimension in real case
complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCUDA(:), vmrCUDA(:), umcCUDA(:)
complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCPU(:,:), vmrCPU(:,:), umcCPU(:,:)
complex(kind=COMPLEX_DATATYPE), allocatable :: vr(:)
#endif
#if REALCASE == 1
real(kind=REAL_DATATYPE), allocatable :: tmpCUDA(:), vmrCUDA(:), umcCUDA(:)
real(kind=REAL_DATATYPE), allocatable :: tmpCPU(:,:), vmrCPU(:,:), umcCPU(:,:)
real(kind=REAL_DATATYPE), allocatable :: vr(:)
#endif
MATH_DATATYPE(kind=rck), allocatable :: tmpCUDA(:), vmrCUDA(:), umcCUDA(:)
MATH_DATATYPE(kind=rck), allocatable :: tmpCPU(:,:), vmrCPU(:,:), umcCPU(:,:)
MATH_DATATYPE(kind=rck), allocatable :: vr(:)
#if REALCASE == 1
! needed for blocked QR decomposition
integer(kind=ik) :: PQRPARAM(11), work_size
real(kind=REAL_DATATYPE) :: dwork_size(1)
real(kind=REAL_DATATYPE), allocatable :: work_blocked(:), tauvector(:), blockheuristic(:)
real(kind=rk) :: dwork_size(1)
real(kind=rk), allocatable :: work_blocked(:), tauvector(:), blockheuristic(:)
#endif
! a_dev is passed from bandred_real to trans_ev_band
integer(kind=C_intptr_T) :: a_dev, vmr_dev, umc_dev, tmat_dev, vav_dev
......
This diff is collapsed.
......@@ -501,15 +501,7 @@ program test
#if defined(TEST_EIGENVECTORS) || defined(TEST_QR_DECOMPOSITION)
#ifdef TEST_MATRIX_ANALYTIC
!
!#if defined(TEST_MATRIX_ANALYTIC)
status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, check_all_evals)
call check_status(status, myid)
if (.true.) then
! also check residuals
status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows,np_cols, my_prow, my_pcol)
call check_status(status, myid)
endif
#else
!#elif defined(TEST_MATRIX_FRANK)
! status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows,np_cols, my_prow, my_pcol)
......
#!/usr/bin/env python
"""Generate one batch job per combination of the benchmark parameters below.

Every combination of solver method, math type, precision, matrix size,
eigenvector percentage, block size and node count is turned into a token
dictionary and handed to ``variant`` (imported from ``scaling``) together
with the output directory and the job template.
"""
from itertools import product

# ``variant`` is the only name this script takes from ``scaling``.
from scaling import variant

output_dir = "out"
template_file = "run_template_hydra.sh"

#elpa_method = ['elpa1', 'elpa2']
elpa_method = ['elpa1', 'elpa2', 'scalapack_all', 'scalapack_part']
#elpa_method = ['scalapack_part']
math_type = ['real', 'complex']
precision = ['single', 'double']
mat_size = [5000, 20000]
# Percentage of the matrix size used as the number of eigenvectors.
proc_eigen = [10, 50, 100]
block_size = [16]
# One node plus the powers of two 4 .. 64.
#num_nodes = [1] + [2**i for i in range(2,11)]
num_nodes = [1] + [2**i for i in range(2, 7)]
#num_nodes = [2048]

#===============================================================================================
#===============================================================================================
# the rest of the script should be changed only if something changed (etc. in elpa)
#===============================================================================================
#===============================================================================================

for em, mt, pr, ms, pe, bs, nn in product(elpa_method, math_type, precision, mat_size, proc_eigen, block_size, num_nodes):
    # A fresh dictionary per job: ``variant`` receives it and may extend it.
    tokens = {
        '_BLOCK_SIZE_': bs,
        '_MAT_SIZE_': ms,
        # pe is a percentage; integer division keeps the count integral.
        '_NUM_EIGEN_': ms * pe // 100,
        '_NUM_NODES_': nn,
    }
    variant(output_dir, template_file, tokens, em, mt, pr)
#! /bin/bash
# Tabulate values from the *.txt logs in the current directory.
#
# The first output line is the column header. After that, one row is printed
# for every log that contains an "e%eigenvectors()" line: field 5 of the
# " node = " line, followed by field 3 of the "e%eigenvectors()",
# "|_ tridiag_", "|_ solve " and "|_ trans_ev" lines (presumably timings).

echo nodes total tridiag solve trans_ev

# Print field 3 of every line of file $2 that matches grep pattern $1.
third_field() {
    grep -- "$1" "$2" | awk '{print $3}'
}

for f in *.txt
do
    # When nothing matches, the glob stays the literal "*.txt"; skip it
    # rather than hand grep a file that does not exist.
    [[ -e "$f" ]] || continue
    #echo "processing $f... "
    S=$(grep -- " node = " "$f" | awk '{print $5}')
    TOTAL=$(third_field "e%eigenvectors()" "$f")
    # Logs without a total are skipped.
    if [[ -z "$TOTAL" ]]; then
        continue
    fi
    S+=" $TOTAL"
    S+=" $(third_field "|_ tridiag_" "$f")"
    S+=" $(third_field "|_ solve " "$f")"
    S+=" $(third_field "|_ trans_ev" "$f")"
    # Deliberately unquoted: word splitting folds any newlines or repeated
    # blanks in the collected values into single spaces.
    echo $S
done
#! /bin/bash
# Tabulate values from the *.txt logs in the current directory.
#
# The first output line is the column header. After that, one row is printed
# for every log that contains an "e%eigenvectors()" line: field 5 of the
# " node = " line, followed by field 3 of the "e%eigenvectors()",
# "|_ bandred ", "|_ tridiag ", "|_ solve ", "|_ trans_ev_to_band " and
# "|_ trans_ev_to_full " lines (presumably timings).

echo nodes total bandred tridiag solve trans_ev_to_band trans_ev_to_full

# Print field 3 of every line of file $2 that matches grep pattern $1.
third_field() {
    grep -- "$1" "$2" | awk '{print $3}'
}

for f in *.txt
do
    # When nothing matches, the glob stays the literal "*.txt"; skip it
    # rather than hand grep a file that does not exist.
    [[ -e "$f" ]] || continue
    #echo "processing $f... "
    S=$(grep -- " node = " "$f" | awk '{print $5}')
    TOTAL=$(third_field "e%eigenvectors()" "$f")
    # Logs without a total are skipped.
    if [[ -z "$TOTAL" ]]; then
        continue
    fi
    S+=" $TOTAL"
    S+=" $(third_field "|_ bandred " "$f")"
    S+=" $(third_field "|_ tridiag " "$f")"
    S+=" $(third_field "|_ solve " "$f")"
    S+=" $(third_field "|_ trans_ev_to_band " "$f")"
    S+=" $(third_field "|_ trans_ev_to_full " "$f")"
    # Deliberately unquoted: word splitting folds any newlines or repeated
    # blanks in the collected values into single spaces.
    echo $S
done
#! /bin/bash
# Tabulate values from the *.txt logs in the current directory.
#
# The first output line is the column header. After that, one row is printed
# for every log that contains an "e%eigenvectors()" line: field 5 of the
# " node = " line followed by field 3 of the "e%eigenvectors()" line
# (presumably the total time).

echo nodes total

for f in *.txt
do
    # When nothing matches, the glob stays the literal "*.txt"; skip it
    # rather than hand grep a file that does not exist.
    [[ -e "$f" ]] || continue
    #echo "processing $f... "
    S=$(grep -- " node = " "$f" | awk '{print $5}')
    TOTAL=$(grep -- "e%eigenvectors()" "$f" | awk '{print $3}')
    # Logs without a total are skipped.
    if [[ -z "$TOTAL" ]]; then
        continue
    fi
    S+=" $TOTAL"
    # Deliberately unquoted: word splitting folds any newlines or repeated
    # blanks in the collected values into single spaces.
    echo $S
done
#! /usr/bin/env python
import numpy as np
import matplotlib.pyplot as plt
import os
print("PLOTING ...")

# --- what to plot ------------------------------------------------------------
# Together with the eigenvector count and the method name these values form
# the directory in which each "tab.txt" is looked up.
num_type = "real"
prec = "double"
mat_size = 5000
base_paths = ["results", "results2"]

# Multiplier that converts a node count into a core count for the x axis.
cores_per_node = 20

# --- column names per solver -------------------------------------------------
elpa1_subtimes = "tridiag solve trans_ev".split()
elpa2_subtimes = "bandred tridiag solve trans_ev_to_band trans_ev_to_full".split()

# --- appearance --------------------------------------------------------------
# One palette (six shades) per curve group and one marker per curve.
group_colors = [
    shades.split()
    for shades in (
        "red firebrick indianred tomato maroon salmon",
        "green darkgreen springgreen darkseagreen lawngreen yellowgreen",
        "blue darkblue cornflowerblue dodgerblue midnightblue lightskyblue",
        "magenta darkviolet mediumvioletred orchid deeppink purple",
        "orange gold navajowhite darkorange goldenrod sandybrown",
        "cyan darkcyan lightseagreen turquoise darkturquoise mediumturquoise",
    )
]
group_symbols = list("os*DxH")
def scalapack_name(num, pr, all_ev):
    """Build the routine name used to label a ScaLAPACK curve.

    Parameters
    ----------
    num : str
        "real" selects the ``p?syev`` stem; any other value selects the
        ``p?heev`` stem.
    pr : str
        "single" selects the ``s``/``c`` prefix; any other value selects
        ``d``/``z``.
    all_ev : bool
        Truthy appends "d", falsy appends "r".

    Returns
    -------
    str
        One of pssyev/pdsyev/pcheev/pzheev followed by "d" or "r".
    """
    # Decide on the ``num`` argument itself; the earlier code consulted the
    # module-level ``num_type`` and silently ignored what the caller passed.
    if num == "real":
        stem = "pssyev" if pr == "single" else "pdsyev"
    else:
        stem = "pcheev" if pr == "single" else "pzheev"
    return stem + ("d" if all_ev else "r")
def line(what, mat_size, proc_evec, method, label, color, style):
    """Plot one curve: column ``what`` against the number of cores.

    For every directory in ``base_paths`` the table
    ``<base>/<num_type>/<prec>/<mat_size>/<mat_size*proc_evec//100>/<method>/tab.txt``
    is read if it exists. When several tables exist, the element-wise
    minimum of their ``what`` columns is plotted. If no table exists, an
    empty curve is plotted so the label is still passed to matplotlib.

    Parameters
    ----------
    what : str
        Name of the column (taken from the table's header line) to plot.
    mat_size : int
        Matrix size; part of the table path.
    proc_evec : int
        Percentage of ``mat_size`` used as the eigenvector count in the path.
    method : str
        Last directory component of the table path.
    label, color, style
        Passed to ``plt.plot`` as ``label=``, ``color=`` and the format string.

    Raises
    ------
    AssertionError
        If two tables do not list the same ``nodes`` column.
    """
    # ``None`` marks "no table read yet". Comparing a NumPy array with ``[]``
    # for that purpose is not a reliable scalar test.
    nodes_res = None
    data_line_res = None
    for base_path in base_paths:
        path = "/".join([base_path, num_type, prec, str(mat_size), str(mat_size*proc_evec//100), method, "tab.txt"])
        #print(path)
        if not os.path.isfile(path):
            continue
        # names=True: the first line of the table supplies the column names.
        data = np.genfromtxt(path, names=True)
        nodes = data['nodes']
        data_line = data[what]
        #print("data_line", data_line, "data_line_res", data_line_res)
        if nodes_res is None:
            nodes_res = nodes
            data_line_res = data_line
        else:
            # array_equal also copes with differing shapes and 0-d data.
            assert np.array_equal(nodes, nodes_res)
            data_line_res = np.minimum(data_line_res, data_line)
    if nodes_res is None:
        # Nothing found: keep the earlier behaviour of plotting an empty curve.
        cores = []
        data_line_res = []
    else:
        cores = cores_per_node * nodes_res
    #print(cores, data_line_res)
    plt.plot(cores, data_line_res, style, label=label, color=color, linewidth=2)
def plot1():
    """Plot the "total" column of every solver for the configured matrix.

    Curves are drawn in this order: the ``pdsyevd`` results, then the
    ``pdsyevr`` results for 100/50/10 % eigenvectors, then ELPA 1 and ELPA 2
    for the same three percentages. A given percentage has the same colour
    for every solver; the line style distinguishes the solver.
    """
    vendor = "MKL 2017, "
    # (percentage of eigenvectors, colour), in plotting order.
    shares = [(100, "blue"), (50, "green"), (10, "red")]

    # NOTE(review): the result directories "pdsyevd"/"pdsyevr" are fixed
    # strings, while the labels follow num_type/prec - confirm that this is
    # intended for anything other than real/double.
    line("total", mat_size, 100, "pdsyevd", vendor + scalapack_name(num_type, prec, True), "black", "x-")

    # The partial-spectrum curves come from the "...r" routine, so they must
    # be labelled with all_ev=False; they used to carry the "...d" name.
    partial_name = vendor + scalapack_name(num_type, prec, False)
    for percent, color in shares:
        line("total", mat_size, percent, "pdsyevr", partial_name + ", %d%% EVs" % percent, color, "x-")

    for percent, color in shares:
        line("total", mat_size, percent, "elpa1", "ELPA 1, %d%% EVs" % percent, color, "*--")

    for percent, color in shares:
        line("total", mat_size, percent, "elpa2", "ELPA 2, %d%% EVs" % percent, color, "o:")
def details(proc_ev):
    """Plot every sub-step column of ELPA 1 and ELPA 2 for one percentage.

    ``proc_ev`` is the eigenvector percentage handed on to ``line``.
    ELPA 1 curves use the first colour group and every second marker,
    ELPA 2 curves the second colour group and consecutive markers.
    """
    elpa1_colors, elpa2_colors = group_colors[0], group_colors[1]

    for idx, step in enumerate(elpa1_subtimes):
        marker = group_symbols[2 * idx]
        line(step, mat_size, proc_ev, "elpa1", "ELPA1 - " + step, elpa1_colors[idx], marker + '-')

    for idx, step in enumerate(elpa2_subtimes):
        marker = group_symbols[idx]
        line(step, mat_size, proc_ev, "elpa2", "ELPA2 - " + step, elpa2_colors[idx], marker + '-')
# Build the figure: draw the curves, then configure title, legend, log-log
# axes, grid lines, ticks and limits, save it as plot.pdf and show it.
fig = plt.figure(figsize=(15, 10))
ax = fig.add_subplot(111)
# A real boolean instead of the string 'on'.
ax.tick_params(labelright=True)
plot1()
#details(100)
#plt.title('Num CPUs ' + str(num_cpus) + ' and ' + str(eigenvectors_percent) + '% eigenvectors, ' + numtype)
#plt.title('Num CPUs ')
plt.title("Matrix " + str(mat_size//1000) + "k, " + num_type + ", " + prec)
plt.grid()
plt.legend(loc=1)
plt.xlabel('Number of cores')
plt.ylabel('Execution time [s]')
plt.xscale('log')
plt.yscale('log')
# The on/off switch is passed positionally: the keyword was called ``b`` in
# older matplotlib and ``visible`` in newer releases, so neither spelling
# works everywhere.
ax.xaxis.grid(True, which='major', color='black', linestyle=':')
ax.yaxis.grid(True, which='major', color='black', linestyle='--')
ax.yaxis.grid(True, which='minor', color='black', linestyle=':')
# x ticks at 20, 40, 80, ... cores, labelled with the plain numbers.
ticks = [20 * 2**i for i in range(0, 12)]
ax.xaxis.set_ticks(ticks)
ax.xaxis.set_ticklabels(ticks)
# The y range depends on the matrix size.
if mat_size < 10000:
    y_min = 0.1
    y_max = 50
else:
    y_min = 5
    y_max = 500
# The axis limits are added to the decade ticks so they get a label too.
yticks_major = [1, 10, 100, 1000, y_min, y_max]
ax.yaxis.set_ticks(yticks_major)
ax.yaxis.set_ticklabels(yticks_major)
# yticks_minor = [2, 5, 20, 50, 200, 500]
# ax.yaxis.set_ticks(yticks_minor, minor=True)
# ax.yaxis.set_ticklabels(yticks_minor, minor=True)
plt.ylim([y_min, y_max])
plt.xlim([20, 41000])
plt.savefig('plot.pdf')
#if show:
plt.show()
#plt.close()
#!/bin/bash
# Filter that wraps a whitespace-separated table read from stdin in plotting
# commands written to stdout (the commands look like gnuplot syntax;
# presumably the output is meant to be piped into gnuplot - confirm).
#
# Usage: <this script> [column] < table
#   column  field number plotted against field 1; defaults to 2.

# Column to plot, taken from the first argument (default 2).
column=${1:-2}
# The first input line is kept in x; it is used below for the curve title.
read x
echo set terminal dumb
echo set logscale xy
# Emit the plot command for inline data ("-"). The title is field number
# $column of the first input line, extracted with awk; the quote juggling
# splices the shell variable into the awk program as $"<column>".
echo plot \"-\" u 1:$column with lines title \""`echo $x | awk '{print $"'"$column"'"}'`"\"
# Repeat the first line behind a "#".
echo "#" $x
# Copy the remaining input lines through. $x is unquoted, so runs of
# whitespace in each line are reduced to single spaces.
while read x;
do echo $x;
done
#! /usr/bin/env python
"""Create a ``tab.txt`` summary in every result directory below ``results``.

Each directory under ``rootdir`` that contains at least one file gets a
``tab.txt`` holding the standard output of a parser program that is run
inside that directory. The parser is chosen from the directory's own name:
``parse_elpa1`` for "elpa1", ``parse_elpa2`` for "elpa2" and ``parse_mkl``
for everything else. The parsers are started by bare name, exactly as
before, so they have to be found via PATH.
"""
import os
import subprocess

rootdir = "results"
# Absolute starting directory with a trailing slash. os.getcwd() returns a
# str on every Python version, whereas the output of an external ``pwd`` is
# bytes on Python 3 and cannot be concatenated with "/".
path = os.getcwd() + "/"

# Parser program per method directory; anything else falls back to parse_mkl.
parsers = {"elpa1": "parse_elpa1", "elpa2": "parse_elpa2"}

for subdir, dirs, files in os.walk(rootdir):
    if len(files) == 0:
        continue
    workdir = path + subdir
    method = subdir.split("/")[-1]
    parser = parsers.get(method, "parse_mkl")
    with open(os.path.join(workdir, "tab.txt"), "w") as outfile:
        # cwd= runs the parser inside the result directory without changing
        # this process's own working directory, which os.walk relies on.
        subprocess.call(parser, stdout=outfile, cwd=workdir)
# @ shell=/bin/bash
#
# Sample script for LoadLeveler
#
## @ class = test
# @ task_affinity = core(1)
# @ error = _OUTPUT_DIR_/$(jobid).err
# @ output = _OUTPUT_DIR_/$(jobid).out
# @ job_type = parallel
# @ node_usage= not_shared
# @ node = _NUM_NODES_
# @ tasks_per_node = 20
# xxx @ first_node_tasks = 20
#### @ resources = ConsumableCpus(1)
##### @ node_resources = ConsumableMemory(2GB)
# @ network.MPI = sn_all,not_shared,us
# @ wall_clock_limit = 00:20:00
# @ notification = never
# @ notify_user = $(user)@rzg.mpg.de
# @ queue

# This file is a template: the upper-case names enclosed in underscores are
# placeholders that get replaced by plain text substitution before the job
# is submitted. Keep such names out of comments, because the substitution
# touches comments as well.

# run the program
cd ..
OUTPUT_FILE="run/_OUTPUT_DIR_/${LOADL_STEP_ID}.txt"

# Record the scheduler directives of this script at the top of the log.
grep "# @" "$0" >> "${OUTPUT_FILE}"

echo BUILD_DIR= "$(pwd)" >> "$OUTPUT_FILE"

echo "Modules loaded at config-time" >> "$OUTPUT_FILE"
cat modules_config.log >> "$OUTPUT_FILE"

source ./load_modules.sh
echo "Modules loaded at run-time" >> "$OUTPUT_FILE"
module list >> "$OUTPUT_FILE" 2>&1

#echo "ulimit -s" >> $OUTPUT_FILE
#ulimit -s >> $OUTPUT_FILE
#echo "List of hosts" >> $OUTPUT_FILE
#cat $LOADL_HOSTFILE >> $OUTPUT_FILE
#echo "Content of config.log" >> $OUTPUT_FILE
#cat config.log >> $OUTPUT_FILE
#echo "Output of configure script" >> $OUTPUT_FILE
#cat config_output.log >> $OUTPUT_FILE

# The placeholder below expands to an optional shell statement (or to
# nothing); it is logged first and then executed on a line of its own.
echo _PRE_RUN_ >> "$OUTPUT_FILE"
_PRE_RUN_

echo "Running elpa command: " >> "$OUTPUT_FILE"
COMMAND="poe ./_EXECUTABLE_ _MAT_SIZE_ _NUM_EIGEN_ _BLOCK_SIZE_ "
echo $COMMAND >> "$OUTPUT_FILE"
# Unquoted on purpose: the command string must be split into words.
${COMMAND} >> "$OUTPUT_FILE" 2>&1
import os
import sys
def substitute(template_file, tokens):
    """Render ``template_file`` into ``run.sh`` in the current directory.

    Every line of the template is copied to ``run.sh`` after each key of
    ``tokens`` occurring in it has been replaced by ``str()`` of its value.
    Replacements are applied one key after another, in the dictionary's
    iteration order. ``run.sh`` is opened for writing (and thereby
    truncated) before the template is opened.
    """
    with open("run.sh", "w") as fout, open(template_file, "r") as fin:
        for raw in fin:
            rendered = raw
            for token, value in tokens.items():
                rendered = rendered.replace(token, str(value))
            fout.write(rendered)
def variant_path(output_dir, tokens, elpa_method, math_type, precision):
    """Return the "/"-separated directory of one benchmark variant.

    The components are, in order: ``output_dir``, ``math_type``,
    ``precision``, ``tokens['_MAT_SIZE_']``, ``tokens['_NUM_EIGEN_']`` and
    ``elpa_method``. Only the two token values are converted with ``str()``;
    the other components must already be strings.
    """
    sizes = [str(tokens[key]) for key in ('_MAT_SIZE_', '_NUM_EIGEN_')]
    components = [output_dir, math_type, precision] + sizes + [elpa_method]
    return "/".join(components)
def variant(output_dir, template_file, tokens, elpa_method, math_type, precision):
    """Generate ``run.sh`` for one benchmark variant and submit or print it.

    ``tokens`` is extended in place with ``_PRE_RUN_``, ``_EXECUTABLE_`` and
    ``_OUTPUT_DIR_``. The output directory is created if missing, the
    template is rendered through ``substitute`` and the resulting ``run.sh``
    is passed to ``llsubmit`` when the script's single command-line argument
    is ``--submit``; otherwise it is printed with ``cat``.

    Parameters
    ----------
    output_dir : str
        Root of the result tree.
    template_file : str
        Job template handed to ``substitute``.
    tokens : dict
        Placeholder -> value map; must already hold ``_MAT_SIZE_`` and
        ``_NUM_EIGEN_`` (read by ``variant_path``).
    elpa_method : str
        One of 'elpa1', 'elpa2', 'scalapack_all', 'scalapack_part'.
    math_type, precision : str
        Joined as ``<math_type>_<precision>`` inside the executable name.

    Raises
    ------
    AssertionError
        For an unknown ``elpa_method``.
    """
    typeprec = math_type + "_" + precision
    # Part of the executable name that follows "test_<type>_<precision>".
    executable_suffix = {
        'elpa1': "_eigenvectors_1stage_analytic",
        'elpa2': "_eigenvectors_2stage_default_kernel_analytic",
        'scalapack_all': "_eigenvectors_scalapack_all_analytic",
        'scalapack_part': "_eigenvectors_scalapack_part_analytic",
    }

    tokens['_PRE_RUN_'] = ''
    if elpa_method not in executable_suffix:
        # Raised explicitly: a bare ``assert`` is stripped under ``python -O``
        # and would let an unknown method produce a job without executable.
        raise AssertionError("unknown elpa_method: %r" % (elpa_method,))
    if elpa_method == 'elpa2':
        # NOTE(review): the template runs this text as a statement on its own
        # line, which sets a shell variable but does not export it to the
        # executable started afterwards; the same real-kernel name is also
        # used for complex runs - confirm both are intended.
        tokens['_PRE_RUN_'] = 'TEST_KERNEL="ELPA_2STAGE_REAL_AVX_BLOCK2"'
    tokens['_EXECUTABLE_'] = "test_" + typeprec + executable_suffix[elpa_method]

    tokens['_OUTPUT_DIR_'] = variant_path(output_dir, tokens, elpa_method, math_type, precision)
    if not os.path.exists(tokens['_OUTPUT_DIR_']):
        os.makedirs(tokens['_OUTPUT_DIR_'])
    substitute(template_file, tokens)

    if len(sys.argv) == 2 and sys.argv[1] == '--submit':
        os.system('llsubmit run.sh')
    else:
        os.system('cat run.sh')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment