Commit 1d969360 authored by Andreas Marek

Add ELPA_AUTOTUNE_EXTENSIVE; use for stripe_width

parent c14f2990
......@@ -107,7 +107,8 @@ enum ELPA_CONSTANTS {
#define ELPA_FOR_ALL_AUTOTUNE_LEVELS(X, ...) \
X(ELPA_AUTOTUNE_NOT_TUNABLE, 0) \
X(ELPA_AUTOTUNE_FAST, 1) \
X(ELPA_AUTOTUNE_MEDIUM, 2)
X(ELPA_AUTOTUNE_MEDIUM, 2) \
X(ELPA_AUTOTUNE_EXTENSIVE, 3)
enum ELPA_AUTOTUNE_LEVELS {
ELPA_FOR_ALL_AUTOTUNE_LEVELS(ELPA_ENUM_ENTRY)
......
......@@ -278,24 +278,24 @@
thread_width = (l_nev-1)/max_threads + 1 ! number of eigenvectors per OMP thread
#if REALCASE == 1
! call obj%get("stripe_width_real",stripe_width, error)
call obj%get("stripewidth_real",stripe_width, error)
#ifdef DOUBLE_PRECISION_REAL
stripe_width = 48 ! Must be a multiple of 4
!stripe_width = 48 ! Must be a multiple of 4
#else
!stripe_width = stripe_width * 2
stripe_width = 96 ! Must be a multiple of 8
stripe_width = stripe_width * 2
!stripe_width = 96 ! Must be a multiple of 8
#endif
#endif /* REALCASE */
#if COMPLEXCASE == 1
!call obj%get("stripe_width_complex",stripe_width, error)
call obj%get("stripewidth_complex",stripe_width, error)
#ifdef DOUBLE_PRECISION_COMPLEX
stripe_width = 48 ! Must be a multiple of 2
!stripe_width = 48 ! Must be a multiple of 2
#else
!stripe_width = stripe_width * 2
stripe_width = 48 ! Must be a multiple of 4
stripe_width = stripe_width * 2
!stripe_width = 48 ! Must be a multiple of 4
#endif
#endif /* COMPLEXCASE */
......@@ -384,18 +384,23 @@
else ! useGPU
#if REALCASE == 1
call obj%get("stripewidth_real",stripe_width, error)
#ifdef DOUBLE_PRECISION_REAL
stripe_width = 48 ! Must be a multiple of 4
!stripe_width = 48 ! Must be a multiple of 4
#else
stripe_width = 96 ! Must be a multiple of 8
!stripe_width = 96 ! Must be a multiple of 8
stripe_width = 2 * stripe_width
#endif
#endif /* REALCASE */
#if COMPLEXCASE == 1
call obj%get("stripewidth_complex",stripe_width, error)
#ifdef DOUBLE_PRECISION_COMPLEX
stripe_width = 48 ! Must be a multiple of 2
!stripe_width = 48 ! Must be a multiple of 2
#else
stripe_width = 48 ! Must be a multiple of 4
!stripe_width = 48 ! Must be a multiple of 4
#endif
#endif /* COMPLEXCASE */
......
......@@ -222,9 +222,9 @@ static const elpa_index_int_entry_t int_entries[] = {
INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL, PRINT_YES),
INT_ENTRY("stripewidth_real", "Stripewidth_real, default 48. Must be a multiple of 4", 48, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL,
INT_ENTRY("stripewidth_real", "Stripewidth_real, default 48. Must be a multiple of 4", 48, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_REAL,
stripewidth_real_cardinality, stripewidth_real_enumerate, stripewidth_real_is_valid, NULL, PRINT_YES),
INT_ENTRY("stripewidth_complex", "Stripewidth_complex, default 96. Must be a multiple of 8", 96, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_COMPLEX,
INT_ENTRY("stripewidth_complex", "Stripewidth_complex, default 96. Must be a multiple of 8", 96, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_COMPLEX,
stripewidth_complex_cardinality, stripewidth_complex_enumerate, stripewidth_complex_is_valid, NULL, PRINT_YES),
INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation, default 63", 63, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment