diff --git a/Changelog b/Changelog index 4fd68aabcc986f1960bbf246a0470aac24a4efca..92ecb82a234ee9446ea0b4509c01cbdd050a5cd7 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,22 @@ +Changelog for ELPA 2017.05.001.rc1 + +This is the release candidate 1 for the ELPA 2017.05.001 version. +It provides a first version of the new, more generic API of the ELPA library. +Smaller changes to the API might be possible in the upcoming release +candidates. For users who would like to use the older API of the ELPA +library, the API as defined with release 2016.11.001.pre is frozen in and +also supported. + +Apart from the API change to be more flexible for the future, this release +offers the following changes: + +- faster GPU implementation, especially for ELPA 1stage +- the restriction of the block-cyclic distribution blocksize = 128 in the GPU + case is relaxed +- Faster CPU implementation due to better blocking +- support of already banded matrices (new API only!) +- improved KNL support + Changelog for pre-release ELPA 2016.11.001.pre This pre-release contains an experimental API which will most likely diff --git a/DEPRECATED_FEATURES.md b/DEPRECATED_FEATURES.md index 321cf9b2c155c743f07fff88d2da81c640dc8280..403d3dfb4ba969c310b9da81f13a71f609d10065 100644 --- a/DEPRECATED_FEATURES.md +++ b/DEPRECATED_FEATURES.md @@ -6,6 +6,14 @@ in the (near) future from the *ELPA* library. ### A) Deprecated interfaces:### +With the release of ELPA 2017.05.001.rc1 a new, more general API for the library has +been published. All new features of ELPA will only be accessible via this new interface. +For ease of transition, the API as defined in release ELPA 2016.11.001 has been frozen in +and will still be supported for some time, albeit without any new features. + +Independent of the freezing in of the old, legacy API from the release 2016.11.001 the +following listed interfaces will be removed at some time. 
+ In order to unfiy the namespace of the *ELPA* public interfaces, several interfaces have been replaced by new names. The old interfaces will be removed @@ -31,4 +39,5 @@ For all symbols also the corresponding "_single" routines are available ### B) Runtime options ### -At the moment no runtime options are deprecated +At the moment no runtime options are deprecated. However, future options will only be available via the new +interface diff --git a/Doxyfile.in b/Doxyfile.in index 8b9b83cfe37b2ac414b4dc11ce430d2c2ba5ba68..968833c24902570e9d14236ce3f29e720f689cff 100644 --- a/Doxyfile.in +++ b/Doxyfile.in @@ -875,85 +875,248 @@ RECURSIVE = YES # Note that relative paths are relative to the directory from which doxygen is # run. -EXCLUDE = @top_srcdir@/src/elpa1_compute.F90 \ - @top_srcdir@/src/mod_precision.f90 \ - @top_srcdir@/src/aligned_mem.F90 \ - @top_srcdir@/src/mod_compute_hh_trafo_real.F90 \ - @top_srcdir@/src/mod_compute_hh_trafo_complex.F90 \ - @top_srcdir@/src/mod_mpi.F90 \ - @top_srcdir@/src/mod_mpi_stubs.F90 \ - @top_srcdir@/src/mod_time_c.F90 \ - @top_srcdir@/src/mod_pack_unpack_complex.F90 \ - @top_srcdir@/src/mod_pack_unpack_real.F90 \ - @top_srcdir@/src/elpa2_compute.F90 \ - @top_srcdir@/src/elpa2_utilities.F90 \ - @top_srcdir@/src/elpa_c_interface.F90 \ - @top_srcdir@/src/elpa_reduce_add_vectors.X90 \ - @top_srcdir@/src/elpa_transpose_vectors.X90 \ - @top_srcdir@/src/elpa_utilities.F90 \ - @top_srcdir@/src/timer.F90 \ - @top_srcdir@/src/redist_band.X90 \ - @top_srcdir@/src/timer_dummy.F90 \ - @top_srcdir@/src/precision_macros.h \ - @top_srcdir@/src/precision_macros_complex.h \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_asm_x86_64.s \ - @top_srcdir@/src/elpa2_kernels/mod_single_hh_trafo_real.F90 \ - @top_srcdir@/src/elpa2_kernels/mod_fortran_interfaces.F90 \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real.F90 \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real_simple.F90 \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_complex.F90 \ - 
@top_srcdir@/src/elpa2_kernels/elpa2_kernels_complex_simple.F90 \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_complex_sse_1hv.c \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_complex_sse_2hv.c \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.c \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv.c \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real_bgp.f90 \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real_bgq.f90 \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real_sse_2hv.c \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real_sse_4hv.c \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real_sse_6hv.c \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv.c \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv.c \ - @top_srcdir@/src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv.c \ - @top_srcdir@/src/elpa_qr/elpa_pdgeqrf.F90 \ - @top_srcdir@/src/elpa_qr/elpa_pdlarfb.F90 \ - @top_srcdir@/src/elpa_qr/elpa_qrkernels.f90 \ - @top_srcdir@/src/elpa_qr/qr_utils.F90 \ - @top_srcdir@/src/ftimings/ftimings.F90 \ - @top_srcdir@/src/ftimings/ftimings_type.F90 \ - @top_srcdir@/src/ftimings/ftimings_value.F90 \ - @top_srcdir@/src/ftimings/highwater_mark.c \ - @top_srcdir@/src/ftimings/papi.c \ - @top_srcdir@/src/ftimings/resident_set_size.c \ - @top_srcdir@/src/ftimings/time.c \ - @top_srcdir@/src/ftimings/virtual_memory.c \ - @top_srcdir@/test/shared/mod_output_types.F90 \ - @top_srcdir@/test/C/elpa1_test_complex_c_version.c \ - @top_srcdir@/test/C/elpa1_test_real_c_version.c \ - @top_srcdir@/test/C/elpa2_test_complex_c_version.c \ - @top_srcdir@/test/C/elpa2_test_real_c_version.c \ - @top_srcdir@/test/Fortran/read_real.F90 \ - @top_srcdir@/test/Fortran/test_complex2_choose_kernel_with_api.F90 \ - @top_srcdir@/test/Fortran/test_complex2_default_kernel.F90 \ - @top_srcdir@/test/Fortran/test_complex2.F90 \ - @top_srcdir@/test/Fortran/test_complex.F90 \ - 
@top_srcdir@/test/Fortran/test_real2_choose_kernel_with_api.F90 \ - @top_srcdir@/test/Fortran/test_real2_default_kernel.F90 \ - @top_srcdir@/test/Fortran/test_real2_default_kernel_qr_decomposition.F90 \ - @top_srcdir@/test/Fortran/test_real2.F90 \ - @top_srcdir@/test/Fortran/test_real.F90 \ - @top_srcdir@/test/Fortran/test_real_with_c.F90 \ - @top_srcdir@/test/shared/blacs_infrastructure.F90 \ - @top_srcdir@/test/shared/call_elpa1.c \ - @top_srcdir@/test/shared/call_elpa2.c \ - @top_srcdir@/test/shared/check_correctnes.F90 \ - @top_srcdir@/test/shared/mod_from_c.F90 \ - @top_srcdir@/test/shared/prepare_matrix.F90 \ - @top_srcdir@/test/shared/read_input_parameters.F90 \ - @top_srcdir@/test/shared/redir.c \ - @top_srcdir@/test/shared/redirect.F90 \ - @top_srcdir@/test/shared/setup_mpi.F90 \ - @top_srcdir@/test/shared/util.F90 +EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \ + @top_srcdir@/src/GPU/cuUtils.cu \ + @top_srcdir@/src/GPU/cuUtils.h \ + @top_srcdir@/src/GPU/cuUtils_template.Xcu \ + @top_srcdir@/src/GPU/cudaFunctions.cu \ + @top_srcdir@/src/GPU/mod_cuda.F90 \ + @top_srcdir@/src/elpa1/elpa1.F90 \ + @top_srcdir@/src/elpa1/elpa1_auxiliary.F90 \ + @top_srcdir@/src/elpa1/elpa1_compute_private.F90 \ + @top_srcdir@/src/elpa1/elpa1_compute_template.X90 \ + @top_srcdir@/src/elpa1/elpa1_merge_systems_real_template.X90 \ + @top_srcdir@/src/elpa1/elpa1_solve_tridi_real_template.X90 \ + @top_srcdir@/src/elpa1/elpa1_template.X90 \ + @top_srcdir@/src/elpa1/elpa1_tools_template.X90 \ + @top_srcdir@/src/elpa1/elpa1_trans_ev_template.X90 \ + @top_srcdir@/src/elpa1/elpa1_tridiag_template.X90 \ + @top_srcdir@/src/elpa1/elpa1_utilities.F90 \ + @top_srcdir@/src/elpa1/elpa_cholesky_template.X90 \ + @top_srcdir@/src/elpa1/elpa_invert_trm.X90 \ + @top_srcdir@/src/elpa1/elpa_multiply_a_b.X90 \ + @top_srcdir@/src/elpa1/elpa_reduce_add_vectors.X90 \ + @top_srcdir@/src/elpa1/elpa_solve_tridi_impl_public.X90 \ + @top_srcdir@/src/elpa1/elpa_transpose_vectors.X90 \ + 
@top_srcdir@/src/elpa1/legacy_interface/elpa1.F90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa1_auxiliary.F90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa1_c_interface_template.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa1_template.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_1stage_c_interface.F90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_cholesky_c_interface_template.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_cholesky_template.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_invert_trm.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_invert_trm_c_interface_template.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_mult_ah_b_c_interface_template.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_mult_at_b_c_interface_template.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_multiply_a_b.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_solve_tridi.X90 \ + @top_srcdir@/src/elpa1/legacy_interface/elpa_solve_tridi_c_interface_template.X90 \ + @top_srcdir@/src/elpa2/GPU/ev_tridi_band_gpu_c_v2.cu \ + @top_srcdir@/src/elpa2/GPU/ev_tridi_band_gpu_c_v2_complex_template.Xcu \ + @top_srcdir@/src/elpa2/GPU/ev_tridi_band_gpu_c_v2_real_template.Xcu \ + @top_srcdir@/src/elpa2/GPU/interface_c_kernel.F90 \ + @top_srcdir@/src/elpa2/compute_hh_trafo.X90 \ + @top_srcdir@/src/elpa2/elpa2.F90 \ + @top_srcdir@/src/elpa2/elpa2_bandred_template.X90 \ + @top_srcdir@/src/elpa2/elpa2_compute.F90 \ + @top_srcdir@/src/elpa2/elpa2_compute_complex_template.X90 \ + @top_srcdir@/src/elpa2/elpa2_compute_real_template.X90 \ + @top_srcdir@/src/elpa2/elpa2_determine_workload.F90 \ + @top_srcdir@/src/elpa2/elpa2_herm_matrix_allreduce_complex_template.X90 \ + @top_srcdir@/src/elpa2/elpa2_print_kernels.F90 \ + @top_srcdir@/src/elpa2/elpa2_symm_matrix_allreduce_real_template.X90 \ + @top_srcdir@/src/elpa2/elpa2_template.X90 \ + @top_srcdir@/src/elpa2/elpa2_trans_ev_band_to_full_template.X90 \ + 
@top_srcdir@/src/elpa2/elpa2_trans_ev_tridi_to_band_template.X90 \ + @top_srcdir@/src/elpa2/elpa2_tridiag_band_template.X90 \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_asm_x86_64_double_precision.s \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_asm_x86_64_single_precision.s \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex.F90 \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_simple.F90 \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_template.X90 \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real.F90 \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c \ + 
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_bgp.f90 \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_bgq.f90 \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_simple.F90 \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_2hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_2hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_4hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_4hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_6hv_double_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_6hv_single_precision.c \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_template.X90 \ + @top_srcdir@/src/elpa2/kernels/elpa2_kernels_simple_template.X90 \ + @top_srcdir@/src/elpa2/kernels/mod_single_hh_trafo_real.F90 \ + @top_srcdir@/src/elpa2/legacy_interface/elpa2.F90 \ + @top_srcdir@/src/elpa2/legacy_interface/elpa2_c_interface_template.X90 \ + @top_srcdir@/src/elpa2/legacy_interface/elpa2_template.X90 \ + @top_srcdir@/src/elpa2/legacy_interface/elpa2_utilities.F90 \ + @top_srcdir@/src/elpa2/legacy_interface/elpa_2stage_c_interface.F90 \ + @top_srcdir@/src/elpa2/mod_compute_hh_trafo.F90 \ + @top_srcdir@/src/elpa2/mod_pack_unpack_cpu.F90 \ + 
@top_srcdir@/src/elpa2/mod_pack_unpack_gpu.F90 \ + @top_srcdir@/src/elpa2/mod_redist_band.F90 \ + @top_srcdir@/src/elpa2/pack_unpack_cpu.X90 \ + @top_srcdir@/src/elpa2/pack_unpack_gpu.X90 \ + @top_srcdir@/src/elpa2/qr/elpa_pdgeqrf.F90 \ + @top_srcdir@/src/elpa2/qr/elpa_pdgeqrf_template.X90 \ + @top_srcdir@/src/elpa2/qr/elpa_pdlarfb.F90 \ + @top_srcdir@/src/elpa2/qr/elpa_pdlarfb_template.X90 \ + @top_srcdir@/src/elpa2/qr/elpa_qrkernels.F90 \ + @top_srcdir@/src/elpa2/qr/elpa_qrkernels_template.X90 \ + @top_srcdir@/src/elpa2/qr/qr_utils.F90 \ + @top_srcdir@/src/elpa2/qr/qr_utils_template.X90 \ + @top_srcdir@/src/elpa2/redist_band.X90 \ + @top_srcdir@/src/elpa_c_interface.c \ + @top_srcdir@/src/elpa_constants.F90 \ + @top_srcdir@/src/elpa_driver/legacy_interface/elpa.F90 \ + @top_srcdir@/src/elpa_driver/legacy_interface/elpa_driver_c_interface.F90 \ + @top_srcdir@/src/elpa_driver/legacy_interface/elpa_driver_c_interface_template.X90 \ + @top_srcdir@/src/elpa_generated_fortran_interfaces.F90 \ + @top_srcdir@/src/elpa_impl.F90 \ + @top_srcdir@/src/elpa_index.c \ + @top_srcdir@/src/elpa_index.h \ + @top_srcdir@/src/fortran_constants.h \ + @top_srcdir@/src/ftimings/COPYING.LESSER \ + @top_srcdir@/src/ftimings/ftimings.F90 \ + @top_srcdir@/src/ftimings/ftimings_type.F90 \ + @top_srcdir@/src/ftimings/ftimings_value.F90 \ + @top_srcdir@/src/ftimings/highwater_mark.c \ + @top_srcdir@/src/ftimings/papi.c \ + @top_srcdir@/src/ftimings/resident_set_size.c \ + @top_srcdir@/src/ftimings/time.c \ + @top_srcdir@/src/ftimings/virtual_memory.c \ + @top_srcdir@/src/general/elpa_utilities.F90 \ + @top_srcdir@/src/general/precision_macros.h \ + @top_srcdir@/src/general/sanity.X90 \ + @top_srcdir@/src/helpers/aligned_mem.F90 \ + @top_srcdir@/src/helpers/mod_mpi.F90 \ + @top_srcdir@/src/helpers/mod_mpi_stubs.F90 \ + @top_srcdir@/src/helpers/mod_precision.F90 \ + @top_srcdir@/src/helpers/mod_time_c.F90 \ + @top_srcdir@/src/helpers/timer.F90 \ + @top_srcdir@/src/helpers/timer_dummy.F90 \ + 
@top_srcdir@/test/C/driver/legacy_interface/legacy_complex_driver_c_version.c \ + @top_srcdir@/test/C/driver/legacy_interface/legacy_real_driver_c_version.c \ + @top_srcdir@/test/C/driver/legacy_interface/legacy_single_complex_driver_c_version.c \ + @top_srcdir@/test/C/driver/legacy_interface/legacy_single_real_driver_c_version.c \ + @top_srcdir@/test/C/elpa1/legacy_interface/legacy_complex_1stage_c_version.c \ + @top_srcdir@/test/C/elpa1/legacy_interface/legacy_real_1stage_c_version.c \ + @top_srcdir@/test/C/elpa2/legacy_interface/legacy_complex_2stage_c_version.c \ + @top_srcdir@/test/C/elpa2/legacy_interface/legacy_real_2stage_c_version.c \ + @top_srcdir@/test/C/test.c \ + @top_srcdir@/test/Fortran/assert.h \ + @top_srcdir@/test/Fortran/driver/legacy_interface/legacy_complex_driver.F90 \ + @top_srcdir@/test/Fortran/driver/legacy_interface/legacy_real_driver.F90 \ + @top_srcdir@/test/Fortran/driver/legacy_interface/legacy_single_complex_driver.F90 \ + @top_srcdir@/test/Fortran/driver/legacy_interface/legacy_single_real_driver.F90 \ + @top_srcdir@/test/Fortran/elpa1/complex_1stage.F90 \ + @top_srcdir@/test/Fortran/elpa1/complex_1stage_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_complex.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_complex_cholesky.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_complex_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_complex_invert_trm.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_complex_transp_multiply.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_real.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_real_1stage_with_c.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_real_cholesky.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_real_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_real_invert_trm.F90 \ + 
@top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_real_transp_multiply.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_complex.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_complex_cholesky.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_complex_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_complex_invert_trm.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_complex_transp_multiply.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_real.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_real_cholesky.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_real_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_real_invert_trm.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_real_transp_multiply.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_single_toeplitz.F90 \ + @top_srcdir@/test/Fortran/elpa1/legacy_interface/legacy_toeplitz.F90 \ + @top_srcdir@/test/Fortran/elpa1/real_1stage.F90 \ + @top_srcdir@/test/Fortran/elpa1/real_1stage_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa1/single_complex_1stage.F90 \ + @top_srcdir@/test/Fortran/elpa1/single_complex_1stage_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa1/single_real_1stage.F90 \ + @top_srcdir@/test/Fortran/elpa1/single_real_1stage_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa2/complex_2stage.F90 \ + @top_srcdir@/test/Fortran/elpa2/complex_2stage_banded.F90 \ + @top_srcdir@/test/Fortran/elpa2/complex_2stage_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa2/double_instance.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_complex.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_complex_api.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_complex_default_kernel.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_complex_gpu.F90 \ + 
@top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_real.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_real_api.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_real_default_kernel.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_real_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_real_qr.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_single_complex.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_single_complex_api.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_single_complex_default_kernel.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_single_complex_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_single_real.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_single_real_api.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_single_real_default_kernel.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_single_real_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa2/legacy_interface/legacy_single_real_qr.F90 \ + @top_srcdir@/test/Fortran/elpa2/real_2stage.F90 \ + @top_srcdir@/test/Fortran/elpa2/real_2stage_banded.F90 \ + @top_srcdir@/test/Fortran/elpa2/real_2stage_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa2/single_complex_2stage.F90 \ + @top_srcdir@/test/Fortran/elpa2/single_complex_2stage_banded.F90 \ + @top_srcdir@/test/Fortran/elpa2/single_complex_2stage_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa2/single_real_2stage.F90 \ + @top_srcdir@/test/Fortran/elpa2/single_real_2stage_banded.F90 \ + @top_srcdir@/test/Fortran/elpa2/single_real_2stage_gpu.F90 \ + @top_srcdir@/test/Fortran/elpa_print_headers.X90 \ + @top_srcdir@/test/Fortran/elpa_tests.F90 \ + @top_srcdir@/test/Fortran/test.F90 \ + @top_srcdir@/test/TO_DO_AFTER_MERGE.TXT \ + @top_srcdir@/test/shared/blacs_infrastructure.F90 \ + @top_srcdir@/test/shared/call_elpa1.c \ + @top_srcdir@/test/shared/call_elpa2.c \ + 
@top_srcdir@/test/shared/check_correctness.F90 \ + @top_srcdir@/test/shared/check_correctness_template.X90 \ + @top_srcdir@/test/shared/mod_assert.F90 \ + @top_srcdir@/test/shared/mod_from_c.F90 \ + @top_srcdir@/test/shared/mod_output_types.F90 \ + @top_srcdir@/test/shared/prepare_matrix.F90 \ + @top_srcdir@/test/shared/prepare_matrix_template.X90 \ + @top_srcdir@/test/shared/read_input_parameters.F90 \ + @top_srcdir@/test/shared/redir.c \ + @top_srcdir@/test/shared/redirect.F90 \ + @top_srcdir@/test/shared/setup_mpi.F90 \ + @top_srcdir@/test/shared/util.F90 + # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded diff --git a/INSTALL.md b/INSTALL.md index 06e0b5d12784190bf7b3a4139ef564c679a8c526..44467c9a84801a1fc80044a3d27a7d1cf9b70943 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -33,7 +33,54 @@ for the documentation how to proceed. Please look at configure --help for all available options. -We recommend that you do not build ELPA in it`s main directory but that you use do it +An excerpt of the most important (*ELPA* specific) options reads as follows: + + --disable-legacy do not build legacy API, default yes + --enable-openmp use OpenMP threading, default no. + --enable-redirect for test programs, allow redirection of + stdout/stderr per MPI taks in a file (useful for + timing), default no. 
+ --enable-single-precision + build with single precision + --disable-timings more detailed timing, default yes + --disable-band-to-full-blocking + build ELPA2 with blocking in band_to_full (default: + enabled) + --disable-mpi-module do not use the Fortran MPI module, get interfaces by + 'include "mpif.h') + --disable-generic do not build GENERIC kernels, default: enabled + --disable-sse do not build SSE kernels, default: enabled + --disable-sse-assembly do not build SSE_ASSEMBLY kernels, default: enabled + --disable-avx do not build AVX kernels, default: enabled + --disable-avx2 do not build AVX2 kernels, default: enabled + --enable-avx512 build AVX512 kernels, default: disabled + --enable-gpu build GPU kernels, default: disabled + --enable-bgp build BGP kernels, default: disabled + --enable-bgq build BGQ kernels, default: disabled + --with-mpi=[yes|no] compile with MPI. Default: yes + --with-cuda-path=PATH prefix where CUDA is installed [default=auto] + --with-cuda-sdk-path=PATH + prefix where CUDA SDK is installed [default=auto] + --with-GPU-compute-capability=VALUE + use compute capability VALUE for GPU version, + default: "sm_35" + --with-fixed-real-kernel=KERNEL + compile with only a single specific real kernel. + Available kernels are: generic generic_simple + sse_block2 sse_block4 sse_block6 sse_assembly + avx_block2 avx_block4 avx_block6 avx2_block2 + avx2_block4 avx2_block6 avx512_block2 avx512_block4 + avx512_block6 bgp bgq + --with-fixed-complex-kernel=KERNEL + compile with only a single specific complex kernel. 
+ Available kernels are: generic generic_simple + sse_block1 sse_block2 sse_assembly avx_block1 + avx_block2 avx2_block1 avx2_block2 avx512_block1 + avx512_block2 bgp bgq + --with-gpu-support-only Compile and always use the GPU version + + +We recommend that you do not build ELPA in its main directory but that you do it in a sub-directory: mkdir build @@ -66,7 +113,7 @@ If you want to build *ELPA* with MPI support, please have a look at "A) Setting For builds without MPI support, please have a look at "B) Building *ELPA* without MPI support". Please note, that it is absolutely supported that both versions of the *ELPA* library are build -and installed. +and installed in the same directory. ### A) Setting of MPI compiler and libraries ### @@ -76,11 +123,11 @@ cannot automatically found, it is recommended to set it by hand with a variable, configure FC=mpif90 -Please note, thate setting a C MPI-compiles is NOT necessary. +Please note that setting a C MPI compiler is NOT necessary, and in most cases even harmful. In some cases, on your system different MPI libraries and compilers are installed. Then it might happen that during the build step an error like "no module mpi" or "cannot open module mpi" is given. -You can switch off that the *ELPA* library uses MPI modules (and instead uses MPI header files) by +You can prevent the *ELPA* library from using MPI modules (so that it uses the MPI header files instead) by adding --disable-mpi-module @@ -98,11 +145,11 @@ If you want to build *ELPA* without MPI support, add to your configure call. -You have to specify which compilers shouldbe used like, +You have to specify which compilers should be used, e.g., configure FC=gfortran --with-mpi=0 -DO NOT specify a MPI compiler here. +DO NOT specify an MPI compiler here! Note, that the the installed *ELPA* library files will be suffixed with "_onenode", in order to descriminate this build from possible ones with MPI. 
@@ -207,13 +254,11 @@ ELPA 2stage can be used with different implementations of compute intensive kern Some kernels (all for x86_64 architectures) are enabled by default (and must be disabled if you do not want them), others are disabled by default and must be enabled if they are wanted. -Every kernel can be enabled with +One can enable or disable "kernel classes" by setting, e.g., ---enable-"kernel-types" +--enable-avx2 -or disabled with - ---disable-"kernel-types" +This will try to build all the AVX2 kernels. Please see configure --help for all options. During the configure step all possible kernels will be printed, and whether they will be enabled or not. diff --git a/ISSUES.md b/ISSUES.md index 8aaad46d8ba4e067c75bee662d1d6a9ca882fc6c..05af270ffb5448e30e020cbe680350d5fc5f1f27 100644 --- a/ISSUES.md +++ b/ISSUES.md @@ -3,6 +3,9 @@ For more details and recent updates please visit the online [issue system] (https://gitlab.mpcdf.mpg.de/elpa/elpa/issues) Issues which are not mentioned in a newer release are (considered as) solved +### ELPA 2017.05.001.rc1 release ### +- at the moment no issues are known + ### ELPA 2016.11.001 release ### - at the moment no issues are known diff --git a/LIBRARY_INTERFACE b/LIBRARY_INTERFACE index d870d9c6585cecc1bed033dd6efed43357d530f9..66c98447d972660231f40d3e6b28c9ee7526aa69 100644 --- a/LIBRARY_INTERFACE +++ b/LIBRARY_INTERFACE @@ -58,3 +58,13 @@ https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html This is an experimental API, which will most likely change in the future The state of release 2016.11.001.pre defines this interface + +- 9 + NO incompatible API changes w.r.t. the previous version. The interface of + the previous version 2016.11.001.pre has been marked as "legacy", although it + is still fully available and supported. + + However, a new, more general API has been published, which will in the long run + completely replace the "legacy" interface. 
+ + The state of release 2017.05.001.rc1 defines this interface diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 48725a45dffb33eaa4f3221b638f0a5e7390e568..06490d4eac097411e50b798df3f96207dc7128cc 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,11 +1,17 @@ -This file contains the release notes for the ELPA 2016.11.001 version +This file contains the release notes for the ELPA 2017.05.001.rc1 version What is new? ------------- -For detailed information about changes since release ELPA 2016.05.004 please have a look at the Changelog file +For detailed information about changes since release ELPA 2016.11.001 please have a look at the Changelog file Highlights are: -- support of single and double precision (real and complex) calculations -- GPU support for ELPA 1stage and ELPA 2stage + +- a more generic and more flexible API, which allows easy implementation of upcoming features +- faster GPU implementation, especially for ELPA 1stage +- the restriction of the block-cyclic distribution blocksize = 128 in the GPU + case is relaxed +- Faster CPU implementation due to better blocking +- support of already banded matrices (new API only!) +- improved KNL support ABI change @@ -19,5 +25,5 @@ Any incompatibilities to previous version? --------------------------------------- As mentioned before, the ABI of ELPA was not changed; There is no -incompatibility with the previous version ELPA 2016.05.002. There is, however, an incompatibility with older versions than ELPA 2015.11.001 ! +incompatibility with the previous version ELPA 2016.11.001. diff --git a/USERS_GUIDE.md b/USERS_GUIDE.md index d022f7ea3ab3b352a592f1be88ec7d60327e6a40..c709180eb5eaf3d0da43ecfddf292ae248c064d1 100644 --- a/USERS_GUIDE.md +++ b/USERS_GUIDE.md @@ -9,21 +9,21 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst For example "man elpa2_print_kernels" should provide the documentation for the *ELPA* program which prints all the available kernels. 
-Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.05.001/html/index.html) +Also an [online doxygen documentation](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.05.001.rc1/html/index.html) for each *ELPA* release is available. ## API of the *ELPA* library ## -With release 2017.05.001 of the *ELPA* library the interface has been rewritten substantially, in order to have a more +With release 2017.05.001.rc1 of the *ELPA* library the interface has been rewritten substantially, in order to have a more generic interface and avoid future interface changes. For compatibility reasons the interface defined in the previous release 2016.11.001 is also still available IF AND ONLY IF *ELPA* has been build with support of this legacy interface. If you want to use the legacy interface, please look to section "B) Using the legacy API of the *ELPA* library. -The legacy API defines all the functionallity as has been defined in *ELPA* release 2016.11.011. Note, however, -that all future features of *ELPA* will only be accessible via the new API defined in release 2017.05.001. +The legacy API defines all the functionality as it has been defined in *ELPA* release 2016.11.001. Note, however, +that all future features of *ELPA* will only be accessible via the new API defined in release 2017.05.001.rc1 or later. ## A) Using the final API definition of the *ELPA* library ## diff --git a/configure.ac b/configure.ac index 6d497f54c6b29b18eb84558b3f578d2dce85f3dc..d97568308a8252084bdf01ade7b1119337174b91 100644 --- a/configure.ac +++ b/configure.ac @@ -27,7 +27,7 @@ AM_SILENT_RULES([yes]) # by the current interface, as they are ABI compatible (e.g. 
only new symbols # were added by the new interface) # -AC_SUBST([ELPA_SO_VERSION], [8:0:0]) +AC_SUBST([ELPA_SO_VERSION], [9:1:0]) # API Version AC_DEFINE([EARLIEST_API_VERSION], [20170403], [Earliest supported ELPA API version]) diff --git a/elpa.spec b/elpa.spec index 6bfa555c7833dd77cbee2cf156c455eae8c9002f..0d6d3f51a9591fcf7bafa650b086a25588ab32a2 100644 --- a/elpa.spec +++ b/elpa.spec @@ -12,7 +12,7 @@ # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. -%define so_version 8 +%define so_version 9 # OpenMP support requires an MPI implementation with MPI_THREAD_MULTIPLE support, # which is only available for a sufficiently configured openmpi >= 1.8 @@ -32,7 +32,7 @@ %endif Name: elpa -Version: 2016.11.001.pre +Version: 2017.05.001.rc1 Release: 1 Summary: A massively parallel eigenvector solver License: LGPL-3.0