diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..36eacbb7eb9964e076a230834f1415b38c86a3ab --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,309 @@ +####################################################################### +# # +# Copyright 2019 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +cmake_minimum_required(VERSION 3.10) +cmake_policy(VERSION 3.12) + +if (DEFINED ENV{MPICXX}) + message(STATUS "Using CMAKE_CXX_COMPILER=MPICXX") + set(CMAKE_CXX_COMPILER $ENV{MPICXX}) +else() + message(STATUS "MPICXX environment variable undefined, trying to find MPI") + set(MPI_STATIC ON) + find_package(MPI REQUIRED) +endif() + +if (DEFINED ENV{MPICC}) + set(CMAKE_C_COMPILER $ENV{MPICC}) + message(STATUS "Using CMAKE_C_COMPILER=MPICC") +endif() + +if (DEFINED ENV{CMAKE_INSTALL_PREFIX}) + set(CMAKE_INSTALL_PREFIX $ENV{CMAKE_INSTALL_PREFIX}) +endif() + +project(BFPS) + +execute_process(COMMAND python ${PROJECT_SOURCE_DIR}/get_version.py OUTPUT_VARIABLE BFPS_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) + +project(BFPS + VERSION ${BFPS_VERSION} + LANGUAGES CXX) + + +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/morse ${CMAKE_MODULE_PATH}) +set(BFPS_LIBS "") + +##################################################################################### +## MPI + +set(CMAKE_CXX_COMPILE_FLAGS "${CMAKE_CXX_COMPILE_FLAGS} ${MPI_CXX_COMPILE_OPTIONS}") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MPI_CXX_LINK_FLAGS}") +include_directories(${MPI_CXX_INCLUDE_DIRS}) +add_definitions(${MPI_CXX_COMPILE_DEFINITIONS}) +list(APPEND BFPS_LIBS "${MPI_CXX_LIBRARIES}") + +##################################################################################### +## CXX Standard + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +# set(CMAKE_CXX_EXTENSIONS OFF) + +##################################################################################### +## OpenMP + +find_package(OpenMP REQUIRED) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") +list(APPEND BFPS_LIBS "${OpenMP_CXX_LIB_NAMES}") + +##################################################################################### +## Extra flags + +set(CMAKE_CXX_COMPILE_FLAGS "${CMAKE_CXX_COMPILE_FLAGS} $ENV{BFPS_OPTIMIZATION_FLAGS} -Wall -g") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_COMPILE_FLAGS}") + +##################################################################################### +## HDF5 + +set(HDF5_STATIC ON) +if(NOT DEFINED ENV{HDF5_ROOT}) + message(WARNING "The environment variable HDF5_ROOT is undefined, this might cause trouble in finding the HDF5") +endif() + 
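Several of the checks above and below read environment variables rather than CMake cache entries (`MPICXX`, `MPICC`, `CMAKE_INSTALL_PREFIX`, `BFPS_OPTIMIZATION_FLAGS`, `HDF5_ROOT`, and `FFTW_DIR` further down). A minimal configure sketch consistent with these checks and with the README instructions; the wrapper names `mpicxx`/`mpicc`, the `-O3` flag and the PREFIX path are placeholders, not values required by this file:

.. code:: bash

    # placeholder values: point PREFIX at the location of your FFTW and HDF5 installs
    export MPICXX=mpicxx
    export MPICC=mpicc
    export HDF5_ROOT=PREFIX
    export FFTW_DIR=PREFIX
    export BFPS_OPTIMIZATION_FLAGS="-O3"
    mkdir build && cd build
    cmake .. -DCMAKE_INSTALL_PREFIX=PREFIX
    make && make install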
+set(HDF5_PREFER_PARALLEL TRUE) +find_package(HDF5 REQUIRED) + +message(STATUS "HDF5_C_INCLUDE_DIRS ${HDF5_C_INCLUDE_DIRS}") + +include_directories(${HDF5_C_INCLUDE_DIRS}) +add_definitions(${HDF5_C_DEFINITIONS}) +list(APPEND BFPS_LIBS "${HDF5_C_LIBRARIES}") + +option(BFPS_HDF5_USE_SZIP "Set to on to also link against SZIP" OFF) + +if(BFPS_HDF5_USE_SZIP) + option(BFPS_HDF5_SZIP_LIB_PATH "Additional lib path for SZIP" "") + if(BFPS_HDF5_SZIP_LIB_PATH) + link_directories(${BFPS_HDF5_SZIP_LIB_PATH}) + endif() + list(APPEND BFPS_LIBS "z") +endif() + +##################################################################################### +## FFTW + +set(FFTW_STATIC ON) +if(NOT DEFINED ENV{FFTW_DIR}) + message(WARNING "The environment variable FFTW_DIR is undefined, this might cause trouble in finding the FFTW") +endif() + +find_package(FFTW REQUIRED OMP) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FFTW_CFLAGS_OTHER}") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FFTW_LDFLAGS_OTHER}") +list(APPEND BFPS_LIBS "${FFTW_LIBRARIES}") +include_directories(${FFTW_INCLUDE_DIRS}) +link_directories(${FFTW_LIBRARY_DIRS}) + +find_package(FFTW REQUIRED OMP SIMPLE) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FFTW_CFLAGS_OTHER}") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FFTW_LDFLAGS_OTHER}") +list(APPEND BFPS_LIBS "${FFTW_LIBRARIES}") +include_directories(${FFTW_INCLUDE_DIRS}) +link_directories(${FFTW_LIBRARY_DIRS}) + +# hack for FFTW MPI libs +find_library( + FFTWF_MPI fftw3f_mpi + HINTS ${FFTW_LIBRARY_DIRS}) +set(BFPS_LIBS ${FFTWF_MPI} ${BFPS_LIBS}) +find_library( + FFTW_MPI fftw3_mpi + HINTS ${FFTW_LIBRARY_DIRS}) +set(BFPS_LIBS ${FFTW_MPI} ${BFPS_LIBS}) + + +##################################################################################### +## Get the links and include from deps + +get_property(ALL_INCLUDE_DIRS DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) +get_property(ALL_LINK_DIRS DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY LINK_DIRECTORIES) + +##################################################################################### +## Build the lib + +include_directories(${PROJECT_SOURCE_DIR}/cpp) + +#file(GLOB_RECURSE cpp_for_lib ${PROJECT_SOURCE_DIR}/*.cpp) +set(cpp_for_lib + ${PROJECT_SOURCE_DIR}/cpp/full_code/code_base.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/direct_numerical_simulation.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/joint_acc_vel_stats.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/test.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/filter_test.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/field_test.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/symmetrize_test.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/field_output_test.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/get_rfields.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/field_single_to_double.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/resize.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE_field_stats.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/native_binary_to_hdf5.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/postprocess.cpp + ${PROJECT_SOURCE_DIR}/cpp/field.cpp + ${PROJECT_SOURCE_DIR}/cpp/kspace.cpp + ${PROJECT_SOURCE_DIR}/cpp/field_layout.cpp + ${PROJECT_SOURCE_DIR}/cpp/hdf5_tools.cpp + ${PROJECT_SOURCE_DIR}/cpp/fftw_tools.cpp + ${PROJECT_SOURCE_DIR}/cpp/vorticity_equation.cpp + ${PROJECT_SOURCE_DIR}/cpp/field_binary_IO.cpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n1.cpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n2.cpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n3.cpp + 
${PROJECT_SOURCE_DIR}/cpp/spline_n4.cpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n5.cpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n6.cpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n7.cpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n8.cpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n9.cpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n10.cpp + ${PROJECT_SOURCE_DIR}/cpp/Lagrange_polys.cpp + ${PROJECT_SOURCE_DIR}/cpp/scope_timer.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/test_interpolation.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEparticles.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEcomplex_particles.cpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEp_extra_sampling.cpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_inner_computer.cpp) +set(hpp_for_lib + ${PROJECT_SOURCE_DIR}/cpp/full_code/code_base.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/direct_numerical_simulation.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/joint_acc_vel_stats.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/test.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/filter_test.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/field_test.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/symmetrize_test.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/field_output_test.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/get_rfields.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/field_single_to_double.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/resize.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE_field_stats.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/native_binary_to_hdf5.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/postprocess.hpp + ${PROJECT_SOURCE_DIR}/cpp/field.hpp + ${PROJECT_SOURCE_DIR}/cpp/kspace.hpp + ${PROJECT_SOURCE_DIR}/cpp/field_layout.hpp + ${PROJECT_SOURCE_DIR}/cpp/hdf5_tools.hpp + ${PROJECT_SOURCE_DIR}/cpp/fftw_tools.hpp + ${PROJECT_SOURCE_DIR}/cpp/vorticity_equation.hpp + ${PROJECT_SOURCE_DIR}/cpp/field_binary_IO.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n1.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n2.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n3.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n4.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n5.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n6.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n7.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n8.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n9.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline_n10.hpp + ${PROJECT_SOURCE_DIR}/cpp/Lagrange_polys.hpp + ${PROJECT_SOURCE_DIR}/cpp/scope_timer.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/test_interpolation.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEparticles.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEcomplex_particles.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEp_extra_sampling.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_inner_computer.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/abstract_particles_input.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/abstract_particles_output.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/abstract_particles_system.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/alltoall_exchanger.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/env_utils.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/lock_free_bool_array.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_computer_empty.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_computer.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_distr_mpi.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_tree.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_adams_bashforth.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_distr_mpi.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_field_computer.hpp + 
${PROJECT_SOURCE_DIR}/cpp/particles/particles_generic_interp.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_inner_computer_empty.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_input_hdf5.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_output_hdf5.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_output_mpiio.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_output_sampling_hdf5.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_sampling.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_system_builder.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_system.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/particles_utils.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/main_code.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/codes_with_no_output.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE_no_output.hpp + ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEparticles_no_output.hpp + ${PROJECT_SOURCE_DIR}/cpp/base.hpp + ${PROJECT_SOURCE_DIR}/cpp/fftw_interface.hpp + ${PROJECT_SOURCE_DIR}/cpp/bfps_timer.hpp + ${PROJECT_SOURCE_DIR}/cpp/omputils.hpp + ${PROJECT_SOURCE_DIR}/cpp/shared_array.hpp + ${PROJECT_SOURCE_DIR}/cpp/spline.hpp + ) +#file(GLOB_RECURSE hpp_for_lib ${PROJECT_SOURCE_DIR}/*.hpp) +LIST(APPEND source_files ${hpp_for_lib} ${cpp_for_lib}) + +add_library(bfps ${source_files}) + +target_link_libraries(bfps ${BFPS_LIBS}) + +install(TARGETS bfps EXPORT BFPS_EXPORT DESTINATION lib/ ) +install(DIRECTORY ${PROJECT_SOURCE_DIR}/cpp/ DESTINATION include/bfps/ FILES_MATCHING PATTERN "*.h*") + +##################################################################################### +## Export the configuration + +configure_file(${PROJECT_SOURCE_DIR}/cmake/BFPSConfig.cmake.in ${PROJECT_BINARY_DIR}/BFPSConfig.cmake @ONLY) + +install(FILES "${PROJECT_BINARY_DIR}/BFPSConfig.cmake" DESTINATION lib/) +export(TARGETS bfps FILE "${PROJECT_BINARY_DIR}/BFPSLibraryDepends.cmake") +install(EXPORT BFPS_EXPORT DESTINATION lib/) + + +##################################################################################### +## Install the python wrapper +# copy command +install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/bfps ${PROJECT_BINARY_DIR}/python/bfps/)") +if(EXISTS "${PROJECT_SOURCE_DIR}/host_info.py") + install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/host_info.py ${PROJECT_BINARY_DIR}/python/bfps/)") +else() + install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/pc_host_info.py ${PROJECT_BINARY_DIR}/python/bfps/host_info.py)") +endif() +install(CODE "execute_process(COMMAND python ${PROJECT_SOURCE_DIR}/setup.py install --force --prefix=${CMAKE_INSTALL_PREFIX} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/python/)") + diff --git a/README.rst b/README.rst index ddb9f2447db919248100368a9a08b13297d5e3a4..7dc457a7b00c99feec225d73b446ead083ef0a00 100644 --- a/README.rst +++ b/README.rst @@ -58,22 +58,10 @@ Use a console; navigate to the ``bfps`` folder, and type: **Full installation** If you want to run simulations on the machine where you're installing, -you will need to call `compile_library` before installing. +you will need to use `cmake` to compile and install the full library. Your machine needs to have an MPI compiler installed, the HDF5 C library -and FFTW >= 3.4. -The file `machine_settings_py.py` should be modified -appropriately for your machine (otherwise the `compile_library` command will most -likely fail). 
-This file will be copied the first time you run `setup.py` into -`$HOME/.config/bfps/machine_settings.py`, **where it will be imported from -afterwards** --- any future edits **must** be made to the new file. -You may, obviously, edit it afterwards and rerun the `compile_library` command as -needed. - -.. code:: bash - - python setup.py compile_library - python setup.py install +and FFTW >= 3.4 --- detailed instructions are +included at the end of this document. ------------- Documentation ------------- @@ -82,8 +70,8 @@ Documentation While the code is not fully documented yet, basic information is already available, and it is recommended that you generate the manual and go through it carefully. -Please don't be shy about asking for specific improvements to the -current text. +Please do ask for specific improvements to the current text where it is +found lacking. In order to generate the manual, navigate to the repository folder, and execute the following commands: @@ -99,10 +87,113 @@ type ``make html`` instead of ``make latexpdf``. Comments -------- +* the `cmake` folder contains files extracted from + https://gitlab.inria.fr/solverstack/morse_cmake, a separate project licensed + under the "CeCILL-C" license, please see + http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html for + details. + * particles: initialization of multistep solvers is done with lower order methods, so direct convergence tests will fail. -* Code is used mainly with Python 3.4 and 3.5. - In principle it should be easy to maintain compatibility with Python - 2.7.x, but as of `bfps 1.8` this is no longer a main concern. +* code is only known to work with HDF5 1.8.x. + +* code is used mainly with Python 3.5 and later, and it is not tested at + all with Python 2.x. + +------------------------------- +Installation with prerequisites +------------------------------- + +These installation steps assume that you have a working MPI compiler, +properly configured on your system (i.e. the various configure scripts +are able to find it), as well as the `cmake` tool. +We recommend specifying the desired MPI C++ compiler by exporting the +environment variable `MPICXX` --- the BFPS cmake configuration looks for +this variable. +We also recommend that the environment variable `BFPS_OPTIMIZATION_FLAGS` +be defined appropriately. +In particular, for clusters of unknown architecture it helps to log into +individual nodes and run the following command: + +.. code:: bash + + gcc -march=native -Q --help=target + +Detailed installation instructions: + +1. Make a directory PREFIX on a fast local partition. + +2. Download, compile, and install FFTW (latest version 3.x from http://www.fftw.org/). + Execute the following commands in order; feel free to customize + optimisation flags for your own computer (see http://www.fftw.org/fftw3_doc/Installation-on-Unix.html): + + .. code:: bash + + ./configure --prefix=PREFIX --enable-float --enable-sse --enable-mpi --enable-openmp --enable-threads + make + make install + ./configure --prefix=PREFIX --enable-sse2 --enable-avx512 --enable-mpi --enable-openmp --enable-threads + make + make install + + BFPS will try to find FFTW using the FindFFTW module from the Morse project. + If the package is installed in a non-standard location, it is recommended + to set up the environment variables `FFTW_DIR` (or `FFTW_INCDIR` and `FFTW_LIBDIR`). + +3. Download, compile, and install HDF5 (version 1.8.x, currently available + at https://portal.hdfgroup.org/display/support/HDF5+1.8.20#files).
+ We are using parallel I/O, therefore we must use the plain C interface of HDF5: + + .. code:: bash + + ./configure --prefix=PREFIX --enable-parallel + make + make install + + BFPS will try to find HDF5 using the regular FindHDF5. + Therefore, if the package is installed in a non standard location, it is recommanded + to setup the environment variable: HDF5_ROOT. + +3. Optional. + We recommend the creation of a virtual python3 environment (also under PREFIX) that will be used for installing bfps and dependencies. + Please see https://docs.python-guide.org/dev/virtualenvs/. + +4. Clone bfps repository. + + .. code:: bash + + git clone git@gitlab.mpcdf.mpg.de:clalescu/bfps.git + +5. Go into bfps repository, execute + + .. code:: bash + + mkdir build + cd build + cmake .. + # possibly : cmake .. -DCMAKE_INSTALL_PREFIX=INSTALL_DIR + make + # to get a verbose compilation process, use + VERBOSE=1 make + make install + +6. If you used a custom install location (i.e. `CMAKE_INSTALL_PREFIX`) + you must include this location in the environment variable + `CMAKE_PREFIX_PATH`. + This ensures that the required `BFPSConfig.cmake` file is accessible for + future use by the package. + +7. Using BFPS from an external project. + BFPS creates and installs 3 files alongside the C++ headers and + library: + + .. code:: bash + + -- Installing: install/lib/BFPSConfig.cmake + -- Installing: install/lib/BFPS_EXPORT.cmake + -- Installing: install/lib/BFPS_EXPORT-noconfig.cmake + + In case these files provide incomplete information, it is necessary to update + the cmake input config file: bfps/cmake/BFPSConfig.cmake.in. diff --git a/bfps/DNS.py b/bfps/DNS.py index 4f26b86c5d4739e1bb3989f2e4a7d9a70ad3f009..bb4385458d8ce44a18ca1d18d88d5192aee147c6 100644 --- a/bfps/DNS.py +++ b/bfps/DNS.py @@ -1,26 +1,25 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### +################################################################################ +# # +# Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. 
# +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ @@ -50,13 +49,8 @@ class DNS(_code): self, work_dir = work_dir, simname = simname) - self.host_info = {'type' : 'cluster', - 'environment' : None, - 'deltanprocs' : 1, - 'queue' : '', - 'mail_address': '', - 'mail_events' : None} self.generate_default_parameters() + self.statistics = {} return None def set_precision( self, @@ -78,7 +72,8 @@ class DNS(_code): self.C_field_dtype = 'double' self.fluid_precision = 'double' return None - def write_src(self): + def write_src( + self): self.version_message = ( '/***********************************************************************\n' + '* this code automatically generated by bfps\n' + @@ -114,30 +109,11 @@ class DNS(_code): with open(self.name + '.cpp', 'w') as outfile: outfile.write(self.version_message + '\n\n') outfile.write(self.includes + '\n\n') - outfile.write( - self.cread_pars( - template_class = '{0}<rnumber>::'.format(self.dns_type), - template_prefix = 'template <typename rnumber> ', - simname_variable = 'this->simname.c_str()', - prepend_this = True) + - '\n\n') - for rnumber in ['float', 'double']: - outfile.write(self.cread_pars( - template_class = '{0}<{1}>::'.format(self.dns_type, rnumber), - template_prefix = 'template '.format(rnumber), - just_declaration = True) + '\n\n') - if self.dns_type in ['NSVEparticles', 'NSVE_no_output', 'NSVEparticles_no_output']: - outfile.write('template <typename rnumber> int NSVE<rnumber>::read_parameters(){return EXIT_SUCCESS;}\n') - outfile.write('template int NSVE<float>::read_parameters();\n') - outfile.write('template int NSVE<double>::read_parameters();\n\n') - if self.dns_type in ['NSVEparticles_no_output']: - outfile.write('template <typename rnumber> int NSVEparticles<rnumber>::read_parameters(){return EXIT_SUCCESS;}\n') - outfile.write('template int NSVEparticles<float>::read_parameters();\n') - outfile.write('template int NSVEparticles<double>::read_parameters();\n\n') outfile.write(self.main + '\n') return None def generate_default_parameters(self): # these parameters are relevant for all DNS classes + self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE' self.parameters['dealias_type'] = int(1) self.parameters['dkx'] = float(1.0) self.parameters['dky'] = float(1.0) @@ -150,19 +126,35 @@ class DNS(_code): self.parameters['nu'] = float(0.1) self.parameters['fmode'] = int(1) self.parameters['famplitude'] = float(0.5) + self.parameters['friction_coefficient'] = float(0.5) + self.parameters['energy'] = float(0.5) + self.parameters['injection_rate'] = float(0.4) self.parameters['fk0'] = float(2.0) self.parameters['fk1'] = float(4.0) - self.parameters['forcing_type'] = 'linear' + self.parameters['forcing_type'] = 'fixed_energy_injection_rate' self.parameters['histogram_bins'] = int(256) self.parameters['max_velocity_estimate'] = float(1) self.parameters['max_vorticity_estimate'] = float(1) # parameters specific to particle version self.NSVEp_extra_parameters = {} self.NSVEp_extra_parameters['niter_part'] = int(1) + self.NSVEp_extra_parameters['niter_part_fine_period'] = int(10) + self.NSVEp_extra_parameters['niter_part_fine_duration'] = int(0) self.NSVEp_extra_parameters['nparticles'] = int(10) self.NSVEp_extra_parameters['tracers0_integration_steps'] = int(4) self.NSVEp_extra_parameters['tracers0_neighbours'] = int(1) 
self.NSVEp_extra_parameters['tracers0_smoothness'] = int(1) + self.NSVEp_extra_parameters['tracers0_enable_p2p'] = int(0) + self.NSVEp_extra_parameters['tracers0_enable_inner'] = int(0) + self.NSVEp_extra_parameters['tracers0_enable_vorticity_omega'] = int(0) + self.NSVEp_extra_parameters['tracers0_cutoff'] = float(1) + self.NSVEp_extra_parameters['tracers0_inner_v0'] = float(1) + self.NSVEp_extra_parameters['tracers0_lambda'] = float(1) + #self.extra_parameters = {} + #for key in ['NSVE', 'NSVE_no_output', 'NSVEparticles', 'NSVEparticles_no_output', 'NSVEcomplex_particles']: + # self.extra_parameters[key] = {} + #for key in ['NSVEparticles', 'NSVEparticles_no_output', 'NSVEcomplex_particles']: + # self.extra_parameters[key].update(self.NSVEp_extra_parameters) return None def get_kspace(self): kspace = {} @@ -198,8 +190,12 @@ class DNS(_code): return os.path.join(self.work_dir, self.simname + '_particles.h5') def get_particle_file(self): return h5py.File(self.get_particle_file_name(), 'r') + def get_cache_file_name(self): + return os.path.join(self.work_dir, self.simname + '_cache.h5') + def get_cache_file(self): + return h5py.File(self.get_cache_file_name(), 'r') def get_postprocess_file_name(self): - return os.path.join(self.work_dir, self.simname + '_postprocess.h5') + return self.get_cache_file_name() def get_postprocess_file(self): return h5py.File(self.get_postprocess_file_name(), 'r') def compute_statistics(self, iter0 = 0, iter1 = None): @@ -215,75 +211,134 @@ class DNS(_code): tensors, and the enstrophy spectrum is also used to compute the dissipation :math:`\\varepsilon(t)`. These basic quantities are stored in a newly created HDF5 file, - ``simname_postprocess.h5``. + ``simname_cache.h5``. """ if len(list(self.statistics.keys())) > 0: return None - self.read_parameters() - with self.get_data_file() as data_file: - if 'moments' not in data_file['statistics'].keys(): - return None - iter0 = min((data_file['statistics/moments/velocity'].shape[0] * - self.parameters['niter_stat']-1), - iter0) - if type(iter1) == type(None): - iter1 = data_file['iteration'].value - else: - iter1 = min(data_file['iteration'].value, iter1) - ii0 = iter0 // self.parameters['niter_stat'] - ii1 = iter1 // self.parameters['niter_stat'] - self.statistics['kshell'] = data_file['kspace/kshell'].value - self.statistics['kM'] = data_file['kspace/kM'].value - self.statistics['dk'] = data_file['kspace/dk'].value - computation_needed = True - pp_file = h5py.File(self.get_postprocess_file_name(), 'a') - if 'ii0' in pp_file.keys(): - computation_needed = not (ii0 == pp_file['ii0'].value and - ii1 == pp_file['ii1'].value) + if not os.path.exists(self.get_data_file_name()): + if os.path.exists(self.get_cache_file_name()): + self.read_parameters(fname = self.get_cache_file_name()) + with self.get_cache_file() as pp_file: + for k in ['t', + 'energy(t)', + 'energy(k)', + 'enstrophy(t)', + 'enstrophy(k)', + 'R_ij(t)', + 'vel_max(t)', + 'renergy(t)']: + if k in pp_file.keys(): + self.statistics[k] = pp_file[k].value + self.statistics['kM'] = pp_file['kspace/kM'].value + self.statistics['dk'] = pp_file['kspace/dk'].value + self.statistics['kshell'] = pp_file['kspace/kshell'].value + self.statistics['nshell'] = pp_file['kspace/nshell'].value + else: + self.read_parameters() + with self.get_data_file() as data_file: + if 'moments' not in data_file['statistics'].keys(): + return None + iter0 = min((data_file['statistics/moments/velocity'].shape[0] * + self.parameters['niter_stat']-1), + iter0) + if type(iter1) == type(None): + 
iter1 = data_file['iteration'].value + else: + iter1 = min(data_file['iteration'].value, iter1) + ii0 = iter0 // self.parameters['niter_stat'] + ii1 = iter1 // self.parameters['niter_stat'] + self.statistics['kshell'] = data_file['kspace/kshell'].value + self.statistics['nshell'] = data_file['kspace/nshell'].value + for kk in [-1, -2]: + if (self.statistics['kshell'][kk] == 0): + self.statistics['kshell'][kk] = np.nan + self.statistics['kM'] = data_file['kspace/kM'].value + self.statistics['dk'] = data_file['kspace/dk'].value + computation_needed = True + pp_file = h5py.File(self.get_postprocess_file_name(), 'a') + if not ('parameters' in pp_file.keys()): + data_file.copy('parameters', pp_file) + data_file.copy('kspace', pp_file) + if 'ii0' in pp_file.keys(): + computation_needed = not (ii0 == pp_file['ii0'].value and + ii1 == pp_file['ii1'].value) + if computation_needed: + for k in ['t', 'vel_max(t)', 'renergy(t)', + 'energy(t)', 'enstrophy(t)', + 'energy(k)', 'enstrophy(k)', + 'energy(t, k)', + 'enstrophy(t, k)', + 'R_ij(t)', + 'ii0', 'ii1', 'iter0', 'iter1']: + if k in pp_file.keys(): + del pp_file[k] if computation_needed: - for k in pp_file.keys(): - del pp_file[k] - if computation_needed: - pp_file['iter0'] = iter0 - pp_file['iter1'] = iter1 - pp_file['ii0'] = ii0 - pp_file['ii1'] = ii1 - pp_file['t'] = (self.parameters['dt']* - self.parameters['niter_stat']* - (np.arange(ii0, ii1+1).astype(np.float))) - pp_file['energy(t, k)'] = ( - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] + - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] + - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2 - pp_file['enstrophy(t, k)'] = ( - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2 - pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'] [ii0:ii1+1, 9, 3] - pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2 - for k in ['t', - 'energy(t, k)', - 'enstrophy(t, k)', - 'vel_max(t)', - 'renergy(t)']: - if k in pp_file.keys(): - self.statistics[k] = pp_file[k].value - self.compute_time_averages() + pp_file['iter0'] = iter0 + pp_file['iter1'] = iter1 + pp_file['ii0'] = ii0 + pp_file['ii1'] = ii1 + pp_file['t'] = (self.parameters['dt']* + self.parameters['niter_stat']* + (np.arange(ii0, ii1+1).astype(np.float))) + phi_ij = data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1] + pp_file['R_ij(t)'] = np.sum(phi_ij, axis = 1) + energy_tk = ( + phi_ij[:, :, 0, 0] + + phi_ij[:, :, 1, 1] + + phi_ij[:, :, 2, 2])/2 + pp_file['energy(t)'] = np.sum(energy_tk, axis = 1) + pp_file['energy(k)'] = np.mean(energy_tk, axis = 0)*(4*np.pi*self.statistics['kshell']**2) / (self.statistics['dk']*self.statistics['nshell']) + enstrophy_tk = ( + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] + + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] + + data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2 + pp_file['enstrophy(t)'] = np.sum(enstrophy_tk, axis = 1) + pp_file['enstrophy(k)'] = np.mean(enstrophy_tk, axis = 0)*(4*np.pi*self.statistics['kshell']**2) / (self.statistics['dk']*self.statistics['nshell']) + pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 9, 3] + pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2 
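For reference, the spectra and time series written just above follow the conventions below (a reading of the code, assuming `e(t, k)` is the modal energy summed over the wavenumber shell `k` and `nshell(k)` the number of Fourier modes in that shell):

.. math::

    E(t) = \sum_k e(t, k), \hskip .5cm
    E(k) = \langle e(t, k) \rangle_t \, \frac{4 \pi k^2}{n_{\textrm{shell}}(k) \, dk}, \hskip .5cm
    \varepsilon(t) = 2 \nu \Omega(t)

where :math:`\Omega(t)` is the total enstrophy, computed analogously from the vorticity spectrum. The sanity check a few lines further down is a Parseval consistency test: the spectral `energy(t)` must agree with the real-space moment stored as `renergy(t)` to a relative tolerance of 1e-5.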
+ for k in ['t', + 'energy(t)', + 'energy(k)', + 'enstrophy(t)', + 'enstrophy(k)', + 'R_ij(t)', + 'vel_max(t)', + 'renergy(t)']: + if k in pp_file.keys(): + self.statistics[k] = pp_file[k].value + # sanity check --- Parseval theorem check + assert(np.max(np.abs( + self.statistics['renergy(t)'] - + self.statistics['energy(t)']) / self.statistics['energy(t)']) < 1e-5) + self.compute_time_averages() + return None + def compute_Reynolds_stress_invariants( + self): + """ + see Choi and Lumley, JFM v436 p59 (2001) + """ + Rij = self.statistics['R_ij(t)'] + Rij /= (2*self.statistics['energy(t)'][:, None, None]) + Rij[:, 0, 0] -= 1./3 + Rij[:, 1, 1] -= 1./3 + Rij[:, 2, 2] -= 1./3 + self.statistics['I2(t)'] = np.sqrt(np.einsum('...ij,...ij', Rij, Rij, optimize = True) / 6) + self.statistics['I3(t)'] = np.cbrt(np.einsum('...ij,...jk,...ki', Rij, Rij, Rij, optimize = True) / 6) return None def compute_time_averages(self): """Compute easy stats. Further computation of statistics based on the contents of - ``simname_postprocess.h5``. + ``simname_cache.h5``. Standard quantities are as follows (consistent with [Ishihara]_): .. math:: U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm - L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm - T_{\\textrm{int}}(t) = - \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)} + L_{\\textrm{int}} = \\frac{\pi}{2U_{int}^2} \\int \\frac{dk}{k} E(k), \\hskip .5cm + T_{\\textrm{int}} = + \\frac{L_{\\textrm{int}}}{U_{\\textrm{int}}} \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm @@ -300,21 +355,14 @@ class DNS(_code): J. Fluid Mech., **592**, 335-366, 2007 """ - for key in ['energy', 'enstrophy']: - self.statistics[key + '(t)'] = (self.statistics['dk'] * - np.sum(self.statistics[key + '(t, k)'], axis = 1)) self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3) - self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi / - (2*self.statistics['Uint(t)']**2)) * - np.nansum(self.statistics['energy(t, k)'] / - self.statistics['kshell'][None, :], axis = 1)) for key in ['energy', 'enstrophy', - 'vel_max', - 'Uint', - 'Lint']: + 'mean_trS2', + 'Uint']: if key + '(t)' in self.statistics.keys(): self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0) + self.statistics['vel_max'] = np.max(self.statistics['vel_max(t)']) for suffix in ['', '(t)']: self.statistics['diss' + suffix] = (self.parameters['nu'] * self.statistics['enstrophy' + suffix]*2) @@ -322,9 +370,6 @@ class DNS(_code): self.statistics['diss' + suffix])**.25 self.statistics['tauK' + suffix] = (self.parameters['nu'] / self.statistics['diss' + suffix])**.5 - self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] * - self.statistics['Lint' + suffix] / - self.parameters['nu']) self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] * self.statistics['Uint' + suffix]**2 / self.statistics['diss' + suffix])**.5 @@ -335,6 +380,13 @@ class DNS(_code): self.statistics['etaK' + suffix]) if self.parameters['dealias_type'] == 1: self.statistics['kMeta' + suffix] *= 0.8 + self.statistics['Lint'] = ((np.pi / + (2*self.statistics['Uint']**2)) * + np.nansum(self.statistics['energy(k)'] / + self.statistics['kshell'])) + self.statistics['Re'] = (self.statistics['Uint'] * + self.statistics['Lint'] / + self.parameters['nu']) self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint'] self.statistics['Taylor_microscale'] = 
self.statistics['lambda'] return None @@ -371,12 +423,11 @@ class DNS(_code): return None def write_par( self, - iter0 = 0, - particle_ic = None): + iter0 = 0): assert (self.parameters['niter_todo'] % self.parameters['niter_stat'] == 0) assert (self.parameters['niter_todo'] % self.parameters['niter_out'] == 0) assert (self.parameters['niter_out'] % self.parameters['niter_stat'] == 0) - if self.dns_type in ['NSVEparticles_no_output', 'NSVEparticles']: + if self.dns_type in ['NSVEparticles_no_output', 'NSVEcomplex_particles', 'NSVEparticles']: assert (self.parameters['niter_todo'] % self.parameters['niter_part'] == 0) assert (self.parameters['niter_out'] % self.parameters['niter_part'] == 0) _code.write_par(self, iter0 = iter0) @@ -419,36 +470,8 @@ class DNS(_code): 4), dtype = np.int64) ofile['checkpoint'] = int(0) - if self.dns_type in ['NSVE', 'NSVE_no_output']: + if (self.dns_type in ['NSVE', 'NSVE_no_output']): return None - - if type(particle_ic) == type(None): - pbase_shape = (self.parameters['nparticles'],) - number_of_particles = self.parameters['nparticles'] - else: - pbase_shape = particle_ic.shape[:-1] - assert(particle_ic.shape[-1] == 3) - number_of_particles = 1 - for val in pbase_shape[1:]: - number_of_particles *= val - with h5py.File(self.get_checkpoint_0_fname(), 'a') as ofile: - s = 0 - ofile.create_group('tracers{0}'.format(s)) - ofile.create_group('tracers{0}/rhs'.format(s)) - ofile.create_group('tracers{0}/state'.format(s)) - ofile['tracers{0}/rhs'.format(s)].create_dataset( - '0', - shape = ( - (self.parameters['tracers{0}_integration_steps'.format(s)],) + - pbase_shape + - (3,)), - dtype = np.float) - ofile['tracers{0}/state'.format(s)].create_dataset( - '0', - shape = ( - pbase_shape + - (3,)), - dtype = np.float) return None def job_parser_arguments( self, @@ -472,6 +495,10 @@ class DNS(_code): metavar = 'NTHREADS_PER_PROCESS', help = 'number of threads to use per MPI process', default = 1) + parser.add_argument( + '--no-debug', + action = 'store_true', + dest = 'no_debug') parser.add_argument( '--no-submit', action = 'store_true', @@ -602,28 +629,32 @@ class DNS(_code): parser_NSVEparticles_no_output = subparsers.add_parser( 'NSVEparticles_no_output', help = 'plain Navier-Stokes vorticity formulation, with basic fluid tracers, checkpoints are NOT SAVED') - self.simulation_parser_arguments(parser_NSVEparticles_no_output) - self.job_parser_arguments(parser_NSVEparticles_no_output) - self.particle_parser_arguments(parser_NSVEparticles_no_output) - self.parameters_to_parser_arguments(parser_NSVEparticles_no_output) - self.parameters_to_parser_arguments( - parser_NSVEparticles_no_output, - self.NSVEp_extra_parameters) parser_NSVEp2 = subparsers.add_parser( 'NSVEparticles', help = 'plain Navier-Stokes vorticity formulation, with basic fluid tracers') - self.simulation_parser_arguments(parser_NSVEp2) - self.job_parser_arguments(parser_NSVEp2) - self.particle_parser_arguments(parser_NSVEp2) - self.parameters_to_parser_arguments(parser_NSVEp2) - self.parameters_to_parser_arguments( - parser_NSVEp2, - self.NSVEp_extra_parameters) + + parser_NSVEp2p = subparsers.add_parser( + 'NSVEcomplex_particles', + help = 'plain Navier-Stokes vorticity formulation, with oriented active particles') + + parser_NSVEp_extra = subparsers.add_parser( + 'NSVEp_extra_sampling', + help = 'plain Navier-Stokes vorticity formulation, with basic fluid tracers, that sample velocity gradient, as well as pressure and its derivatives.') + + for parser in ['NSVEparticles_no_output', 'NSVEp2', 'NSVEp2p', 
'NSVEp_extra']: + eval('self.simulation_parser_arguments({0})'.format('parser_' + parser)) + eval('self.job_parser_arguments({0})'.format('parser_' + parser)) + eval('self.particle_parser_arguments({0})'.format('parser_' + parser)) + eval('self.parameters_to_parser_arguments({0})'.format('parser_' + parser)) + eval('self.parameters_to_parser_arguments(' + 'parser_{0},' + 'self.NSVEp_extra_parameters)'.format(parser)) return None def prepare_launch( self, - args = []): + args = [], + extra_parameters = None): """Set up reasonable parameters. With the default Lundgren forcing applied in the band [2, 4], @@ -654,16 +685,13 @@ class DNS(_code): self.dns_type = opt.DNS_class self.name = self.dns_type + '-' + self.fluid_precision + '-v' + bfps.__version__ # merge parameters if needed - if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']: + if self.dns_type in ['NSVEparticles', 'NSVEcomplex_particles', 'NSVEparticles_no_output', 'NSVEp_extra_sampling']: for k in self.NSVEp_extra_parameters.keys(): self.parameters[k] = self.NSVEp_extra_parameters[k] - self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3) - self.parameters['dt'] = (opt.dtfactor / opt.n) - # custom famplitude for 288 and 576 - if opt.n == 288: - self.parameters['famplitude'] = 0.45 - elif opt.n == 576: - self.parameters['famplitude'] = 0.47 + if type(extra_parameters) != type(None): + if self.dns_type in extra_parameters.keys(): + for k in extra_parameters[self.dns_type].keys(): + self.parameters[k] = extra_parameters[self.dns_type][k] if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0): self.parameters['niter_out'] = self.parameters['niter_todo'] if len(opt.src_work_dir) == 0: @@ -672,7 +700,7 @@ class DNS(_code): opt.dkx = 2. / opt.Lx if type(opt.dky) == type(None): opt.dky = 2. / opt.Ly - if type(opt.dkx) == type(None): + if type(opt.dkz) == type(None): opt.dkz = 2. / opt.Lz if type(opt.nx) == type(None): opt.nx = opt.n @@ -680,11 +708,49 @@ class DNS(_code): opt.ny = opt.n if type(opt.nz) == type(None): opt.nz = opt.n + if type(opt.fk0) == type(None): + opt.fk0 = self.parameters['fk0'] + if type(opt.fk1) == type(None): + opt.fk1 = self.parameters['fk1'] + if type(opt.injection_rate) == type(None): + opt.injection_rate = self.parameters['injection_rate'] + if type(opt.dealias_type) == type(None): + opt.dealias_type = self.parameters['dealias_type'] + if (opt.nx > opt.n or + opt.ny > opt.n or + opt.nz > opt.n): + opt.n = min(opt.nx, opt.ny, opt.nz) + print("Warning: '-n' parameter changed to minimum of nx, ny, nz. This affects the computation of nu.") + self.parameters['dt'] = (opt.dtfactor / opt.n) + self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3) + # check value of kMax + kM = opt.n * 0.5 + if opt.dealias_type == 1: + kM *= 0.8 + # tweak forcing/viscosity based on forcint type + if opt.forcing_type == 'linear': + # custom famplitude for 288 and 576 + if opt.n == 288: + self.parameters['famplitude'] = 0.45 + elif opt.n == 576: + self.parameters['famplitude'] = 0.47 + elif opt.forcing_type == 'fixed_energy_injection_rate': + # use the fact that mean dissipation rate is equal to injection rate + self.parameters['nu'] = ( + opt.injection_rate * + (opt.kMeta / kM)**4)**(1./3) + elif opt.forcing_type == 'fixed_energy': + kf = 1. 
/ (1./opt.fk0 + + 1./opt.fk1) + self.parameters['nu'] = ( + (opt.kMeta / kM)**(4./3) * + (np.pi / kf)**(1./3) * + (2*self.parameters['energy'] / 3)**0.5) if type(opt.checkpoints_per_file) == type(None): # hardcoded FFTW complex representation size field_size = 3*(opt.nx+2)*opt.ny*opt.nz*self.fluid_dtype.itemsize checkpoint_size = field_size - if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']: + if self.dns_type in ['NSVEparticles', 'NSVEcomplex_particles', 'NSVEparticles_no_output', 'NSVEp_extra_sampling']: rhs_size = self.parameters['tracers0_integration_steps'] if type(opt.tracers0_integration_steps) != type(None): rhs_size = opt.tracers0_integration_steps @@ -708,28 +774,64 @@ class DNS(_code): return os.path.join( self.work_dir, self.simname + '_checkpoint_0.h5') + def get_checkpoint_fname(self, iteration = 0): + checkpoint = iteration // self.parameters['checkpoints_per_file'] + return os.path.join( + self.work_dir, + self.simname + '_checkpoint_{0}.h5'.format(checkpoint)) def generate_tracer_state( self, rseed = None, - species = 0): - with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file: - dset = data_file[ - 'tracers{0}/state/0'.format(species)] - if not type(rseed) == type(None): - np.random.seed(rseed) - nn = self.parameters['nparticles'] - cc = int(0) - batch_size = int(1e6) - while nn > 0: - if nn > batch_size: - dset[cc*batch_size:(cc+1)*batch_size] = np.random.random( - (batch_size, 3))*2*np.pi - nn -= batch_size - else: - dset[cc*batch_size:cc*batch_size+nn] = np.random.random( - (nn, 3))*2*np.pi - nn = 0 - cc += 1 + species = 0, + integration_steps = None, + ncomponents = 3): + try: + if type(integration_steps) == type(None): + integration_steps = self.NSVEp_extra_parameters['tracers0_integration_steps'] + if 'tracers{0}_integration_steps'.format(species) in self.parameters.keys(): + integration_steps = self.parameters['tracers{0}_integration_steps'.format(species)] + if self.dns_type == 'NSVEcomplex_particles' and species == 0: + ncomponents = 6 + with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file: + nn = self.parameters['nparticles'] + if not 'tracers{0}'.format(species) in data_file.keys(): + data_file.create_group('tracers{0}'.format(species)) + data_file.create_group('tracers{0}/rhs'.format(species)) + data_file.create_group('tracers{0}/state'.format(species)) + data_file['tracers{0}/rhs'.format(species)].create_dataset( + '0', + shape = (integration_steps, nn, ncomponents,), + dtype = np.float) + dset = data_file['tracers{0}/state'.format(species)].create_dataset( + '0', + shape = (nn, ncomponents,), + dtype = np.float) + if not type(rseed) == type(None): + np.random.seed(rseed) + cc = int(0) + batch_size = int(1e6) + def get_random_phases(npoints): + return np.random.random( + (npoints, 3))*2*np.pi + def get_random_versors(npoints): + bla = np.random.normal( + size = (npoints, 3)) + bla /= np.sum(bla**2, axis = 1)[:, None]**.5 + return bla + while nn > 0: + if nn > batch_size: + dset[cc*batch_size:(cc+1)*batch_size, :3] = get_random_phases(batch_size) + if dset.shape[1] == 6: + dset[cc*batch_size:(cc+1)*batch_size, 3:] = get_random_versors(batch_size) + nn -= batch_size + else: + dset[cc*batch_size:cc*batch_size+nn, :3] = get_random_phases(nn) + if dset.shape[1] == 6: + dset[cc*batch_size:cc*batch_size+nn, 3:] = get_random_versors(nn) + nn = 0 + cc += 1 + except Exception as e: + print(e) return None def generate_vector_field( self, @@ -774,21 +876,21 @@ class DNS(_code): """ np.random.seed(rseed) Kdata00 = scalar_generator( - 
self.parameters['nz']//2, - self.parameters['ny']//2, - self.parameters['nx']//2, + self.parameters['nz'], + self.parameters['ny'], + self.parameters['nx'], p = spectra_slope, amplitude = amplitude).astype(self.ctype) Kdata01 = scalar_generator( - self.parameters['nz']//2, - self.parameters['ny']//2, - self.parameters['nx']//2, + self.parameters['nz'], + self.parameters['ny'], + self.parameters['nx'], p = spectra_slope, amplitude = amplitude).astype(self.ctype) Kdata02 = scalar_generator( - self.parameters['nz']//2, - self.parameters['ny']//2, - self.parameters['nx']//2, + self.parameters['nz'], + self.parameters['ny'], + self.parameters['nx'], p = spectra_slope, amplitude = amplitude).astype(self.ctype) Kdata0 = np.zeros( @@ -818,113 +920,128 @@ class DNS(_code): # the case of increasing/decreasing by the same factor in all directions. # in principle we could write something more generic, but i'm not sure # how complicated that would be - dst_shape = (self.parameters['nz'], - self.parameters['ny'], + dst_shape = (self.parameters['ny'], + self.parameters['nz'], (self.parameters['nx']+2) // 2, 3) src_file = h5py.File(src_file_name, 'r') if (src_file[src_dset_name].shape == dst_shape): - if make_link and (src_file[src_dset_name].dtype == self.ctype): - dst_file[dst_dset_name] = h5py.ExternalLink( - src_file_name, - src_dset_name) - else: - dst_file.create_dataset( - dst_dset_name, - shape = dst_shape, - dtype = self.ctype, - fillvalue = 0.0) - for kz in range(src_file[src_dset_name].shape[0]): - dst_file[dst_dset_name][kz] = src_file[src_dset_name][kz] + dst_file[dst_dset_name] = h5py.ExternalLink( + src_file_name, + src_dset_name) else: - print('aloha') min_shape = (min(dst_shape[0], src_file[src_dset_name].shape[0]), min(dst_shape[1], src_file[src_dset_name].shape[1]), min(dst_shape[2], src_file[src_dset_name].shape[2]), 3) - print(self.ctype) + src_shape = src_file[src_dset_name].shape dst_file.create_dataset( dst_dset_name, shape = dst_shape, dtype = np.dtype(self.ctype), fillvalue = complex(0)) - for kz in range(min_shape[0]): - dst_file[dst_dset_name][kz,:min_shape[1], :min_shape[2]] = \ - src_file[src_dset_name][kz, :min_shape[1], :min_shape[2]] + for kz in range(min_shape[0]//2): + dst_file[dst_dset_name][kz,:min_shape[1]//2, :min_shape[2]] = \ + src_file[src_dset_name][kz, :min_shape[1]//2, :min_shape[2]] + dst_file[dst_dset_name][kz, + dst_shape[1] - min_shape[1]//2+1:, + :min_shape[2]] = \ + src_file[src_dset_name][kz, + src_shape[1] - min_shape[1]//2+1, + :min_shape[2]] + if kz > 0: + dst_file[dst_dset_name][-kz,:min_shape[1]//2, :min_shape[2]] = \ + src_file[src_dset_name][-kz, :min_shape[1]//2, :min_shape[2]] + dst_file[dst_dset_name][-kz, + dst_shape[1] - min_shape[1]//2+1:, + :min_shape[2]] = \ + src_file[src_dset_name][-kz, + src_shape[1] - min_shape[1]//2+1, + :min_shape[2]] + return None + def generate_particle_data( + self, + opt = None): + if self.parameters['nparticles'] > 0: + self.generate_tracer_state( + species = 0, + rseed = opt.particle_rand_seed) + if not os.path.exists(self.get_particle_file_name()): + with h5py.File(self.get_particle_file_name(), 'w') as particle_file: + particle_file.create_group('tracers0/position') + particle_file.create_group('tracers0/velocity') + particle_file.create_group('tracers0/acceleration') + if self.dns_type in ['NSVEcomplex_particles']: + particle_file.create_group('tracers0/orientation') + particle_file.create_group('tracers0/velocity_gradient') + if self.dns_type in ['NSVEp_extra_sampling']: + 
particle_file.create_group('tracers0/velocity_gradient') + particle_file.create_group('tracers0/pressure') + particle_file.create_group('tracers0/pressure_gradient') + particle_file.create_group('tracers0/pressure_Hessian') + return None + def generate_initial_condition( + self, + opt = None): + # take care of fields' initial condition + # first, check if initial field exists + need_field = False + if not os.path.exists(self.get_checkpoint_0_fname()): + need_field = True + else: + f = h5py.File(self.get_checkpoint_0_fname(), 'r') + try: + dset = f['vorticity/complex/0'] + need_field = (dset.shape == (self.parameters['ny'], + self.parameters['nz'], + self.parameters['nx']//2+1, + 3)) + except: + need_field = True + f.close() + if need_field: + f = h5py.File(self.get_checkpoint_0_fname(), 'a') + if len(opt.src_simname) > 0: + source_cp = 0 + src_file = 'not_a_file' + while True: + src_file = os.path.join( + os.path.realpath(opt.src_work_dir), + opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp)) + f0 = h5py.File(src_file, 'r') + if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys(): + f0.close() + break + source_cp += 1 + self.copy_complex_field( + src_file, + 'vorticity/complex/{0}'.format(opt.src_iteration), + f, + 'vorticity/complex/{0}'.format(0)) + else: + data = self.generate_vector_field( + write_to_file = False, + spectra_slope = 2.0, + amplitude = 0.05) + f['vorticity/complex/{0}'.format(0)] = data + f.close() + # now take care of particles' initial condition + if self.dns_type in ['NSVEparticles', 'NSVEcomplex_particles', 'NSVEparticles_no_output', 'NSVEp_extra_sampling']: + self.generate_particle_data(opt = opt) return None def launch_jobs( self, - opt = None, - particle_initial_condition = None): - if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): - # take care of fields' initial condition - if not os.path.exists(self.get_checkpoint_0_fname()): - f = h5py.File(self.get_checkpoint_0_fname(), 'w') - if len(opt.src_simname) > 0: - source_cp = 0 - src_file = 'not_a_file' - while True: - src_file = os.path.join( - os.path.realpath(opt.src_work_dir), - opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp)) - f0 = h5py.File(src_file, 'r') - if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys(): - f0.close() - break - source_cp += 1 - self.copy_complex_field( - src_file, - 'vorticity/complex/{0}'.format(opt.src_iteration), - f, - 'vorticity/complex/{0}'.format(0)) - else: - data = self.generate_vector_field( - write_to_file = False, - spectra_slope = 2.0, - amplitude = 0.05) - f['vorticity/complex/{0}'.format(0)] = data - f.close() - ## take care of particles' initial condition - #if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']: - # if opt.pclouds > 1: - # np.random.seed(opt.particle_rand_seed) - # if opt.pcloud_type == 'random-cube': - # particle_initial_condition = ( - # np.random.random((opt.pclouds, 1, 3))*2*np.pi + - # np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size) - # elif opt.pcloud_type == 'regular-cube': - # onedarray = np.linspace( - # -opt.particle_cloud_size/2, - # opt.particle_cloud_size/2, - # self.parameters['nparticles']) - # particle_initial_condition = np.zeros( - # (opt.pclouds, - # self.parameters['nparticles'], - # self.parameters['nparticles'], - # self.parameters['nparticles'], 3), - # dtype = np.float64) - # particle_initial_condition[:] = \ - # np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi - # particle_initial_condition[..., 0] += 
onedarray[None, None, None, :] - # particle_initial_condition[..., 1] += onedarray[None, None, :, None] - # particle_initial_condition[..., 2] += onedarray[None, :, None, None] - self.write_par( - particle_ic = None) - if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']: - if self.parameters['nparticles'] > 0: - self.generate_tracer_state( - species = 0, - rseed = opt.particle_rand_seed) - if not os.path.exists(self.get_particle_file_name()): - with h5py.File(self.get_particle_file_name(), 'w') as particle_file: - particle_file.create_group('tracers0/velocity') - particle_file.create_group('tracers0/acceleration') + opt = None): + if not os.path.exists(self.get_data_file_name()): + self.generate_initial_condition(opt = opt) + self.write_par() self.run( nb_processes = opt.nb_processes, nb_threads_per_process = opt.nb_threads_per_process, njobs = opt.njobs, hours = opt.minutes // 60, minutes = opt.minutes % 60, - no_submit = opt.no_submit) + no_submit = opt.no_submit, + no_debug = opt.no_debug) return None diff --git a/bfps/FluidConvert.py b/bfps/FluidConvert.py deleted file mode 100644 index 58d19116bfb8ab386ef9783babb2ad8da79760e4..0000000000000000000000000000000000000000 --- a/bfps/FluidConvert.py +++ /dev/null @@ -1,140 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -import numpy as np -import pickle -import os -from ._fluid_base import _fluid_particle_base -from ._base import _base -import bfps - -class FluidConvert(_fluid_particle_base): - """This class is meant to be used for conversion of native DNS field - representations to real-space representations of velocity/vorticity - fields. - It may be superseeded by streamlined functionality in the future... 
- """ - def __init__( - self, - name = 'FluidConvert-v' + bfps.__version__, - work_dir = './', - simname = 'test', - fluid_precision = 'single', - use_fftw_wisdom = False): - _fluid_particle_base.__init__( - self, - name = name + '-' + fluid_precision, - work_dir = work_dir, - simname = simname, - dtype = fluid_precision, - use_fftw_wisdom = use_fftw_wisdom) - self.spec_parameters = {} - self.spec_parameters['write_rvelocity'] = 1 - self.spec_parameters['write_rvorticity'] = 1 - self.spec_parameters['write_rTrS2'] = 1 - self.spec_parameters['write_renstrophy'] = 1 - self.spec_parameters['write_rpressure'] = 1 - self.spec_parameters['iter0'] = 0 - self.spec_parameters['iter1'] = -1 - self.fill_up_fluid_code() - self.finalize_code(postprocess_mode = True) - return None - def fill_up_fluid_code(self): - self.definitions += self.cread_pars( - parameters = self.spec_parameters, - function_suffix = '_specific', - file_group = 'conversion_parameters') - self.variables += self.cdef_pars( - parameters = self.spec_parameters) - self.main_start += 'read_parameters_specific();\n' - self.fluid_includes += '#include <cstring>\n' - self.fluid_variables += ('double t;\n' + - 'fluid_solver<{0}> *fs;\n').format(self.C_dtype) - self.fluid_definitions += """ - //begincpp - void do_conversion(fluid_solver<{0}> *bla) - {{ - bla->read('v', 'c'); - if (write_rvelocity) - bla->write('u', 'r'); - if (write_rvorticity) - bla->write('v', 'r'); - if (write_rTrS2) - bla->write_rTrS2(); - if (write_renstrophy) - bla->write_renstrophy(); - if (write_rpressure) - bla->write_rpressure(); - }} - //endcpp - """.format(self.C_dtype) - self.fluid_start += """ - //begincpp - fs = new fluid_solver<{0}>( - simname, - nx, ny, nz, - dkx, dky, dkz, - dealias_type, - DEFAULT_FFTW_FLAG); - //endcpp - """.format(self.C_dtype) - self.fluid_loop += """ - //begincpp - fs->iteration = frame_index; - do_conversion(fs); - //endcpp - """ - self.fluid_end += 'delete fs;\n' - return None - def specific_parser_arguments( - self, - parser): - _fluid_particle_base.specific_parser_arguments(self, parser) - self.parameters_to_parser_arguments( - parser, - parameters = self.spec_parameters) - return None - def launch( - self, - args = [], - **kwargs): - opt = self.prepare_launch(args) - if opt.iter1 == -1: - opt.iter1 = self.get_data_file()['iteration'].value - self.pars_from_namespace( - opt, - parameters = self.spec_parameters) - self.rewrite_par( - group = 'conversion_parameters', - parameters = self.spec_parameters) - self.run(opt.nb_processes, - 1, - hours = opt.minutes // 60, - minutes = opt.minutes % 60, - err_file = 'err_convert', - out_file = 'out_convert') - return None - diff --git a/bfps/FluidResize.py b/bfps/FluidResize.py deleted file mode 100644 index fb5e26208f6960d447bc927bd9e207354620d188..0000000000000000000000000000000000000000 --- a/bfps/FluidResize.py +++ /dev/null @@ -1,156 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -import os - -import bfps -from ._fluid_base import _fluid_particle_base - -class FluidResize(_fluid_particle_base): - """This class is meant to resize snapshots of DNS states to new grids. - Typical stuff for DNS of turbulence. - It will become superfluous when HDF5 is used for field I/O. - """ - def __init__( - self, - name = 'FluidResize-v' + bfps.__version__, - work_dir = './', - simname = 'test', - fluid_precision = 'single', - use_fftw_wisdom = False): - _fluid_particle_base.__init__( - self, - name = name + '-' + fluid_precision, - work_dir = work_dir, - simname = simname, - dtype = fluid_precision, - use_fftw_wisdom = use_fftw_wisdom) - self.parameters['src_simname'] = 'test' - self.parameters['dst_iter'] = 0 - self.parameters['dst_nx'] = 32 - self.parameters['dst_ny'] = 32 - self.parameters['dst_nz'] = 32 - self.parameters['dst_simname'] = 'new_test' - self.parameters['dst_dkx'] = 1.0 - self.parameters['dst_dky'] = 1.0 - self.parameters['dst_dkz'] = 1.0 - self.fill_up_fluid_code() - self.finalize_code() - return None - def fill_up_fluid_code(self): - self.fluid_includes += '#include <cstring>\n' - self.fluid_includes += '#include "fftw_tools.hpp"\n' - self.fluid_variables += ('double t;\n' + - 'fluid_solver<' + self.C_dtype + '> *fs0, *fs1;\n') - self.fluid_start += """ - //begincpp - char fname[512]; - fs0 = new fluid_solver<{0}>( - src_simname, - nx, ny, nz, - dkx, dky, dkz); - fs1 = new fluid_solver<{0}>( - dst_simname, - dst_nx, dst_ny, dst_nz, - dst_dkx, dst_dky, dst_dkz); - fs0->iteration = iteration; - fs1->iteration = 0; - DEBUG_MSG("about to read field\\n"); - fs0->read('v', 'c'); - DEBUG_MSG("field read, about to copy data\\n"); - double a, b; - fs0->compute_velocity(fs0->cvorticity); - a = 0.5*fs0->autocorrel(fs0->cvelocity); - b = 0.5*fs0->autocorrel(fs0->cvorticity); - DEBUG_MSG("old field %d %g %g\\n", fs0->iteration, a, b); - copy_complex_array<{0}>(fs0->cd, fs0->cvorticity, - fs1->cd, fs1->cvorticity, - 3); - DEBUG_MSG("data copied, about to write new field\\n"); - fs1->write('v', 'c'); - DEBUG_MSG("finished writing\\n"); - fs1->compute_velocity(fs1->cvorticity); - a = 0.5*fs1->autocorrel(fs1->cvelocity); - b = 0.5*fs1->autocorrel(fs1->cvorticity); - DEBUG_MSG("new field %d %g %g\\n", fs1->iteration, a, b); - //endcpp - """.format(self.C_dtype) - self.fluid_end += """ - //begincpp - delete fs0; - delete fs1; - //endcpp - """ - return None - def specific_parser_arguments( - self, - parser): - _fluid_particle_base.specific_parser_arguments(self, parser) - parser.add_argument( - '-m', - type = int, - dest = 'm', - default = 32, - metavar = 'M', - help = 'resize from N to M') - parser.add_argument( - '--src_wd', - type = str, - dest = 'src_work_dir', - required = True) - parser.add_argument( - '--src_iteration', - type = int, - dest = 'src_iteration', - required = True) - return None - def launch( - self, - args = [], - **kwargs): - opt = self.prepare_launch(args) - cmd_line_pars = vars(opt) - for k in ['dst_nx', 'dst_ny', 'dst_nz']: - if type(cmd_line_pars[k]) == type(None): - cmd_line_pars[k] = opt.m - # the 3 dst_ni have been updated in opt itself at this point - # I'm not sure if this code is future-proof... 
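A note on the file naming used in the launch code below: the binary field files encode the iteration index as five zero-padded hexadecimal digits through the format spec '{0:0>5x}'. A minimal sketch of the resulting name, with an illustrative iteration number:

    # Illustration of the '<simname>_cvorticity_i{0:0>5x}' pattern used below.
    iteration = 300                                        # illustrative value
    fname = 'test' + '_cvorticity_i{0:0>5x}'.format(iteration)
    print(fname)                                           # -> test_cvorticity_i0012c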
- self.parameters['niter_todo'] = 0 - self.pars_from_namespace(opt) - src_file = os.path.join( - os.path.realpath(opt.src_work_dir), - opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration)) - read_file = os.path.join( - self.work_dir, - opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration)) - self.write_par(iter0 = opt.src_iteration) - if not os.path.exists(read_file): - os.symlink(src_file, read_file) - self.run(ncpu = opt.ncpu, - hours = opt.minutes // 60, - minutes = opt.minutes % 60) - return None - diff --git a/bfps/NSManyParticles.py b/bfps/NSManyParticles.py deleted file mode 100644 index 03f7345f61b27299bd2da60ea0c4d44924112837..0000000000000000000000000000000000000000 --- a/bfps/NSManyParticles.py +++ /dev/null @@ -1,92 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -import bfps - -class NSManyParticles(bfps.NavierStokes): - def specific_parser_arguments( - self, - parser): - bfps.NavierStokes.specific_parser_arguments(self, parser) - parser.add_argument( - '--particle-class', - default = 'rFFTW_distributed_particles', - dest = 'particle_class', - type = str) - parser.add_argument( - '--interpolator-class', - default = 'rFFTW_interpolator', - dest = 'interpolator_class', - type = str) - parser.add_argument('--neighbours', - type = int, - dest = 'neighbours', - default = 3) - parser.add_argument('--smoothness', - type = int, - dest = 'smoothness', - default = 2) - return None - def launch( - self, - args = [], - **kwargs): - opt = self.prepare_launch(args = args) - self.fill_up_fluid_code() - if type(opt.nparticles) == int: - if opt.nparticles > 0: - self.add_3D_rFFTW_field( - name = 'rFFTW_acc') - interp_list = [] - for n in range(1, opt.neighbours): - interp_list.append('Lagrange_n{0}'.format(n)) - self.add_interpolator( - interp_type = 'Lagrange', - name = interp_list[-1], - neighbours = n, - class_name = opt.interpolator_class) - for m in range(1, opt.smoothness): - interp_list.append('spline_n{0}m{1}'.format(n, m)) - self.add_interpolator( - interp_type = 'spline', - name = interp_list[-1], - neighbours = n, - smoothness = m, - class_name = opt.interpolator_class) - self.add_particles( - integration_steps = 2, - interpolator = interp_list, - acc_name = 'rFFTW_acc', - class_name = opt.particle_class) - self.add_particles( - integration_steps = 4, - interpolator = interp_list, - acc_name = 'rFFTW_acc', - class_name = opt.particle_class) - self.finalize_code() - self.launch_jobs(opt = opt) - return None - diff --git a/bfps/NSVorticityEquation.py b/bfps/NSVorticityEquation.py deleted file mode 100644 index 
5f87097fefbb56f731a75597395d42423fc17ba6..0000000000000000000000000000000000000000 --- a/bfps/NSVorticityEquation.py +++ /dev/null @@ -1,864 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -import sys -import os -import numpy as np -import h5py -import argparse - -import bfps -import bfps.tools -from bfps._code import _code -from bfps._fluid_base import _fluid_particle_base - -class NSVorticityEquation(_fluid_particle_base): - def __init__( - self, - name = 'NSVorticityEquation-v' + bfps.__version__, - work_dir = './', - simname = 'test', - fluid_precision = 'single', - fftw_plan_rigor = 'FFTW_MEASURE', - use_fftw_wisdom = True): - """ - This code uses checkpoints for DNS restarts, and it can be stopped - by creating the file "stop_<simname>" in the working directory. - For postprocessing of field snapshots, consider creating a separate - HDF5 file (from the python wrapper) which contains links to all the - different snapshots. 
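The stop mechanism mentioned in this docstring relies on nothing more than the presence of a marker file; the solver polls for it at each statistics step and broadcasts the result to all ranks. A minimal sketch of requesting a clean shutdown, assuming the simulation is named 'test' and runs in the current directory:

    # Ask a running DNS (simname 'test') to stop at its next check.
    import pathlib
    pathlib.Path('stop_test').touch()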
- """ - self.fftw_plan_rigor = fftw_plan_rigor - _fluid_particle_base.__init__( - self, - name = name + '-' + fluid_precision, - work_dir = work_dir, - simname = simname, - dtype = fluid_precision, - use_fftw_wisdom = use_fftw_wisdom) - self.parameters['nu'] = float(0.1) - self.parameters['fmode'] = 1 - self.parameters['famplitude'] = float(0.5) - self.parameters['fk0'] = float(2.0) - self.parameters['fk1'] = float(4.0) - self.parameters['forcing_type'] = 'linear' - self.parameters['histogram_bins'] = int(256) - self.parameters['max_velocity_estimate'] = float(1) - self.parameters['max_vorticity_estimate'] = float(1) - self.parameters['checkpoints_per_file'] = int(1) - self.file_datasets_grow = """ - //begincpp - hid_t group; - group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT); - H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL); - H5Gclose(group); - //endcpp - """ - self.style = {} - self.statistics = {} - self.fluid_output = """ - fs->io_checkpoint(false); - """ - # vorticity_equation specific things - self.includes += '#include "vorticity_equation.hpp"\n' - self.store_kspace = """ - //begincpp - if (myrank == 0 && iteration == 0) - { - TIMEZONE("fluid_base::store_kspace"); - hsize_t dims[4]; - hid_t space, dset; - // store kspace information - dset = H5Dopen(stat_file, "/kspace/kshell", H5P_DEFAULT); - space = H5Dget_space(dset); - H5Sget_simple_extent_dims(space, dims, NULL); - H5Sclose(space); - if (fs->kk->nshells != dims[0]) - { - DEBUG_MSG( - "ERROR: computed nshells %d not equal to data file nshells %d\\n", - fs->kk->nshells, dims[0]); - } - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kshell.front()); - H5Dclose(dset); - dset = H5Dopen(stat_file, "/kspace/nshell", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->nshell.front()); - H5Dclose(dset); - dset = H5Dopen(stat_file, "/kspace/kM", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kM); - H5Dclose(dset); - dset = H5Dopen(stat_file, "/kspace/dk", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->dk); - H5Dclose(dset); - } - //endcpp - """ - return None - def add_particles( - self, - integration_steps = 2, - neighbours = 1, - smoothness = 1): - assert(integration_steps > 0 and integration_steps < 6) - self.particle_species = 1 - self.parameters['tracers0_integration_steps'] = int(integration_steps) - self.parameters['tracers0_neighbours'] = int(neighbours) - self.parameters['tracers0_smoothness'] = int(smoothness) - self.parameters['tracers0_interpolator'] = 'spline' - self.particle_includes += """ - #include "particles/particles_system_builder.hpp" - #include "particles/particles_output_hdf5.hpp" - """ - ## initialize - self.particle_start += """ - DEBUG_MSG( - "current fname is %s\\n and iteration is %d", - fs->get_current_fname().c_str(), - fs->iteration); - std::unique_ptr<abstract_particles_system<long long int, double>> ps = particles_system_builder( - fs->cvelocity, // (field object) - fs->kk, // (kspace object, contains dkx, dky, dkz) - tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) - (long long int)nparticles, // to check coherency between parameters and hdf input file - fs->get_current_fname(), // particles input filename - std::string("/tracers0/state/") + std::to_string(fs->iteration), // dataset name for initial input - std::string("/tracers0/rhs/") + std::to_string(fs->iteration), // 
dataset name for initial input - tracers0_neighbours, // parameter (interpolation no neighbours) - tracers0_smoothness, // parameter - MPI_COMM_WORLD, - fs->iteration+1); - particles_output_hdf5<long long int, double,3,3> particles_output_writer_mpi( - MPI_COMM_WORLD, - "tracers0", - nparticles, - tracers0_integration_steps); - """ - self.particle_loop += """ - fs->compute_velocity(fs->cvorticity); - fs->cvelocity->ift(); - ps->completeLoop(dt); - """ - self.particle_output = """ - { - particles_output_writer_mpi.open_file(fs->get_current_fname()); - particles_output_writer_mpi.save(ps->getParticlesPositions(), - ps->getParticlesRhs(), - ps->getParticlesIndexes(), - ps->getLocalNbParticles(), - fs->iteration); - particles_output_writer_mpi.close_file(); - } - """ - self.particle_end += 'ps.release();\n' - return None - def create_stat_output( - self, - dset_name, - data_buffer, - data_type = 'H5T_NATIVE_DOUBLE', - size_setup = None, - close_spaces = True): - new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name) - if not type(size_setup) == type(None): - new_stat_output_txt += ( - size_setup + - 'wspace = H5Dget_space(Cdset);\n' + - 'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' + - 'mspace = H5Screate_simple(ndims, count, NULL);\n' + - 'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n') - new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' + - 'H5Dclose(Cdset);\n').format(data_type, data_buffer) - if close_spaces: - new_stat_output_txt += ('H5Sclose(mspace);\n' + - 'H5Sclose(wspace);\n') - return new_stat_output_txt - def write_fluid_stats(self): - self.fluid_includes += '#include <cmath>\n' - self.fluid_includes += '#include "fftw_tools.hpp"\n' - self.stat_src += """ - //begincpp - hid_t stat_group; - if (myrank == 0) - stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT); - fs->compute_velocity(fs->cvorticity); - *tmp_vec_field = fs->cvelocity->get_cdata(); - tmp_vec_field->compute_stats( - fs->kk, - stat_group, - "velocity", - fs->iteration / niter_stat, - max_velocity_estimate/sqrt(3)); - //endcpp - """ - self.stat_src += """ - //begincpp - *tmp_vec_field = fs->cvorticity->get_cdata(); - tmp_vec_field->compute_stats( - fs->kk, - stat_group, - "vorticity", - fs->iteration / niter_stat, - max_vorticity_estimate/sqrt(3)); - //endcpp - """ - self.stat_src += """ - //begincpp - if (myrank == 0) - H5Gclose(stat_group); - if (myrank == 0) - {{ - hid_t Cdset, wspace, mspace; - int ndims; - hsize_t count[4], offset[4], dims[4]; - offset[0] = fs->iteration/niter_stat; - offset[1] = 0; - offset[2] = 0; - offset[3] = 0; - //endcpp - """.format(self.C_dtype) - if self.dtype == np.float32: - field_H5T = 'H5T_NATIVE_FLOAT' - elif self.dtype == np.float64: - field_H5T = 'H5T_NATIVE_DOUBLE' - self.stat_src += self.create_stat_output( - '/statistics/xlines/velocity', - 'fs->rvelocity->get_rdata()', - data_type = field_H5T, - size_setup = """ - count[0] = 1; - count[1] = nx; - count[2] = 3; - """, - close_spaces = False) - self.stat_src += self.create_stat_output( - '/statistics/xlines/vorticity', - 'fs->rvorticity->get_rdata()', - data_type = field_H5T) - self.stat_src += '}\n' - ## checkpoint - self.stat_src += """ - //begincpp - if (myrank == 0) - { - std::string fname = ( - std::string("stop_") + - std::string(simname)); - { - struct stat file_buffer; - stop_code_now = (stat(fname.c_str(), &file_buffer) == 0); - } - } - MPI_Bcast(&stop_code_now, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); - 
//endcpp - """ - return None - def fill_up_fluid_code(self): - self.fluid_includes += '#include <cstring>\n' - self.fluid_variables += ( - 'vorticity_equation<{0}, FFTW> *fs;\n'.format(self.C_dtype) + - 'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) + - 'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype)) - self.fluid_definitions += """ - typedef struct {{ - {0} re; - {0} im; - }} tmp_complex_type; - """.format(self.C_dtype) - self.write_fluid_stats() - if self.dtype == np.float32: - field_H5T = 'H5T_NATIVE_FLOAT' - elif self.dtype == np.float64: - field_H5T = 'H5T_NATIVE_DOUBLE' - self.variables += 'int checkpoint;\n' - self.variables += 'bool stop_code_now;\n' - self.read_checkpoint = """ - //begincpp - if (myrank == 0) - { - hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT); - H5Dread( - dset, - H5T_NATIVE_INT, - H5S_ALL, - H5S_ALL, - H5P_DEFAULT, - &checkpoint); - H5Dclose(dset); - } - MPI_Bcast(&checkpoint, 1, MPI_INT, 0, MPI_COMM_WORLD); - fs->checkpoint = checkpoint; - //endcpp - """ - self.store_checkpoint = """ - //begincpp - checkpoint = fs->checkpoint; - if (myrank == 0) - { - hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT); - H5Dwrite( - dset, - H5T_NATIVE_INT, - H5S_ALL, - H5S_ALL, - H5P_DEFAULT, - &checkpoint); - H5Dclose(dset); - } - //endcpp - """ - self.fluid_start += """ - //begincpp - char fname[512]; - fs = new vorticity_equation<{0}, FFTW>( - simname, - nx, ny, nz, - dkx, dky, dkz, - {1}); - tmp_vec_field = new field<{0}, FFTW, THREE>( - nx, ny, nz, - MPI_COMM_WORLD, - {1}); - tmp_scal_field = new field<{0}, FFTW, ONE>( - nx, ny, nz, - MPI_COMM_WORLD, - {1}); - fs->checkpoints_per_file = checkpoints_per_file; - fs->nu = nu; - fs->fmode = fmode; - fs->famplitude = famplitude; - fs->fk0 = fk0; - fs->fk1 = fk1; - strncpy(fs->forcing_type, forcing_type, 128); - fs->iteration = iteration; - {2} - fs->cvorticity->real_space_representation = false; - fs->io_checkpoint(); - //endcpp - """.format( - self.C_dtype, - self.fftw_plan_rigor, - self.read_checkpoint) - self.fluid_start += self.store_kspace - self.fluid_start += 'stop_code_now = false;\n' - self.fluid_loop = 'fs->step(dt);\n' - self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' + - self.fluid_output + - self.particle_output + - self.store_checkpoint + - '\n}\n' + - 'if (stop_code_now){\n' + - 'iteration = fs->iteration;\n' + - 'break;\n}\n') - self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' + - self.fluid_output + - self.particle_output + - self.store_checkpoint + - 'DEBUG_MSG("checkpoint value is %d\\n", checkpoint);\n' + - '\n}\n' + - 'delete fs;\n' + - 'delete tmp_vec_field;\n' + - 'delete tmp_scal_field;\n') - return None - def get_postprocess_file_name(self): - return os.path.join(self.work_dir, self.simname + '_postprocess.h5') - def get_postprocess_file(self): - return h5py.File(self.get_postprocess_file_name(), 'r') - def compute_statistics(self, iter0 = 0, iter1 = None): - """Run basic postprocessing on raw data. - The energy spectrum :math:`E(t, k)` and the enstrophy spectrum - :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the - - .. math:: - - \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm - \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^* - - tensors, and the enstrophy spectrum is also used to - compute the dissipation :math:`\\varepsilon(t)`. - These basic quantities are stored in a newly created HDF5 file, - ``simname_postprocess.h5``. 
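A minimal sketch of the postprocessing described here, using the dataset names that compute_statistics reads below; the statistics file name and the viscosity are illustrative values:

    # Recover E(t, k), total energy and dissipation from the stored spectral tensors.
    import numpy as np
    import h5py

    nu = 0.1                                               # illustrative viscosity
    with h5py.File('test.h5', 'r') as f:                   # illustrative stat file
        uu = f['statistics/spectra/velocity_velocity'][...]   # shape (nt, nshells, 3, 3)
        ww = f['statistics/spectra/vorticity_vorticity'][...]
        dk = f['kspace/dk'][()]
    energy_tk    = (uu[..., 0, 0] + uu[..., 1, 1] + uu[..., 2, 2]) / 2
    enstrophy_tk = (ww[..., 0, 0] + ww[..., 1, 1] + ww[..., 2, 2]) / 2
    energy_t     = dk * energy_tk.sum(axis=1)              # E(t)
    dissipation  = 2 * nu * dk * enstrophy_tk.sum(axis=1)  # epsilon(t)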
- """ - if len(list(self.statistics.keys())) > 0: - return None - self.read_parameters() - with self.get_data_file() as data_file: - if 'moments' not in data_file['statistics'].keys(): - return None - iter0 = min((data_file['statistics/moments/velocity'].shape[0] * - self.parameters['niter_stat']-1), - iter0) - if type(iter1) == type(None): - iter1 = data_file['iteration'].value - else: - iter1 = min(data_file['iteration'].value, iter1) - ii0 = iter0 // self.parameters['niter_stat'] - ii1 = iter1 // self.parameters['niter_stat'] - self.statistics['kshell'] = data_file['kspace/kshell'].value - self.statistics['kM'] = data_file['kspace/kM'].value - self.statistics['dk'] = data_file['kspace/dk'].value - computation_needed = True - pp_file = h5py.File(self.get_postprocess_file_name(), 'a') - if 'ii0' in pp_file.keys(): - computation_needed = not (ii0 == pp_file['ii0'].value and - ii1 == pp_file['ii1'].value) - if computation_needed: - for k in pp_file.keys(): - del pp_file[k] - if computation_needed: - pp_file['iter0'] = iter0 - pp_file['iter1'] = iter1 - pp_file['ii0'] = ii0 - pp_file['ii1'] = ii1 - pp_file['t'] = (self.parameters['dt']* - self.parameters['niter_stat']* - (np.arange(ii0, ii1+1).astype(np.float))) - pp_file['energy(t, k)'] = ( - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] + - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] + - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2 - pp_file['enstrophy(t, k)'] = ( - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2 - pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'] [ii0:ii1+1, 9, 3] - pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2 - for k in ['t', - 'energy(t, k)', - 'enstrophy(t, k)', - 'vel_max(t)', - 'renergy(t)']: - if k in pp_file.keys(): - self.statistics[k] = pp_file[k].value - self.compute_time_averages() - return None - def compute_time_averages(self): - """Compute easy stats. - - Further computation of statistics based on the contents of - ``simname_postprocess.h5``. - Standard quantities are as follows - (consistent with [Ishihara]_): - - .. math:: - - U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm - L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm - T_{\\textrm{int}}(t) = - \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)} - - \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm - \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm - \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}} - - Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip - .5cm - R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu} - - .. [Ishihara] T. Ishihara et al, - *Small-scale statistics in high-resolution direct numerical - simulation of turbulence: Reynolds number dependence of - one-point velocity gradient statistics*. - J. 
Fluid Mech., - **592**, 335-366, 2007 - """ - for key in ['energy', 'enstrophy']: - self.statistics[key + '(t)'] = (self.statistics['dk'] * - np.sum(self.statistics[key + '(t, k)'], axis = 1)) - self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3) - self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi / - (2*self.statistics['Uint(t)']**2)) * - np.nansum(self.statistics['energy(t, k)'] / - self.statistics['kshell'][None, :], axis = 1)) - for key in ['energy', - 'enstrophy', - 'vel_max', - 'Uint', - 'Lint']: - if key + '(t)' in self.statistics.keys(): - self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0) - for suffix in ['', '(t)']: - self.statistics['diss' + suffix] = (self.parameters['nu'] * - self.statistics['enstrophy' + suffix]*2) - self.statistics['etaK' + suffix] = (self.parameters['nu']**3 / - self.statistics['diss' + suffix])**.25 - self.statistics['tauK' + suffix] = (self.parameters['nu'] / - self.statistics['diss' + suffix])**.5 - self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] * - self.statistics['Lint' + suffix] / - self.parameters['nu']) - self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] * - self.statistics['Uint' + suffix]**2 / - self.statistics['diss' + suffix])**.5 - self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] * - self.statistics['lambda' + suffix] / - self.parameters['nu']) - self.statistics['kMeta' + suffix] = (self.statistics['kM'] * - self.statistics['etaK' + suffix]) - if self.parameters['dealias_type'] == 1: - self.statistics['kMeta' + suffix] *= 0.8 - self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint'] - self.statistics['Taylor_microscale'] = self.statistics['lambda'] - return None - def set_plt_style( - self, - style = {'dashes' : (None, None)}): - self.style.update(style) - return None - def convert_complex_from_binary( - self, - field_name = 'vorticity', - iteration = 0, - file_name = None): - """read the Fourier representation of a vector field. - - Read the binary file containing iteration ``iteration`` of the - field ``field_name``, and write it in a ``.h5`` file. 
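The memmap shape used just below, (ny, nz, nx//2 + 1, 3), follows from the real-to-complex transform: only nx//2 + 1 Fourier modes are stored along the x direction. A minimal sketch with illustrative grid sizes:

    # Expected shape of the complex vorticity/velocity files for a 64^3 grid.
    nx = ny = nz = 64
    complex_shape = (ny, nz, nx // 2 + 1, 3)
    print(complex_shape)                                   # -> (64, 64, 33, 3)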
- """ - data = np.memmap( - os.path.join(self.work_dir, - self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)), - dtype = self.ctype, - mode = 'r', - shape = (self.parameters['ny'], - self.parameters['nz'], - self.parameters['nx']//2+1, - 3)) - if type(file_name) == type(None): - file_name = self.simname + '_{0}_i{1:0>5x}.h5'.format('c' + field_name, iteration) - file_name = os.path.join(self.work_dir, file_name) - f = h5py.File(file_name, 'a') - f[field_name + '/complex/{0}'.format(iteration)] = data - f.close() - return None - def write_par( - self, - iter0 = 0, - particle_ic = None): - _fluid_particle_base.write_par(self, iter0 = iter0) - with h5py.File(self.get_data_file_name(), 'r+') as ofile: - kspace = self.get_kspace() - nshells = kspace['nshell'].shape[0] - vec_stat_datasets = ['velocity', 'vorticity'] - scal_stat_datasets = [] - for k in vec_stat_datasets: - time_chunk = 2**20//(8*3*self.parameters['nx']) # FIXME: use proper size of self.dtype - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/xlines/' + k, - (1, self.parameters['nx'], 3), - chunks = (time_chunk, self.parameters['nx'], 3), - maxshape = (None, self.parameters['nx'], 3), - dtype = self.dtype) - for k in vec_stat_datasets: - time_chunk = 2**20//(8*3*3*nshells) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/spectra/' + k + '_' + k, - (1, nshells, 3, 3), - chunks = (time_chunk, nshells, 3, 3), - maxshape = (None, nshells, 3, 3), - dtype = np.float64) - time_chunk = 2**20//(8*4*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/' + k, - (1, 10, 4), - chunks = (time_chunk, 10, 4), - maxshape = (None, 10, 4), - dtype = np.float64) - time_chunk = 2**20//(8*4*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/' + k, - (1, - self.parameters['histogram_bins'], - 4), - chunks = (time_chunk, - self.parameters['histogram_bins'], - 4), - maxshape = (None, - self.parameters['histogram_bins'], - 4), - dtype = np.int64) - ofile['checkpoint'] = int(0) - if self.particle_species == 0: - return None - - if type(particle_ic) == type(None): - pbase_shape = (self.parameters['nparticles'],) - number_of_particles = self.parameters['nparticles'] - else: - pbase_shape = particle_ic.shape[:-1] - assert(particle_ic.shape[-1] == 3) - number_of_particles = 1 - for val in pbase_shape[1:]: - number_of_particles *= val - with h5py.File(self.get_checkpoint_0_fname(), 'a') as ofile: - s = 0 - ofile.create_group('tracers{0}'.format(s)) - ofile.create_group('tracers{0}/rhs'.format(s)) - ofile.create_group('tracers{0}/state'.format(s)) - ofile['tracers{0}/rhs'.format(s)].create_dataset( - '0', - shape = ( - (self.parameters['tracers{0}_integration_steps'.format(s)],) + - pbase_shape + - (3,)), - dtype = np.float) - ofile['tracers{0}/state'.format(s)].create_dataset( - '0', - shape = ( - pbase_shape + - (3,)), - dtype = np.float) - return None - def specific_parser_arguments( - self, - parser): - _fluid_particle_base.specific_parser_arguments(self, parser) - parser.add_argument( - '--src-wd', - type = str, - dest = 'src_work_dir', - default = '') - parser.add_argument( - '--src-simname', - type = str, - dest = 'src_simname', - default = '') - parser.add_argument( - '--src-iteration', - type = int, - dest = 'src_iteration', - default = 0) - parser.add_argument( - '--njobs', - type = int, dest = 'njobs', - default = 1) - parser.add_argument( - '--kMeta', - type = float, - dest = 'kMeta', - default = 2.0) 
- parser.add_argument( - '--dtfactor', - type = float, - dest = 'dtfactor', - default = 0.5, - help = 'dt is computed as DTFACTOR / N') - parser.add_argument( - '--particle-rand-seed', - type = int, - dest = 'particle_rand_seed', - default = None) - parser.add_argument( - '--pclouds', - type = int, - dest = 'pclouds', - default = 1, - help = ('number of particle clouds. Particle "clouds" ' - 'consist of particles distributed according to ' - 'pcloud-type.')) - parser.add_argument( - '--pcloud-type', - choices = ['random-cube', - 'regular-cube'], - dest = 'pcloud_type', - default = 'random-cube') - parser.add_argument( - '--particle-cloud-size', - type = float, - dest = 'particle_cloud_size', - default = 2*np.pi) - parser.add_argument( - '--neighbours', - type = int, - dest = 'neighbours', - default = 1) - parser.add_argument( - '--smoothness', - type = int, - dest = 'smoothness', - default = 1) - return None - def prepare_launch( - self, - args = []): - """Set up reasonable parameters. - - With the default Lundgren forcing applied in the band [2, 4], - we can estimate the dissipation, therefore we can estimate - :math:`k_M \\eta_K` and constrain the viscosity. - - In brief, the command line parameter :math:`k_M \\eta_K` is - used in the following formula for :math:`\\nu` (:math:`N` is the - number of real space grid points per coordinate): - - .. math:: - - \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3} - - With this choice, the average dissipation :math:`\\varepsilon` - will be close to 0.4, and the integral scale velocity will be - close to 0.77, yielding the approximate value for the Taylor - microscale and corresponding Reynolds number: - - .. math:: - - \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in - R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6} - - """ - opt = _code.prepare_launch(self, args = args) - self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3) - self.parameters['dt'] = (opt.dtfactor / opt.n) - # custom famplitude for 288 and 576 - if opt.n == 288: - self.parameters['famplitude'] = 0.45 - elif opt.n == 576: - self.parameters['famplitude'] = 0.47 - if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0): - self.parameters['niter_out'] = self.parameters['niter_todo'] - if len(opt.src_work_dir) == 0: - opt.src_work_dir = os.path.realpath(opt.work_dir) - self.pars_from_namespace(opt) - return opt - def launch( - self, - args = [], - **kwargs): - opt = self.prepare_launch(args = args) - if type(opt.nparticles) != type(None): - if opt.nparticles > 0: - self.name += '-particles' - self.add_particles( - integration_steps = 4, - neighbours = opt.neighbours, - smoothness = opt.smoothness) - self.fill_up_fluid_code() - self.finalize_code() - self.launch_jobs(opt = opt, **kwargs) - return None - def get_checkpoint_0_fname(self): - return os.path.join( - self.work_dir, - self.simname + '_checkpoint_0.h5') - def generate_tracer_state( - self, - rseed = None, - iteration = 0, - species = 0, - write_to_file = False, - ncomponents = 3, - testing = False, - data = None): - if (type(data) == type(None)): - if not type(rseed) == type(None): - np.random.seed(rseed) - #point with problems: 5.37632864e+00, 6.10414710e+00, 6.25256493e+00] - data = np.zeros(self.parameters['nparticles']*ncomponents).reshape(-1, ncomponents) - data[:, :3] = np.random.random((self.parameters['nparticles'], 3))*2*np.pi - if testing: - #data[0] = np.array([3.26434, 4.24418, 3.12157]) - data[:] = np.array([ 0.72086101, 2.59043666, 
6.27501953]) - with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file: - data_file['tracers{0}/state/0'.format(species)][:] = data - if write_to_file: - data.tofile( - os.path.join( - self.work_dir, - "tracers{0}_state_i{1:0>5x}".format(species, iteration))) - return data - def launch_jobs( - self, - opt = None, - particle_initial_condition = None): - if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): - # take care of fields' initial condition - if not os.path.exists(self.get_checkpoint_0_fname()): - f = h5py.File(self.get_checkpoint_0_fname(), 'w') - if len(opt.src_simname) > 0: - source_cp = 0 - src_file = 'not_a_file' - while True: - src_file = os.path.join( - os.path.realpath(opt.src_work_dir), - opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp)) - f0 = h5py.File(src_file, 'r') - if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys(): - f0.close() - break - source_cp += 1 - f['vorticity/complex/{0}'.format(0)] = h5py.ExternalLink( - src_file, - 'vorticity/complex/{0}'.format(opt.src_iteration)) - else: - data = self.generate_vector_field( - write_to_file = False, - spectra_slope = 2.0, - amplitude = 0.05) - f['vorticity/complex/{0}'.format(0)] = data - f.close() - # take care of particles' initial condition - if opt.pclouds > 1: - np.random.seed(opt.particle_rand_seed) - if opt.pcloud_type == 'random-cube': - particle_initial_condition = ( - np.random.random((opt.pclouds, 1, 3))*2*np.pi + - np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size) - elif opt.pcloud_type == 'regular-cube': - onedarray = np.linspace( - -opt.particle_cloud_size/2, - opt.particle_cloud_size/2, - self.parameters['nparticles']) - particle_initial_condition = np.zeros( - (opt.pclouds, - self.parameters['nparticles'], - self.parameters['nparticles'], - self.parameters['nparticles'], 3), - dtype = np.float64) - particle_initial_condition[:] = \ - np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi - particle_initial_condition[..., 0] += onedarray[None, None, None, :] - particle_initial_condition[..., 1] += onedarray[None, None, :, None] - particle_initial_condition[..., 2] += onedarray[None, :, None, None] - self.write_par( - particle_ic = particle_initial_condition) - if self.parameters['nparticles'] > 0: - data = self.generate_tracer_state( - species = 0, - rseed = opt.particle_rand_seed, - data = particle_initial_condition) - for s in range(1, self.particle_species): - self.generate_tracer_state(species = s, data = data) - self.run( - nb_processes = opt.nb_processes, - nb_threads_per_process = opt.nb_threads_per_process, - njobs = opt.njobs, - hours = opt.minutes // 60, - minutes = opt.minutes % 60, - no_submit = opt.no_submit) - return None - -if __name__ == '__main__': - pass - diff --git a/bfps/NavierStokes.py b/bfps/NavierStokes.py deleted file mode 100644 index 59fb907c4a79f73dec5b6a8cfcb06d99b0b584bb..0000000000000000000000000000000000000000 --- a/bfps/NavierStokes.py +++ /dev/null @@ -1,1213 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. 
# -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -import sys -import os -import numpy as np -import h5py -import argparse - -import bfps -import bfps.tools -from ._code import _code -from ._fluid_base import _fluid_particle_base - -class NavierStokes(_fluid_particle_base): - """Objects of this class can be used to generate production DNS codes. - Any functionality that users require should be available through this class, - in the sense that they can implement whatever they need by simply inheriting - this class. - """ - def __init__( - self, - name = 'NavierStokes-v' + bfps.__version__, - work_dir = './', - simname = 'test', - fluid_precision = 'single', - fftw_plan_rigor = 'FFTW_MEASURE', - frozen_fields = False, - use_fftw_wisdom = True, - QR_stats_on = False, - Lag_acc_stats_on = False): - self.QR_stats_on = QR_stats_on - self.Lag_acc_stats_on = Lag_acc_stats_on - self.frozen_fields = frozen_fields - self.fftw_plan_rigor = fftw_plan_rigor - _fluid_particle_base.__init__( - self, - name = name + '-' + fluid_precision, - work_dir = work_dir, - simname = simname, - dtype = fluid_precision, - use_fftw_wisdom = use_fftw_wisdom) - self.parameters['nu'] = 0.1 - self.parameters['fmode'] = 1 - self.parameters['famplitude'] = 0.5 - self.parameters['fk0'] = 2.0 - self.parameters['fk1'] = 4.0 - self.parameters['forcing_type'] = 'linear' - self.parameters['histogram_bins'] = 256 - self.parameters['max_velocity_estimate'] = 1.0 - self.parameters['max_vorticity_estimate'] = 1.0 - self.parameters['max_Lag_acc_estimate'] = 1.0 - self.parameters['max_pressure_estimate'] = 1.0 - self.parameters['QR2D_histogram_bins'] = 64 - self.parameters['max_trS2_estimate'] = 1.0 - self.parameters['max_Q_estimate'] = 1.0 - self.parameters['max_R_estimate'] = 1.0 - self.file_datasets_grow = """ - //begincpp - hid_t group; - group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT); - H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL); - H5Gclose(group); - //endcpp - """ - self.style = {} - self.statistics = {} - self.fluid_output = 'fs->write(\'v\', \'c\');\n' - return None - def create_stat_output( - self, - dset_name, - data_buffer, - data_type = 'H5T_NATIVE_DOUBLE', - size_setup = None, - close_spaces = True): - new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name) - if not type(size_setup) == type(None): - new_stat_output_txt += ( - size_setup + - 'wspace = H5Dget_space(Cdset);\n' + - 'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' + - 'mspace = H5Screate_simple(ndims, count, NULL);\n' + - 'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n') - new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' + - 'H5Dclose(Cdset);\n').format(data_type, data_buffer) - if close_spaces: - new_stat_output_txt += ('H5Sclose(mspace);\n' + - 'H5Sclose(wspace);\n') - return new_stat_output_txt - def write_fluid_stats(self): - self.fluid_includes += '#include <cmath>\n' - self.fluid_includes += '#include "fftw_tools.hpp"\n' - self.stat_src += """ 
- //begincpp - hid_t stat_group; - if (myrank == 0) - stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT); - fs->compute_velocity(fs->cvorticity); - std::vector<double> max_estimate_vector; - max_estimate_vector.resize(4); - *tmp_vec_field = fs->cvelocity; - switch(fs->dealias_type) - { - case 0: - tmp_vec_field->compute_stats( - kk_two_thirds, - stat_group, - "velocity", - fs->iteration / niter_stat, - max_velocity_estimate/sqrt(3)); - break; - case 1: - tmp_vec_field->compute_stats( - kk_smooth, - stat_group, - "velocity", - fs->iteration / niter_stat, - max_velocity_estimate/sqrt(3)); - break; - } - //endcpp - """ - if self.Lag_acc_stats_on: - self.stat_src += """ - //begincpp - tmp_vec_field->real_space_representation = false; - fs->compute_Lagrangian_acceleration(tmp_vec_field->get_cdata()); - switch(fs->dealias_type) - { - case 0: - tmp_vec_field->compute_stats( - kk_two_thirds, - stat_group, - "Lagrangian_acceleration", - fs->iteration / niter_stat, - max_Lag_acc_estimate); - break; - case 1: - tmp_vec_field->compute_stats( - kk_smooth, - stat_group, - "Lagrangian_acceleration", - fs->iteration / niter_stat, - max_Lag_acc_estimate); - break; - } - tmp_scal_field->real_space_representation = false; - fs->compute_velocity(fs->cvorticity); - fs->ift_velocity(); - fs->compute_pressure(tmp_scal_field->get_cdata()); - switch(fs->dealias_type) - { - case 0: - tmp_scal_field->compute_stats( - kk_two_thirds, - stat_group, - "pressure", - fs->iteration / niter_stat, - max_pressure_estimate); - break; - case 1: - tmp_scal_field->compute_stats( - kk_smooth, - stat_group, - "pressure", - fs->iteration / niter_stat, - max_pressure_estimate); - break; - } - //endcpp - """ - self.stat_src += """ - //begincpp - *tmp_vec_field = fs->cvorticity; - switch(fs->dealias_type) - { - case 0: - tmp_vec_field->compute_stats( - kk_two_thirds, - stat_group, - "vorticity", - fs->iteration / niter_stat, - max_vorticity_estimate/sqrt(3)); - break; - case 1: - tmp_vec_field->compute_stats( - kk_smooth, - stat_group, - "vorticity", - fs->iteration / niter_stat, - max_vorticity_estimate/sqrt(3)); - break; - } - //endcpp - """ - if self.QR_stats_on: - self.stat_src += """ - //begincpp - double *trS2_Q_R_moments = new double[10*3]; - double *gradu_moments = new double[10*9]; - ptrdiff_t *hist_trS2_Q_R = new ptrdiff_t[histogram_bins*3]; - ptrdiff_t *hist_gradu = new ptrdiff_t[histogram_bins*9]; - ptrdiff_t *hist_QR2D = new ptrdiff_t[QR2D_histogram_bins*QR2D_histogram_bins]; - double trS2QR_max_estimates[3]; - double gradu_max_estimates[9]; - trS2QR_max_estimates[0] = max_trS2_estimate; - trS2QR_max_estimates[1] = max_Q_estimate; - trS2QR_max_estimates[2] = max_R_estimate; - std::fill_n(gradu_max_estimates, 9, sqrt(3*max_trS2_estimate)); - fs->compute_gradient_statistics( - fs->cvelocity, - gradu_moments, - trS2_Q_R_moments, - hist_gradu, - hist_trS2_Q_R, - hist_QR2D, - trS2QR_max_estimates, - gradu_max_estimates, - histogram_bins, - QR2D_histogram_bins); - //endcpp - """ - self.stat_src += """ - //begincpp - if (myrank == 0) - H5Gclose(stat_group); - if (fs->cd->myrank == 0) - {{ - hid_t Cdset, wspace, mspace; - int ndims; - hsize_t count[4], offset[4], dims[4]; - offset[0] = fs->iteration/niter_stat; - offset[1] = 0; - offset[2] = 0; - offset[3] = 0; - //endcpp - """.format(self.C_dtype) - if self.dtype == np.float32: - field_H5T = 'H5T_NATIVE_FLOAT' - elif self.dtype == np.float64: - field_H5T = 'H5T_NATIVE_DOUBLE' - if self.QR_stats_on: - self.stat_src += self.create_stat_output( - 
'/statistics/moments/trS2_Q_R', - 'trS2_Q_R_moments', - size_setup =""" - count[0] = 1; - count[1] = 10; - count[2] = 3; - """) - self.stat_src += self.create_stat_output( - '/statistics/moments/velocity_gradient', - 'gradu_moments', - size_setup =""" - count[0] = 1; - count[1] = 10; - count[2] = 3; - count[3] = 3; - """) - self.stat_src += self.create_stat_output( - '/statistics/histograms/trS2_Q_R', - 'hist_trS2_Q_R', - data_type = 'H5T_NATIVE_INT64', - size_setup = """ - count[0] = 1; - count[1] = histogram_bins; - count[2] = 3; - """) - self.stat_src += self.create_stat_output( - '/statistics/histograms/velocity_gradient', - 'hist_gradu', - data_type = 'H5T_NATIVE_INT64', - size_setup = """ - count[0] = 1; - count[1] = histogram_bins; - count[2] = 3; - count[3] = 3; - """) - self.stat_src += self.create_stat_output( - '/statistics/histograms/QR2D', - 'hist_QR2D', - data_type = 'H5T_NATIVE_INT64', - size_setup = """ - count[0] = 1; - count[1] = QR2D_histogram_bins; - count[2] = QR2D_histogram_bins; - """) - self.stat_src += '}\n' - if self.QR_stats_on: - self.stat_src += """ - //begincpp - delete[] trS2_Q_R_moments; - delete[] gradu_moments; - delete[] hist_trS2_Q_R; - delete[] hist_gradu; - delete[] hist_QR2D; - //endcpp - """ - return None - def fill_up_fluid_code(self): - self.fluid_includes += '#include <cstring>\n' - self.fluid_variables += ( - 'fluid_solver<{0}> *fs;\n'.format(self.C_dtype) + - 'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) + - 'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype) + - 'kspace<FFTW, SMOOTH> *kk_smooth;\n' + - 'kspace<FFTW, TWO_THIRDS> *kk_two_thirds;\n') - self.fluid_definitions += """ - typedef struct {{ - {0} re; - {0} im; - }} tmp_complex_type; - """.format(self.C_dtype) - self.write_fluid_stats() - if self.dtype == np.float32: - field_H5T = 'H5T_NATIVE_FLOAT' - elif self.dtype == np.float64: - field_H5T = 'H5T_NATIVE_DOUBLE' - self.fluid_start += """ - //begincpp - char fname[512]; - fs = new fluid_solver<{0}>( - simname, - nx, ny, nz, - dkx, dky, dkz, - dealias_type, - {1}); - tmp_vec_field = new field<{0}, FFTW, THREE>( - nx, ny, nz, - MPI_COMM_WORLD, - {1}); - tmp_scal_field = new field<{0}, FFTW, ONE>( - nx, ny, nz, - MPI_COMM_WORLD, - {1}); - kk_smooth = new kspace<FFTW, SMOOTH>( - tmp_vec_field->clayout, - fs->dkx, fs->dky, fs->dkz); - kk_two_thirds = new kspace<FFTW, TWO_THIRDS>( - tmp_vec_field->clayout, - fs->dkx, fs->dky, fs->dkz); - fs->nu = nu; - fs->fmode = fmode; - fs->famplitude = famplitude; - fs->fk0 = fk0; - fs->fk1 = fk1; - strncpy(fs->forcing_type, forcing_type, 128); - fs->iteration = iteration; - fs->read('v', 'c'); - //endcpp - """.format(self.C_dtype, self.fftw_plan_rigor, field_H5T) - self.fluid_start += self.store_kspace - if not self.frozen_fields: - self.fluid_loop = 'fs->step(dt);\n' - else: - self.fluid_loop = '' - self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' + - self.fluid_output + '\n}\n') - self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' + - self.fluid_output + '\n}\n' + - 'delete fs;\n' + - 'delete tmp_vec_field;\n' + - 'delete tmp_scal_field;\n' + - 'delete kk_smooth;\n' + - 'delete kk_two_thirds;\n') - return None - def add_3D_rFFTW_field( - self, - name = 'rFFTW_acc'): - if self.dtype == np.float32: - FFTW = 'fftwf' - elif self.dtype == np.float64: - FFTW = 'fftw' - self.fluid_variables += '{0} *{1};\n'.format(self.C_dtype, name) - self.fluid_start += '{0} = {1}_alloc_real(2*fs->cd->local_size);\n'.format(name, FFTW) - self.fluid_end += 
'{0}_free({1});\n'.format(FFTW, name) - return None - def add_interpolator( - self, - interp_type = 'spline', - neighbours = 1, - smoothness = 1, - name = 'field_interpolator', - field_name = 'fs->rvelocity', - class_name = 'rFFTW_interpolator'): - self.fluid_includes += '#include "{0}.hpp"\n'.format(class_name) - self.fluid_variables += '{0} <{1}, {2}> *{3};\n'.format( - class_name, self.C_dtype, neighbours, name) - self.parameters[name + '_type'] = interp_type - self.parameters[name + '_neighbours'] = neighbours - if interp_type == 'spline': - self.parameters[name + '_smoothness'] = smoothness - beta_name = 'beta_n{0}_m{1}'.format(neighbours, smoothness) - elif interp_type == 'Lagrange': - beta_name = 'beta_Lagrange_n{0}'.format(neighbours) - self.fluid_start += '{0} = new {1}<{2}, {3}>(fs, {4}, {5});\n'.format( - name, - class_name, - self.C_dtype, - neighbours, - beta_name, - field_name) - self.fluid_end += 'delete {0};\n'.format(name) - return None - def add_particles( - self, - integration_steps = 2, - kcut = None, - interpolator = 'field_interpolator', - frozen_particles = False, - acc_name = None, - class_name = 'particles'): - """Adds code for tracking a series of particle species, each - consisting of `nparticles` particles. - - :type integration_steps: int, list of int - :type kcut: None (default), str, list of str - :type interpolator: str, list of str - :type frozen_particles: bool - :type acc_name: str - - .. warning:: if not None, kcut must be a list of decreasing - wavenumbers, since filtering is done sequentially - on the same complex FFTW field. - """ - if self.dtype == np.float32: - FFTW = 'fftwf' - elif self.dtype == np.float64: - FFTW = 'fftw' - s0 = self.particle_species - if type(integration_steps) == int: - integration_steps = [integration_steps] - if type(kcut) == str: - kcut = [kcut] - if type(interpolator) == str: - interpolator = [interpolator] - nspecies = max(len(integration_steps), len(interpolator)) - if type(kcut) == list: - nspecies = max(nspecies, len(kcut)) - if len(integration_steps) == 1: - integration_steps = [integration_steps[0] for s in range(nspecies)] - if len(interpolator) == 1: - interpolator = [interpolator[0] for s in range(nspecies)] - if type(kcut) == list: - if len(kcut) == 1: - kcut = [kcut[0] for s in range(nspecies)] - assert(len(integration_steps) == nspecies) - assert(len(interpolator) == nspecies) - if type(kcut) == list: - assert(len(kcut) == nspecies) - for s in range(nspecies): - neighbours = self.parameters[interpolator[s] + '_neighbours'] - if type(kcut) == list: - self.parameters['tracers{0}_kcut'.format(s0 + s)] = kcut[s] - self.parameters['tracers{0}_interpolator'.format(s0 + s)] = interpolator[s] - self.parameters['tracers{0}_acc_on'.format(s0 + s)] = int(not type(acc_name) == type(None)) - self.parameters['tracers{0}_integration_steps'.format(s0 + s)] = integration_steps[s] - self.file_datasets_grow += """ - //begincpp - group = H5Gopen(particle_file, "/tracers{0}", H5P_DEFAULT); - grow_particle_datasets(group, "", NULL, NULL); - H5Gclose(group); - //endcpp - """.format(s0 + s) - - #### code that outputs statistics - output_vel_acc = '{\n' - # array for putting sampled velocity in - # must compute velocity, just in case it was messed up by some - # other particle species before the stats - output_vel_acc += 'fs->compute_velocity(fs->cvorticity);\n' - if not type(kcut) == list: - output_vel_acc += 'fs->ift_velocity();\n' - if not type(acc_name) == type(None): - # array for putting sampled acceleration in - # must compute 
acceleration - output_vel_acc += 'fs->compute_Lagrangian_acceleration({0});\n'.format(acc_name) - for s in range(nspecies): - if type(kcut) == list: - output_vel_acc += 'fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut[s]) - output_vel_acc += 'fs->ift_velocity();\n' - output_vel_acc += """ - {0}->read_rFFTW(fs->rvelocity); - ps{1}->sample({0}, "velocity"); - """.format(interpolator[s], s0 + s) - if not type(acc_name) == type(None): - output_vel_acc += """ - {0}->read_rFFTW({1}); - ps{2}->sample({0}, "acceleration"); - """.format(interpolator[s], acc_name, s0 + s) - output_vel_acc += '}\n' - - #### initialize, stepping and finalize code - if not type(kcut) == list: - update_fields = ('fs->compute_velocity(fs->cvorticity);\n' + - 'fs->ift_velocity();\n') - self.particle_start += update_fields - self.particle_loop += update_fields - else: - self.particle_loop += 'fs->compute_velocity(fs->cvorticity);\n' - self.particle_includes += '#include "{0}.hpp"\n'.format(class_name) - self.particle_stat_src += ( - 'if (ps0->iteration % niter_part == 0)\n' + - '{\n') - for s in range(nspecies): - neighbours = self.parameters[interpolator[s] + '_neighbours'] - self.particle_start += 'sprintf(fname, "tracers{0}");\n'.format(s0 + s) - self.particle_end += ('ps{0}->write();\n' + - 'delete ps{0};\n').format(s0 + s) - self.particle_variables += '{0}<VELOCITY_TRACER, {1}, {2}> *ps{3};\n'.format( - class_name, - self.C_dtype, - neighbours, - s0 + s) - self.particle_start += ('ps{0} = new {1}<VELOCITY_TRACER, {2}, {3}>(\n' + - 'fname, particle_file, {4},\n' + - 'niter_part, tracers{0}_integration_steps);\n').format( - s0 + s, - class_name, - self.C_dtype, - neighbours, - interpolator[s]) - self.particle_start += ('ps{0}->dt = dt;\n' + - 'ps{0}->iteration = iteration;\n' + - 'ps{0}->read();\n').format(s0 + s) - if not frozen_particles: - if type(kcut) == list: - update_field = ('fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut[s]) + - 'fs->ift_velocity();\n') - self.particle_loop += update_field - self.particle_loop += '{0}->read_rFFTW(fs->rvelocity);\n'.format(interpolator[s]) - self.particle_loop += 'ps{0}->step();\n'.format(s0 + s) - self.particle_stat_src += 'ps{0}->write(false);\n'.format(s0 + s) - self.particle_stat_src += output_vel_acc - self.particle_stat_src += '}\n' - self.particle_species += nspecies - return None - def get_cache_file_name(self): - return os.path.join(self.work_dir, self.simname + '_cache.h5') - def get_cache_file(self): - return h5py.File(self.get_postprocess_file_name(), 'r') - def get_postprocess_file_name(self): - return self.get_cache_file_name() - def get_postprocess_file(self): - return h5py.File(self.get_postprocess_file_name(), 'r') - def compute_statistics(self, iter0 = 0, iter1 = None): - """Run basic postprocessing on raw data. - The energy spectrum :math:`E(t, k)` and the enstrophy spectrum - :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the - - .. math:: - - \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm - \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^* - - tensors, and the enstrophy spectrum is also used to - compute the dissipation :math:`\\varepsilon(t)`. - These basic quantities are stored in a newly created HDF5 file, - ``simname_cache.h5``. 
- """ - if len(list(self.statistics.keys())) > 0: - return None - self.read_parameters() - with self.get_data_file() as data_file: - if 'moments' not in data_file['statistics'].keys(): - return None - iter0 = min((data_file['statistics/moments/velocity'].shape[0] * - self.parameters['niter_stat']-1), - iter0) - if type(iter1) == type(None): - iter1 = data_file['iteration'].value - else: - iter1 = min(data_file['iteration'].value, iter1) - ii0 = iter0 // self.parameters['niter_stat'] - ii1 = iter1 // self.parameters['niter_stat'] - self.statistics['kshell'] = data_file['kspace/kshell'].value - self.statistics['kM'] = data_file['kspace/kM'].value - self.statistics['dk'] = data_file['kspace/dk'].value - computation_needed = True - pp_file = h5py.File(self.get_postprocess_file_name(), 'a') - if 'ii0' in pp_file.keys(): - computation_needed = not (ii0 == pp_file['ii0'].value and - ii1 == pp_file['ii1'].value) - if computation_needed: - for k in ['t', 'vel_max(t)', 'renergy(t)', - 'energy(t, k)', 'enstrophy(t, k)', - 'ii0', 'ii1', 'iter0', 'iter1']: - del pp_file[k] - if computation_needed: - pp_file['iter0'] = iter0 - pp_file['iter1'] = iter1 - pp_file['ii0'] = ii0 - pp_file['ii1'] = ii1 - pp_file['t'] = (self.parameters['dt']* - self.parameters['niter_stat']* - (np.arange(ii0, ii1+1).astype(np.float))) - pp_file['energy(t, k)'] = ( - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] + - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] + - data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2 - pp_file['enstrophy(t, k)'] = ( - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] + - data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2 - pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'] [ii0:ii1+1, 9, 3] - pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2 - if 'trS2_Q_R' in data_file['statistics/moments'].keys(): - pp_file['mean_trS2(t)'] = data_file['statistics/moments/trS2_Q_R'][:, 1, 0] - for k in ['t', - 'energy(t, k)', - 'enstrophy(t, k)', - 'vel_max(t)', - 'renergy(t)', - 'mean_trS2(t)']: - if k in pp_file.keys(): - self.statistics[k] = pp_file[k].value - self.compute_time_averages() - return None - def compute_time_averages(self): - """Compute easy stats. - - Further computation of statistics based on the contents of - ``simname_cache.h5``. - Standard quantities are as follows - (consistent with [Ishihara]_): - - .. math:: - - U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm - L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm - T_{\\textrm{int}}(t) = - \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)} - - \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm - \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm - \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}} - - Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip - .5cm - R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu} - - .. [Ishihara] T. Ishihara et al, - *Small-scale statistics in high-resolution direct numerical - simulation of turbulence: Reynolds number dependence of - one-point velocity gradient statistics*. - J. 
Fluid Mech., - **592**, 335-366, 2007 - """ - for key in ['energy', 'enstrophy']: - self.statistics[key + '(t)'] = (self.statistics['dk'] * - np.sum(self.statistics[key + '(t, k)'], axis = 1)) - self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3) - self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi / - (2*self.statistics['Uint(t)']**2)) * - np.nansum(self.statistics['energy(t, k)'] / - self.statistics['kshell'][None, :], axis = 1)) - for key in ['energy', - 'enstrophy', - 'vel_max', - 'mean_trS2', - 'Uint', - 'Lint']: - if key + '(t)' in self.statistics.keys(): - self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0) - for suffix in ['', '(t)']: - self.statistics['diss' + suffix] = (self.parameters['nu'] * - self.statistics['enstrophy' + suffix]*2) - self.statistics['etaK' + suffix] = (self.parameters['nu']**3 / - self.statistics['diss' + suffix])**.25 - self.statistics['tauK' + suffix] = (self.parameters['nu'] / - self.statistics['diss' + suffix])**.5 - self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] * - self.statistics['Lint' + suffix] / - self.parameters['nu']) - self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] * - self.statistics['Uint' + suffix]**2 / - self.statistics['diss' + suffix])**.5 - self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] * - self.statistics['lambda' + suffix] / - self.parameters['nu']) - self.statistics['kMeta' + suffix] = (self.statistics['kM'] * - self.statistics['etaK' + suffix]) - if self.parameters['dealias_type'] == 1: - self.statistics['kMeta' + suffix] *= 0.8 - self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint'] - self.statistics['Taylor_microscale'] = self.statistics['lambda'] - return None - def set_plt_style( - self, - style = {'dashes' : (None, None)}): - self.style.update(style) - return None - def read_cfield( - self, - field_name = 'vorticity', - iteration = 0): - """read the Fourier representation of a vector field. - - Read the binary file containing iteration ``iteration`` of the - field ``field_name``, and return it as a properly shaped - ``numpy.memmap`` object. 
- """ - return np.memmap( - os.path.join(self.work_dir, - self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)), - dtype = self.ctype, - mode = 'r', - shape = (self.parameters['ny'], - self.parameters['nz'], - self.parameters['nx']//2+1, - 3)) - def write_par( - self, - iter0 = 0, - particle_ic = None): - _fluid_particle_base.write_par(self, iter0 = iter0) - with h5py.File(self.get_data_file_name(), 'r+') as ofile: - kspace = self.get_kspace() - nshells = kspace['nshell'].shape[0] - vec_stat_datasets = ['velocity', 'vorticity'] - scal_stat_datasets = [] - for k in vec_stat_datasets: - time_chunk = 2**20 // ( - self.dtype.itemsize*3* - self.parameters['nx']*self.parameters['ny']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/0slices/' + k + '/real', - (1, self.parameters['ny'], self.parameters['nx'], 3), - chunks = (time_chunk, self.parameters['ny'], self.parameters['nx'], 3), - maxshape = (None, self.parameters['ny'], self.parameters['nx'], 3), - dtype = self.dtype) - if self.Lag_acc_stats_on: - vec_stat_datasets += ['Lagrangian_acceleration'] - scal_stat_datasets += ['pressure'] - for k in vec_stat_datasets: - time_chunk = 2**20//(8*3*3*nshells) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/spectra/' + k + '_' + k, - (1, nshells, 3, 3), - chunks = (time_chunk, nshells, 3, 3), - maxshape = (None, nshells, 3, 3), - dtype = np.float64) - time_chunk = 2**20//(8*4*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/' + k, - (1, 10, 4), - chunks = (time_chunk, 10, 4), - maxshape = (None, 10, 4), - dtype = np.float64) - time_chunk = 2**20//(8*4*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/' + k, - (1, - self.parameters['histogram_bins'], - 4), - chunks = (time_chunk, - self.parameters['histogram_bins'], - 4), - maxshape = (None, - self.parameters['histogram_bins'], - 4), - dtype = np.int64) - for k in scal_stat_datasets: - time_chunk = 2**20//(8*nshells) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/spectra/' + k + '_' + k, - (1, nshells), - chunks = (time_chunk, nshells), - maxshape = (None, nshells), - dtype = np.float64) - time_chunk = 2**20//(8*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/' + k, - (1, 10), - chunks = (time_chunk, 10), - maxshape = (None, 10), - dtype = np.float64) - time_chunk = 2**20//(8*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/' + k, - (1, - self.parameters['histogram_bins']), - chunks = (time_chunk, - self.parameters['histogram_bins']), - maxshape = (None, - self.parameters['histogram_bins']), - dtype = np.int64) - if self.QR_stats_on: - time_chunk = 2**20//(8*3*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/trS2_Q_R', - (1, - self.parameters['histogram_bins'], - 3), - chunks = (time_chunk, - self.parameters['histogram_bins'], - 3), - maxshape = (None, - self.parameters['histogram_bins'], - 3), - dtype = np.int64) - time_chunk = 2**20//(8*9*self.parameters['histogram_bins']) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/velocity_gradient', - (1, - self.parameters['histogram_bins'], - 3, - 3), - chunks = (time_chunk, - self.parameters['histogram_bins'], - 3, - 3), - maxshape = (None, - self.parameters['histogram_bins'], - 3, - 3), - dtype = np.int64) - time_chunk = 
2**20//(8*3*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/trS2_Q_R', - (1, 10, 3), - chunks = (time_chunk, 10, 3), - maxshape = (None, 10, 3), - dtype = np.float64) - time_chunk = 2**20//(8*9*10) - time_chunk = max(time_chunk, 1) - a = ofile.create_dataset('statistics/moments/velocity_gradient', - (1, 10, 3, 3), - chunks = (time_chunk, 10, 3, 3), - maxshape = (None, 10, 3, 3), - dtype = np.float64) - time_chunk = 2**20//(8*self.parameters['QR2D_histogram_bins']**2) - time_chunk = max(time_chunk, 1) - ofile.create_dataset('statistics/histograms/QR2D', - (1, - self.parameters['QR2D_histogram_bins'], - self.parameters['QR2D_histogram_bins']), - chunks = (time_chunk, - self.parameters['QR2D_histogram_bins'], - self.parameters['QR2D_histogram_bins']), - maxshape = (None, - self.parameters['QR2D_histogram_bins'], - self.parameters['QR2D_histogram_bins']), - dtype = np.int64) - if self.particle_species == 0: - return None - - if type(particle_ic) == type(None): - pbase_shape = (self.parameters['nparticles'],) - number_of_particles = self.parameters['nparticles'] - else: - pbase_shape = particle_ic.shape[:-1] - assert(particle_ic.shape[-1] == 3) - if len(pbase_shape) == 1: - number_of_particles = pbase_shape[0] - else: - number_of_particles = 1 - for val in pbase_shape[1:]: - number_of_particles *= val - - with h5py.File(self.get_particle_file_name(), 'a') as ofile: - for s in range(self.particle_species): - ofile.create_group('tracers{0}'.format(s)) - time_chunk = 2**20 // (8*3*number_of_particles) - time_chunk = max(time_chunk, 1) - dims = ((1, - self.parameters['tracers{0}_integration_steps'.format(s)]) + - pbase_shape + (3,)) - maxshape = (h5py.h5s.UNLIMITED,) + dims[1:] - if len(pbase_shape) > 1: - chunks = (time_chunk, 1, 1) + dims[3:] - else: - chunks = (time_chunk, 1) + dims[2:] - bfps.tools.create_alloc_early_dataset( - ofile, - '/tracers{0}/rhs'.format(s), - dims, maxshape, chunks) - if len(pbase_shape) > 1: - chunks = (time_chunk, 1) + pbase_shape[1:] + (3,) - else: - chunks = (time_chunk, pbase_shape[0], 3) - bfps.tools.create_alloc_early_dataset( - ofile, - '/tracers{0}/state'.format(s), - (1,) + pbase_shape + (3,), - (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), - chunks) - # "velocity" is sampled, single precision is enough - # for the results we are interested in. 
- bfps.tools.create_alloc_early_dataset( - ofile, - '/tracers{0}/velocity'.format(s), - (1,) + pbase_shape + (3,), - (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), - chunks, - dset_dtype = h5py.h5t.IEEE_F32LE) - if self.parameters['tracers{0}_acc_on'.format(s)]: - bfps.tools.create_alloc_early_dataset( - ofile, - '/tracers{0}/acceleration'.format(s), - (1,) + pbase_shape + (3,), - (h5py.h5s.UNLIMITED,) + pbase_shape + (3,), - chunks, - dset_dtype = h5py.h5t.IEEE_F32LE) - return None - def add_particle_fields( - self, - interp_type = 'spline', - kcut = None, - neighbours = 1, - smoothness = 1, - name = 'particle_field', - field_class = 'rFFTW_interpolator', - acc_field_name = 'rFFTW_acc'): - self.fluid_includes += '#include "{0}.hpp"\n'.format(field_class) - self.fluid_variables += field_class + '<{0}, {1}> *vel_{2}, *acc_{2};\n'.format( - self.C_dtype, neighbours, name) - self.parameters[name + '_type'] = interp_type - self.parameters[name + '_neighbours'] = neighbours - if interp_type == 'spline': - self.parameters[name + '_smoothness'] = smoothness - beta_name = 'beta_n{0}_m{1}'.format(neighbours, smoothness) - elif interp_type == 'Lagrange': - beta_name = 'beta_Lagrange_n{0}'.format(neighbours) - if field_class == 'rFFTW_interpolator': - self.fluid_start += ('vel_{0} = new {1}<{2}, {3}>(fs, {4}, fs->rvelocity);\n' + - 'acc_{0} = new {1}<{2}, {3}>(fs, {4}, {5});\n').format(name, - field_class, - self.C_dtype, - neighbours, - beta_name, - acc_field_name) - elif field_class == 'interpolator': - self.fluid_start += ('vel_{0} = new {1}<{2}, {3}>(fs, {4});\n' + - 'acc_{0} = new {1}<{2}, {3}>(fs, {4});\n').format(name, - field_class, - self.C_dtype, - neighbours, - beta_name, - acc_field_name) - self.fluid_end += ('delete vel_{0};\n' + - 'delete acc_{0};\n').format(name) - update_fields = 'fs->compute_velocity(fs->cvorticity);\n' - if not type(kcut) == type(None): - update_fields += 'fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut) - update_fields += ('fs->ift_velocity();\n' + - 'fs->compute_Lagrangian_acceleration(acc_{0}->field);\n').format(name) - self.fluid_start += update_fields - self.fluid_loop += update_fields - return None - def specific_parser_arguments( - self, - parser): - _fluid_particle_base.specific_parser_arguments(self, parser) - parser.add_argument( - '--src-wd', - type = str, - dest = 'src_work_dir', - default = '') - parser.add_argument( - '--src-simname', - type = str, - dest = 'src_simname', - default = '') - parser.add_argument( - '--src-iteration', - type = int, - dest = 'src_iteration', - default = 0) - parser.add_argument( - '--njobs', - type = int, dest = 'njobs', - default = 1) - parser.add_argument( - '--QR-stats', - action = 'store_true', - dest = 'QR_stats', - help = 'add this option if you want to compute velocity gradient and QR stats') - parser.add_argument( - '--Lag-acc-stats', - action = 'store_true', - dest = 'Lag_acc_stats', - help = 'add this option if you want to compute Lagrangian acceleration statistics') - parser.add_argument( - '--kMeta', - type = float, - dest = 'kMeta', - default = 2.0) - parser.add_argument( - '--dtfactor', - type = float, - dest = 'dtfactor', - default = 0.5, - help = 'dt is computed as DTFACTOR / N') - parser.add_argument( - '--particle-rand-seed', - type = int, - dest = 'particle_rand_seed', - default = None) - parser.add_argument( - '--pclouds', - type = int, - dest = 'pclouds', - default = 1, - help = ('number of particle clouds. 
Particle "clouds" ' - 'consist of particles distributed according to ' - 'pcloud-type.')) - parser.add_argument( - '--pcloud-type', - choices = ['random-cube', - 'regular-cube'], - dest = 'pcloud_type', - default = 'random-cube') - parser.add_argument( - '--particle-cloud-size', - type = float, - dest = 'particle_cloud_size', - default = 2*np.pi) - parser.add_argument( - '--neighbours', - type = int, - dest = 'neighbours', - default = 1) - parser.add_argument( - '--smoothness', - type = int, - dest = 'smoothness', - default = 1) - return None - def prepare_launch( - self, - args = []): - """Set up reasonable parameters. - - With the default Lundgren forcing applied in the band [2, 4], - we can estimate the dissipation, therefore we can estimate - :math:`k_M \\eta_K` and constrain the viscosity. - Also, if velocity gradient statistics are computed, the - dissipation is used for estimating the bins of the QR histogram. - - In brief, the command line parameter :math:`k_M \\eta_K` is - used in the following formula for :math:`\\nu` (:math:`N` is the - number of real space grid points per coordinate): - - .. math:: - - \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3} - - With this choice, the average dissipation :math:`\\varepsilon` - will be close to 0.4, and the integral scale velocity will be - close to 0.77, yielding the approximate value for the Taylor - microscale and corresponding Reynolds number: - - .. math:: - - \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in - R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6} - - """ - opt = _code.prepare_launch(self, args = args) - self.QR_stats_on = opt.QR_stats - self.Lag_acc_stats_on = opt.Lag_acc_stats - self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3) - self.parameters['dt'] = (opt.dtfactor / opt.n) - # custom famplitude for 288 and 576 - if opt.n == 288: - self.parameters['famplitude'] = 0.45 - elif opt.n == 576: - self.parameters['famplitude'] = 0.47 - if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0): - self.parameters['niter_out'] = self.parameters['niter_todo'] - if self.QR_stats_on: - # max_Q_estimate and max_R_estimate are just used for the 2D pdf - # therefore I just want them to be small multiples of mean trS2 - # I'm already estimating the dissipation with kMeta... 
- meantrS2 = (opt.n//2 / opt.kMeta)**4 * self.parameters['nu']**2 - self.parameters['max_Q_estimate'] = meantrS2 - self.parameters['max_R_estimate'] = .4*meantrS2**1.5 - # add QR suffix to code name, since we now expect additional - # datasets in the .h5 file - self.name += '-QR' - if self.Lag_acc_stats_on: - self.name += '-Lag_acc' - if len(opt.src_work_dir) == 0: - opt.src_work_dir = os.path.realpath(opt.work_dir) - self.pars_from_namespace(opt) - return opt - def launch( - self, - args = [], - noparticles = False, - **kwargs): - opt = self.prepare_launch(args = args) - self.fill_up_fluid_code() - if noparticles: - opt.nparticles = 0 - elif type(opt.nparticles) == int: - if opt.nparticles > 0: - self.name += '-particles' - self.add_3D_rFFTW_field( - name = 'rFFTW_acc') - self.add_interpolator( - name = 'cubic_spline', - neighbours = opt.neighbours, - smoothness = opt.smoothness, - class_name = 'rFFTW_interpolator') - self.add_particles( - integration_steps = [4], - interpolator = 'cubic_spline', - acc_name = 'rFFTW_acc', - class_name = 'rFFTW_distributed_particles') - self.variables += 'hid_t particle_file;\n' - self.main_start += """ - if (myrank == 0) - { - // set caching parameters - hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); - herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0); - DEBUG_MSG("when setting cache for particles I got %d\\n", cache_err); - sprintf(fname, "%s_particles.h5", simname); - particle_file = H5Fopen(fname, H5F_ACC_RDWR, fapl); - } - """ - self.main_end = ('if (myrank == 0)\n' + - '{\n' + - 'H5Fclose(particle_file);\n' + - '}\n') + self.main_end - self.finalize_code() - self.launch_jobs(opt = opt, **kwargs) - return None - def launch_jobs( - self, - opt = None, - particle_initial_condition = None): - if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): - if opt.pclouds > 1: - np.random.seed(opt.particle_rand_seed) - if opt.pcloud_type == 'random-cube': - particle_initial_condition = ( - np.random.random((opt.pclouds, 1, 3))*2*np.pi + - np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size) - elif opt.pcloud_type == 'regular-cube': - onedarray = np.linspace( - -opt.particle_cloud_size/2, - opt.particle_cloud_size/2, - self.parameters['nparticles']) - particle_initial_condition = np.zeros( - (opt.pclouds, - self.parameters['nparticles'], - self.parameters['nparticles'], - self.parameters['nparticles'], 3), - dtype = np.float64) - particle_initial_condition[:] = \ - np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi - particle_initial_condition[..., 0] += onedarray[None, None, None, :] - particle_initial_condition[..., 1] += onedarray[None, None, :, None] - particle_initial_condition[..., 2] += onedarray[None, :, None, None] - self.write_par( - particle_ic = particle_initial_condition) - if self.parameters['nparticles'] > 0: - data = self.generate_tracer_state( - species = 0, - rseed = opt.particle_rand_seed, - data = particle_initial_condition) - for s in range(1, self.particle_species): - self.generate_tracer_state(species = s, data = data) - init_condition_file = os.path.join( - self.work_dir, - self.simname + '_cvorticity_i{0:0>5x}'.format(0)) - if not os.path.exists(init_condition_file): - if len(opt.src_simname) > 0: - src_file = os.path.join( - os.path.realpath(opt.src_work_dir), - opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration)) - os.symlink(src_file, init_condition_file) - else: - self.generate_vector_field( - write_to_file = True, - spectra_slope = 2.0, - amplitude = 0.05) - 
self.run( - nb_processes = opt.nb_processes, - nb_threads_per_process = opt.nb_threads_per_process, - njobs = opt.njobs, - hours = opt.minutes // 60, - minutes = opt.minutes % 60, - no_submit = opt.no_submit) - return None - diff --git a/bfps/PP.py b/bfps/PP.py index 6e02f2aefd5db2e9790f3a16cbc2bfa3c85ab37b..914b90ef9383d986a27d22bab11d2821983631f6 100644 --- a/bfps/PP.py +++ b/bfps/PP.py @@ -1,26 +1,25 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### +################################################################################ +# # +# Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ @@ -33,6 +32,7 @@ import h5py import math import numpy as np import warnings +import glob import bfps from ._code import _code @@ -50,12 +50,6 @@ class PP(_code): self, work_dir = work_dir, simname = simname) - self.host_info = {'type' : 'cluster', - 'environment' : None, - 'deltanprocs' : 1, - 'queue' : '', - 'mail_address': '', - 'mail_events' : None} self.generate_default_parameters() return None def set_precision( @@ -118,6 +112,7 @@ class PP(_code): return None def generate_default_parameters(self): # these parameters are relevant for all PP classes + self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE' self.parameters['dealias_type'] = int(1) self.parameters['dkx'] = float(1.0) self.parameters['dky'] = float(1.0) @@ -139,6 +134,11 @@ class PP(_code): pars['max_acceleration_estimate'] = float(10) pars['max_velocity_estimate'] = float(1) pars['histogram_bins'] = int(129) + elif dns_type == 'resize': + pars['new_nx'] = int(32) + pars['new_ny'] = int(32) + pars['new_nz'] = int(32) + pars['new_simname'] = 'test_resized' return pars def get_data_file_name(self): return os.path.join(self.work_dir, self.simname + '.h5') @@ -429,6 +429,12 @@ class PP(_code): self.simulation_parser_arguments(parser_native_binary_to_hdf5) self.job_parser_arguments(parser_native_binary_to_hdf5) self.parameters_to_parser_arguments(parser_native_binary_to_hdf5) + parser_field_single_to_double = subparsers.add_parser( + 'field_single_to_double', + help = 'convert complex vorticity from single to double') + self.simulation_parser_arguments(parser_field_single_to_double) + self.job_parser_arguments(parser_field_single_to_double) + self.parameters_to_parser_arguments(parser_field_single_to_double) parser_get_rfields = subparsers.add_parser( 'get_rfields', help = 'get real space velocity field') @@ -444,6 +450,15 @@ class PP(_code): self.parameters_to_parser_arguments( parser_joint_acc_vel_stats, parameters = self.extra_postprocessing_parameters('joint_acc_vel_stats')) + parser_resize = subparsers.add_parser( + 'resize', + help = 'resize the field to a new grid') + self.simulation_parser_arguments(parser_resize) + self.job_parser_arguments(parser_resize) + self.parameters_to_parser_arguments(parser_resize) + self.parameters_to_parser_arguments( + parser_resize, + parameters = self.extra_postprocessing_parameters('resize')) return None def prepare_launch( self, @@ -626,7 +641,6 @@ class PP(_code): for kz in range(src_file[src_dset_name].shape[0]): dst_file[dst_dset_name][kz] = src_file[src_dset_name][kz] else: - print('aloha') min_shape = (min(dst_shape[0], src_file[src_dset_name].shape[0]), min(dst_shape[1], src_file[src_dset_name].shape[1]), min(dst_shape[2], src_file[src_dset_name].shape[2]), @@ -674,10 +688,11 @@ class PP(_code): group = self.dns_type + '/parameters', parameters = self.pp_parameters, file_name = os.path.join(self.work_dir, self.simname + '_post.h5')) - histogram_bins = opt.histogram_bins - if (type(histogram_bins) == type(None) and - 'histogram_bins' in self.pp_parameters.keys()): - histogram_bins = self.pp_parameters['histogram_bins'] + if 'histogram_bins' in opt.__dict__.keys(): + histogram_bins = opt.histogram_bins + if (type(histogram_bins) == type(None) and + 'histogram_bins' in self.pp_parameters.keys()): + histogram_bins = self.pp_parameters['histogram_bins'] with h5py.File(os.path.join(self.work_dir,
self.simname + '_post.h5'), 'r+') as ofile: group = ofile[self.dns_type] group.require_group('histograms') @@ -772,7 +787,7 @@ class PP(_code): dtype = np.float64) df.close() return None - def prepare_field_file(self): + def prepare_field_file(self, iter0 = 0): df = self.get_data_file() if 'field_dtype' in df.keys(): # we don't need to do anything, raw binary files are used @@ -783,28 +798,22 @@ class PP(_code): with h5py.File(os.path.join(self.work_dir, self.simname + '_fields.h5'), 'a') as ff: ff.require_group('vorticity') ff.require_group('vorticity/complex') - checkpoint = 0 - while True: - cpf_name = os.path.join( - self.work_dir, - self.simname + '_checkpoint_{0}.h5'.format(checkpoint)) - if os.path.exists(cpf_name): - cpf = h5py.File(cpf_name, 'r') - for iter_name in cpf['vorticity/complex'].keys(): - if iter_name not in ff['vorticity/complex'].keys(): - ff['vorticity/complex/' + iter_name] = h5py.ExternalLink( - cpf_name, - 'vorticity/complex/' + iter_name) - checkpoint += 1 - else: - break + checkpoint_file_list = glob.glob(self.simname + '_checkpoint_*.h5') + for cpf_name in checkpoint_file_list: + cpf = h5py.File(cpf_name, 'r') + for iter_name in cpf['vorticity/complex'].keys(): + if iter_name not in ff['vorticity/complex'].keys(): + ff['vorticity/complex/' + iter_name] = h5py.ExternalLink( + cpf_name, + 'vorticity/complex/' + iter_name) + cpf.close() return None def launch_jobs( self, opt = None, particle_initial_condition = None): self.prepare_post_file(opt) - self.prepare_field_file() + self.prepare_field_file(iter0 = opt.iter0) self.run( nb_processes = opt.nb_processes, nb_threads_per_process = opt.nb_threads_per_process, diff --git a/bfps/TEST.py b/bfps/TEST.py index 5f5734030344f15c7b23d7849fede80105e11fc6..2edcdfe46a8cf47360ac5a7dae28b72d1e81978d 100644 --- a/bfps/TEST.py +++ b/bfps/TEST.py @@ -1,26 +1,25 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### +################################################################################ +# # +# Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. 
# +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ @@ -37,6 +36,7 @@ import warnings import bfps from ._code import _code from bfps import tools +from bfps import DNS class TEST(_code): """This class is meant to stitch together the C++ code into a final source file, @@ -50,12 +50,6 @@ class TEST(_code): self, work_dir = work_dir, simname = simname) - self.host_info = {'type' : 'cluster', - 'environment' : None, - 'deltanprocs' : 1, - 'queue' : '', - 'mail_address': '', - 'mail_events' : None} self.generate_default_parameters() return None def set_precision( @@ -118,12 +112,24 @@ class TEST(_code): return None def generate_default_parameters(self): # these parameters are relevant for all TEST classes + self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE' self.parameters['dealias_type'] = int(1) self.parameters['dkx'] = float(1.0) self.parameters['dky'] = float(1.0) self.parameters['dkz'] = float(1.0) self.parameters['filter_length'] = float(1.0) + self.parameters['random_seed'] = int(1) return None + def generate_extra_parameters( + self, + dns_type = None): + pars = {} + if dns_type == 'test_interpolation': + pars['nparticles'] = 3 + pars['tracers0_integration_steps'] = int(4) + pars['tracers0_neighbours'] = int(1) + pars['tracers0_smoothness'] = int(1) + return pars def get_kspace(self): kspace = {} if self.parameters['dealias_type'] == 1: @@ -254,9 +260,28 @@ class TEST(_code): parser_filter_test = subparsers.add_parser( 'filter_test', help = 'plain filter test') - self.simulation_parser_arguments(parser_filter_test) - self.job_parser_arguments(parser_filter_test) - self.parameters_to_parser_arguments(parser_filter_test) + parser_field_test = subparsers.add_parser( + 'field_test', + help = 'plain field test') + parser_symmetrize_test = subparsers.add_parser( + 'symmetrize_test', + help = 'plain symmetrize test') + parser_field_output_test = subparsers.add_parser( + 'field_output_test', + help = 'plain field output test') + parser_test_interpolation = subparsers.add_parser( + 'test_interpolation', + help = 'test velocity gradient interpolation') + for parser in ['parser_filter_test', + 'parser_field_test', + 'parser_symmetrize_test', + 'parser_field_output_test', + 'parser_test_interpolation']: + eval('self.simulation_parser_arguments(' + parser + ')') + eval('self.job_parser_arguments(' + parser + ')') + eval('self.parameters_to_parser_arguments(' + parser + ')') + eval('self.parameters_to_parser_arguments(' + parser + ',' + + 'parameters = self.generate_extra_parameters(dns_type = \'' + parser + '\'))') return None def prepare_launch( self, @@ -273,6 +298,8 @@ class TEST(_code): args = [], **kwargs): opt = self.prepare_launch(args = args) + self.parameters.update( + self.generate_extra_parameters(dns_type = self.dns_type)) self.launch_jobs(opt = opt, **kwargs) return None def launch_jobs( @@ -281,7 +308,62 @@ class TEST(_code): particle_initial_condition = None): if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): self.write_par( - particle_ic = None) + particle_ic = particle_initial_condition) + if self.dns_type == 'test_interpolation': + if type(particle_initial_condition) == type(None): + pbase_shape = (self.parameters['nparticles'],) + number_of_particles = self.parameters['nparticles'] + else: + pbase_shape = 
particle_initial_condition.shape[:-1] + assert(particle_initial_condition.shape[-1] == 3) + number_of_particles = 1 + for val in pbase_shape[1:]: + number_of_particles *= val + ncomponents = 3 + with h5py.File(os.path.join(self.work_dir, self.simname + '_input.h5'), 'a') as ofile: + s = 0 + ofile.create_group('tracers{0}'.format(s)) + ofile.create_group('tracers{0}/rhs'.format(s)) + ofile.create_group('tracers{0}/state'.format(s)) + ofile['tracers{0}/rhs'.format(s)].create_dataset( + '0', + shape = ( + (self.parameters['tracers{0}_integration_steps'.format(s)],) + + pbase_shape + + (ncomponents,)), + dtype = np.float) + ofile['tracers{0}/state'.format(s)].create_dataset( + '0', + shape = ( + pbase_shape + + (ncomponents,)), + dtype = np.float) + if type(particle_initial_condition) == type(None): + ofile['tracers0/state/0'][:] = np.random.random(pbase_shape + (ncomponents,))*2*np.pi + else: + ofile['tracers0/state/0'][:] = particle_initial_condition + with h5py.File(os.path.join(self.work_dir, self.simname + '_input.h5'), 'a') as ofile: + data = DNS.generate_vector_field(self, + write_to_file = False, + spectra_slope = 1.0, + amplitude = 0.05) + #data[:] = 0.0 + ## ABC + #data[0, 0, 1, 1] = -0.5*(1j) + #data[0, 0, 1, 2] = 0.5*(1j) + #data[0, 1, 0, 0] = -0.5*(1j) + #data[0, self.parameters['nz'] - 1, 0, 0] = 0.5*(1j) + #data[0, 1, 0, 1] = 0.5 + #data[0, self.parameters['nz'] - 1, 0, 1] = 0.5 + #data[1, 0, 0, 0] = 0.5 + #data[self.parameters['ny'] - 1, 0, 0, 0] = 0.5 + #data[1, 0, 0, 2] = -0.5*(1j) + #data[self.parameters['ny'] - 1, 0, 0, 2] = 0.5*(1j) + ofile['vorticity/complex/{0}'.format(0)] = data + with h5py.File(os.path.join(self.work_dir, self.simname + '_output.h5'), 'a') as ofile: + ofile.require_group('tracers0') + for kk in ['position', 'velocity', 'vorticity', 'velocity_gradient']: + ofile['tracers0'].require_group(kk) self.run( nb_processes = opt.nb_processes, nb_threads_per_process = opt.nb_threads_per_process, diff --git a/bfps/__init__.py b/bfps/__init__.py index 6c220e69d877670206e411c5a0f1f1ae78c04d33..9595bee4d6885aaa4be4cfc252f605be835e7e64 100644 --- a/bfps/__init__.py +++ b/bfps/__init__.py @@ -1,26 +1,25 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### +################################################################################ +# # +# Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. 
# +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ @@ -38,18 +37,11 @@ here = os.path.normcase(__file__) header_dir = os.path.join(os.path.join(dist_loc, 'bfps'), 'cpp') lib_dir = os.path.join(dist_loc, 'bfps') -install_info = pickle.load( - open(os.path.join(os.path.dirname(here), 'install_info.pickle'), 'rb')) - homefolder = os.path.expanduser('~') -bfpsfolder = os.path.join(homefolder, '.config/', 'bfps') -sys.path.append(bfpsfolder) -from host_information import host_info +from .host_info import host_info from .DNS import DNS -from .FluidConvert import FluidConvert -from .FluidResize import FluidResize -from .NavierStokes import NavierStokes -from .NSVorticityEquation import NSVorticityEquation +from .PP import PP +from .TEST import TEST #import test diff --git a/bfps/__main__.py b/bfps/__main__.py index c41a6ffb67f91983f7969f40bc048a2e36e23afe..187171d049580498c82ea8551fa5728515c69845 100644 --- a/bfps/__main__.py +++ b/bfps/__main__.py @@ -1,26 +1,25 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### +################################################################################ +# # +# Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ @@ -31,48 +30,16 @@ import bfps from .DNS import DNS from .PP import PP from .TEST import TEST -from .NavierStokes import NavierStokes -from .NSVorticityEquation import NSVorticityEquation -from .FluidResize import FluidResize -from .FluidConvert import FluidConvert -from .NSManyParticles import NSManyParticles def main(): - parser = argparse.ArgumentParser(prog = 'bfps') + parser = argparse.ArgumentParser(prog = 'bfps', conflict_handler = 'resolve') parser.add_argument( '-v', '--version', action = 'version', version = '%(prog)s ' + bfps.__version__) - NSoptions = ['NavierStokes', - 'NavierStokes-single', - 'NavierStokes-double', - 'NS', - 'NS-single', - 'NS-double'] - NSVEoptions = ['NSVorticityEquation', - 'NSVorticityEquation-single', - 'NSVorticityEquation-double', - 'NSVE', - 'NSVE-single', - 'NSVE-double'] - FRoptions = ['FluidResize', - 'FluidResize-single', - 'FluidResize-double', - 'FR', - 'FR-single', - 'FR-double'] - FCoptions = ['FluidConvert'] - NSMPopt = ['NSManyParticles', - 'NSManyParticles-single', - 'NSManyParticles-double'] parser.add_argument( 'base_class', - choices = ['DNS', 'PP', 'TEST'] + - NSoptions + - NSVEoptions + - FRoptions + - FCoptions + - NSMPopt, + choices = ['DNS', 'PP', 'TEST'], type = str) # first option is the choice of base class or -h or -v # all other options are passed on to the base_class instance @@ -81,31 +48,10 @@ def main(): # cannot be executed by mistake. if opt.base_class == 'DNS': c = DNS() - c.launch(args = sys.argv[2:]) - return None if opt.base_class == 'PP': c = PP() - c.launch(args = sys.argv[2:]) - return None if opt.base_class == 'TEST': c = TEST() - c.launch(args = sys.argv[2:]) - return None - if 'double' in opt.base_class: - precision = 'double' - else: - precision = 'single' - if opt.base_class in NSoptions: - base_class = NavierStokes - if opt.base_class in NSVEoptions: - base_class = NSVorticityEquation - elif opt.base_class in FRoptions: - base_class = FluidResize - elif opt.base_class in FCoptions: - base_class = FluidConvert - elif opt.base_class in NSMPopt: - base_class = NSManyParticles - c = base_class(fluid_precision = precision) c.launch(args = sys.argv[2:]) return None diff --git a/bfps/_base.py b/bfps/_base.py index 037261d3f1c6ea7af7fc58b79484ed461f84a28b..15a3c7a22cc783c2f471d96f01a30b4a379cd0dc 100644 --- a/bfps/_base.py +++ b/bfps/_base.py @@ -1,26 +1,25 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### +################################################################################ +# # +# Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ @@ -28,7 +27,6 @@ import os import sys import numpy as np import h5py -from bfps import install_info from bfps import __version__ class _base(object): @@ -97,7 +95,7 @@ class _base(object): 'char fname[256];\n' + 'hsize_t dims[1];\n' + 'char *string_data;\n' + - 'sprintf(fname, "%s.h5", {0});\n'.format(simname_variable) + + 'snprintf(fname, 255, "%s.h5", {0});\n'.format(simname_variable) + 'parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);\n') key_prefix = '' if prepend_this: @@ -106,18 +104,24 @@ src_txt += 'dset = H5Dopen(parameter_file, "/{0}/{1}", H5P_DEFAULT);\n'.format( file_group, key[i]) if (type(parameters[key[i]]) == int and parameters[key[i]] >= 1<<30): - src_txt += 'H5Dread(dset, H5T_NATIVE_LLONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key_prefix + key[i]) + src_txt += ('if (dset > 0) H5Dread(dset, H5T_NATIVE_LLONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n' + + 'else {0} = 0;\n').format(key_prefix + key[i]) elif type(parameters[key[i]]) == int: - src_txt += 'H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key_prefix + key[i]) + src_txt += ('if (dset > 0) H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n' + + 'else {0} = 0;\n').format(key_prefix + key[i]) elif type(parameters[key[i]]) == str: - src_txt += ('space = H5Dget_space(dset);\n' + + src_txt += ('if (dset > 0)\n' + + '{\n' + 'space = H5Dget_space(dset);\n' + 'memtype = H5Dget_type(dset);\n' + 'string_data = (char*)malloc(256);\n' + 'H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);\n' + - 'sprintf({0}, "%s", string_data);\n'.format(key_prefix + key[i]) + + 'snprintf({0}, 255, "%s", string_data);\n'.format(key_prefix + key[i]) + 'free(string_data);\n' 'H5Sclose(space);\n' + - 'H5Tclose(memtype);\n') + 'H5Tclose(memtype);\n' + + '}\n' + + 'else snprintf({0}, 255, "NULL");\n'.format(key_prefix + key[i])) elif type(parameters[key[i]]) == np.ndarray: if parameters[key[i]].dtype in [np.int, np.int64, np.int32]: template_par = 'int' @@ -126,10 +130,11 @@ src_txt += '{0} = hdf5_tools::read_vector<{1}>(parameter_file, "/{2}/{0}");\n'.format( key_prefix + key[i], template_par, file_group) else: - src_txt += 'H5Dread(dset, H5T_NATIVE_DOUBLE,
H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n' + + 'else {0} = 0.0;\n').format(key_prefix + key[i]) src_txt += 'H5Dclose(dset);\n' src_txt += 'H5Fclose(parameter_file);\n' - src_txt += 'return 0;\n}\n' # finishing read_parameters + src_txt += 'return EXIT_SUCCESS;\n}\n' # finishing read_parameters return src_txt def cprint_pars(self): key = sorted(list(self.parameters.keys())) @@ -168,8 +173,7 @@ class _base(object): ofile['parameters/' + k] = self.parameters[k] ofile['iteration'] = int(iter0) ofile['bfps_info/solver_class'] = type(self).__name__ - for k in install_info.keys(): - ofile['bfps_info/' + k] = str(install_info[k]) + ofile['bfps_info/VERSION'] = __version__ ofile.close() return None def rewrite_par( @@ -217,8 +221,10 @@ class _base(object): ofile[group + '/' + k][...] = parameters[k] ofile.close() return None - def read_parameters(self): - with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r') as data_file: + def read_parameters(self, fname = None): + if type(fname) == type(None): + fname = os.path.join(self.work_dir, self.simname + '.h5') + with h5py.File(fname, 'r') as data_file: for k in data_file['parameters'].keys(): if k in self.parameters.keys(): if type(self.parameters[k]) in [int, str, float]: diff --git a/bfps/_code.py b/bfps/_code.py index 22bcd9101ff6591e00f0455c1de1af2698c5f842..250be61daf1c89cddb9e461748469fd636796eff 100644 --- a/bfps/_code.py +++ b/bfps/_code.py @@ -1,26 +1,25 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### +################################################################################ +# # +# Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ @@ -35,6 +34,7 @@ import math import warnings import bfps +from bfps.host_info import host_info from ._base import _base class _code(_base): @@ -62,7 +62,7 @@ class _code(_base): #include <string> #include <cstring> #include <fftw3-mpi.h> - #include <omp.h> + #include <omp.h> #include <fenv.h> #include <cstdlib> //endcpp @@ -116,7 +116,7 @@ class _code(_base): } #endif strcpy(simname, argv[1]); - sprintf(fname, "%s.h5", simname); + snprintf(fname, 255, "%s.h5", simname); parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); Cdset = H5Dopen(parameter_file, "iteration", H5P_DEFAULT); H5Dread( @@ -174,12 +174,7 @@ class _code(_base): } //endcpp """ - self.host_info = {'type' : 'cluster', - 'environment' : None, - 'deltanprocs' : 1, - 'queue' : '', - 'mail_address': '', - 'mail_events' : None} + self.host_info = host_info self.main = '' return None def write_src(self): @@ -194,33 +189,54 @@ class _code(_base): outfile.write(self.main) outfile.write(self.main_end) return None - def compile_code(self): + def compile_code( + self, + no_debug = True): + if os.path.exists(os.path.join(self.work_dir, self.name)): + return 0 # compile code - if not os.path.isfile(os.path.join(bfps.header_dir, 'base.hpp')): - raise IOError('header not there:\n' + - '{0}\n'.format(os.path.join(bfps.header_dir, 'base.hpp')) + - '{0}\n'.format(bfps.dist_loc)) - libraries = ['bfps'] - libraries += bfps.install_info['libraries'] - - command_strings = [bfps.install_info['compiler']] - command_strings += [self.name + '.cpp', '-o', self.name] - command_strings += bfps.install_info['extra_compile_args'] - command_strings += ['-I' + idir for idir in bfps.install_info['include_dirs']] - command_strings.append('-I' + bfps.header_dir) - command_strings += ['-L' + ldir for ldir in bfps.install_info['library_dirs']] - command_strings += ['-Wl,-rpath=' + ldir for ldir in bfps.install_info['library_dirs']] - command_strings.append('-L' + bfps.lib_dir) - command_strings.append('-Wl,-rpath=' + bfps.lib_dir) - - for libname in libraries: - command_strings += ['-l' + libname] - - command_strings += ['-fopenmp'] - + build_dir = 'bfps_build_' + self.name + os.makedirs(build_dir, exist_ok = True) + os.chdir(build_dir) self.write_src() - print('compiling code with command\n' + ' '.join(command_strings)) - return subprocess.call(command_strings) + with open('CMakeLists.txt', 'w') as outfile: + outfile.write('cmake_minimum_required(VERSION 3.10)\n') + outfile.write('cmake_policy(VERSION 3.12)\n') + outfile.write('if (DEFINED ENV{MPICXX})\n') + outfile.write(' message(STATUS "Using CMAKE_CXX_COMPILER=MPICXX")\n') + outfile.write(' set(CMAKE_CXX_COMPILER $ENV{MPICXX})\n') + outfile.write('else()\n') + outfile.write(' message(STATUS "MPICXX environment variable undefined, trying to find MPI")\n') + outfile.write(' set(MPI_STATIC ON)\n') + outfile.write(' find_package(MPI REQUIRED)\n') + outfile.write('endif()\n') + outfile.write('if (DEFINED ENV{MPICC})\n') + outfile.write(' set(CMAKE_C_COMPILER $ENV{MPICC})\n') + outfile.write(' message(STATUS "Using CMAKE_C_COMPILER=MPICC")\n') + outfile.write('endif()\n') + #ideally we should use something like the following 2 lines + #outfile.write('set(CMAKE_CXX_COMPILER ${BFPS_CXX_COMPILER})\n') + #outfile.write('set(CMAKE_C_COMPILER ${BFPS_C_COMPILER})\n') + outfile.write('project(project_{0} LANGUAGES 
CXX)\n'.format(self.name)) + outfile.write('find_package(BFPS REQUIRED)\n') + outfile.write('set(CMAKE_CXX_STANDARD 11)\n') + outfile.write('set(CMAKE_CXX_STANDARD_REQUIRED ON)\n') + outfile.write('set(CMAKE_CXX_COMPILE_FLAGS "${CMAKE_CXX_COMPILE_FLAGS} ${BFPS_CXX_COMPILE_FLAGS}")\n') + outfile.write('set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${BFPS_EXE_LINKER_FLAGS}")\n') + outfile.write('include_directories(${BFPS_INCLUDE_DIRECTORIES} ${BFPS_INCLUDE_DIR}/bfps)\n') + outfile.write('link_directories(${BFPS_LINK_DIRECTORIES} ${BFPS_LIBRARIES_DIR})\n') + outfile.write('find_library(BFPS_STATIC_LIBRARY bfps)\n') + outfile.write('add_executable({0} {0}.cpp)\n'.format(self.name)) + outfile.write('target_link_libraries(' + self.name + ' ${BFPS_STATIC_LIBRARY})\n') + outfile.write('target_link_libraries(' + self.name + ' ${BFPS_LIBS})\n') + subprocess.check_call(['cmake', '.']) + current_environment = os.environ + if not no_debug: + current_environment['VERBOSE'] = '1' + make_result = subprocess.check_call(['make'], env = current_environment) + os.chdir('..') + shutil.copy2(os.path.join(build_dir, self.name), os.path.join(self.work_dir, self.name)) + return make_result def set_host_info( self, host_info = {}): @@ -234,30 +250,29 @@ class _code(_base): hours = 0, minutes = 10, njobs = 1, - no_submit = False): + no_submit = False, + no_debug = True): self.read_parameters() with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r') as data_file: iter0 = data_file['iteration'].value if not os.path.isdir(self.work_dir): os.makedirs(self.work_dir) - if not os.path.exists(os.path.join(self.work_dir, self.name)): - need_to_compile = True - else: - need_to_compile = (datetime.fromtimestamp(os.path.getctime(os.path.join(self.work_dir, self.name))) < - bfps.install_info['install_date']) - if need_to_compile: - assert(self.compile_code() == 0) - if self.work_dir != os.path.realpath(os.getcwd()): - shutil.copy(self.name, self.work_dir) + assert (self.compile_code(no_debug = no_debug) == 0) if 'niter_todo' not in self.parameters.keys(): self.parameters['niter_todo'] = 1 current_dir = os.getcwd() os.chdir(self.work_dir) os.chdir(current_dir) + if not 'MPI' in self.host_info.keys(): + self.host_info['MPI'] = 'openmpi' + if self.host_info['MPI'] == 'openmpi': + mpirun_environment_set = 'x' + else: + mpirun_environment_set = 'env' command_atoms = ['mpirun', '-np', '{0}'.format(nb_processes), - '-x', + '-' + mpirun_environment_set, 'OMP_NUM_THREADS={0}'.format(nb_threads_per_process), './' + self.name, self.simname] @@ -268,20 +283,22 @@ class _code(_base): qsub_script_name = 'run_' + suffix + '.sh' self.write_sge_file( file_name = os.path.join(self.work_dir, qsub_script_name), - nprocesses = nb_processes*nb_threads_per_process, + nprocesses = nb_processes, name_of_run = suffix, command_atoms = command_atoms[5:], hours = hours, minutes = minutes, out_file = out_file + '_' + suffix, - err_file = err_file + '_' + suffix) + err_file = err_file + '_' + suffix, + nb_threads_per_process = nb_threads_per_process) os.chdir(self.work_dir) qsub_atoms = ['qsub'] - if len(job_name_list) >= 1: - qsub_atoms += ['-hold_jid', job_name_list[-1]] - subprocess.call(qsub_atoms + [qsub_script_name]) - os.chdir(current_dir) - job_name_list.append(suffix) + if not no_submit: + if len(job_name_list) >= 1: + qsub_atoms += ['-hold_jid', job_name_list[-1]] + subprocess.check_call(qsub_atoms + [qsub_script_name]) + os.chdir(current_dir) + job_name_list.append(suffix) if self.host_info['type'] == 'SLURM': job_id_list = 
[] for j in range(njobs): @@ -296,7 +313,7 @@ class _code(_base): out_file = out_file + '_' + suffix, err_file = err_file + '_' + suffix, nb_mpi_processes = nb_processes, - nb_threads_per_process = nb_threads_per_process) + nb_threads_per_process = nb_threads_per_process) os.chdir(self.work_dir) qsub_atoms = ['sbatch'] @@ -313,6 +330,14 @@ class _code(_base): elif self.host_info['type'] == 'IBMLoadLeveler': suffix = self.simname + '_{0}'.format(iter0) job_script_name = 'run_' + suffix + '.sh' + energy_policy_tag = ( + 'bfps' + + '_np{0}_ntpp{1}'.format( + nb_processes, nb_threads_per_process) + + '_Nx{0}_Ny{1}_Nz{2}'.format( + self.parameters['nx'], self.parameters['ny'], self.parameters['nz'])) + if 'nparticles' in self.parameters.keys(): + energy_policy_tag += '_nparticles{0}'.format(self.parameters['nparticles']) if (njobs == 1): self.write_IBMLoadLeveler_file_single_job( file_name = os.path.join(self.work_dir, job_script_name), @@ -323,7 +348,8 @@ class _code(_base): out_file = out_file + '_' + suffix, err_file = err_file + '_' + suffix, nb_mpi_processes = nb_processes, - nb_threads_per_process = nb_threads_per_process) + nb_threads_per_process = nb_threads_per_process, + energy_policy_tag = energy_policy_tag) else: self.write_IBMLoadLeveler_file_many_job( file_name = os.path.join(self.work_dir, job_script_name), @@ -335,21 +361,19 @@ class _code(_base): err_file = err_file + '_' + suffix, njobs = njobs, nb_mpi_processes = nb_processes, - nb_threads_per_process = nb_threads_per_process) + nb_threads_per_process = nb_threads_per_process, + energy_policy_tag = energy_policy_tag) submit_atoms = ['llsubmit'] if not no_submit: - subprocess.call(submit_atoms + [os.path.join(self.work_dir, job_script_name)]) + subprocess.check_call(submit_atoms + [os.path.join(self.work_dir, job_script_name)]) elif self.host_info['type'] == 'pc': os.chdir(self.work_dir) - if os.getenv('LD_LIBRARY_PATH') != None: - os.environ['LD_LIBRARY_PATH'] += ':{0}'.format(bfps.lib_dir) - print('added to LD_LIBRARY_PATH the location {0}'.format(bfps.lib_dir)) for j in range(njobs): suffix = self.simname + '_{0}'.format(iter0 + j*self.parameters['niter_todo']) print('running code with command\n' + ' '.join(command_atoms)) - subprocess.call(command_atoms, + subprocess.check_call(command_atoms, stdout = open(out_file + '_' + suffix, 'w'), stderr = open(err_file + '_' + suffix, 'w')) os.chdir(current_dir) @@ -364,8 +388,9 @@ class _code(_base): minutes = None, out_file = None, err_file = None, - nb_mpi_processes = None, - nb_threads_per_process = None): + nb_mpi_processes = None, + nb_threads_per_process = None, + energy_policy_tag = 'bfps'): script_file = open(file_name, 'w') script_file.write('# @ shell=/bin/bash\n') @@ -380,19 +405,22 @@ class _code(_base): # If Ibm is used should be : script_file.write('# @ job_type = parallel\n') script_file.write('# @ job_type = MPICH\n') + assert(type(self.host_info['environment']) != type(None)) + script_file.write('# @ class = {0}\n'.format(self.host_info['environment'])) script_file.write('# @ node_usage = not_shared\n') script_file.write('# @ notification = complete\n') - script_file.write('# @ notify_user = $(user)@rzg.mpg.de\n') + script_file.write('# @ notify_user = {0}\n'.format(self.host_info['mail_address'])) nb_cpus_per_node = self.host_info['deltanprocs'] - assert(isinstance(nb_cpus_per_node, int) and nb_cpus_per_node >= 1, - 'nb_cpus_per_node is {}'.format(nb_cpus_per_node)) + assert isinstance(nb_cpus_per_node, int) and \ + nb_cpus_per_node >= 1, \ + 'nb_cpus_per_node is 
{}'.format(nb_cpus_per_node) # No more threads than the number of cores - assert(nb_threads_per_process <= nb_cpus_per_node, + assert nb_threads_per_process <= nb_cpus_per_node, \ "Cannot use more threads ({} asked) than the number of cores ({})".format( - nb_threads_per_process, nb_cpus_per_node)) + nb_threads_per_process, nb_cpus_per_node) # Warn if some core will not be ued if nb_cpus_per_node%nb_threads_per_process != 0: warnings.warn("The number of threads is smaller than the number of cores (machine will be underused)", @@ -410,10 +438,11 @@ class _code(_base): nb_processes_per_node = int(nb_cpus_per_node // nb_threads_per_process) first_node_tasks = int(nb_mpi_processes - (nb_nodes-1)*nb_processes_per_node) + script_file.write('# @ energy_policy_tag = {0}\n'.format(energy_policy_tag)) + script_file.write('# @ minimize_time_to_solution = yes\n') script_file.write('# @ resources = ConsumableCpus({})\n'.format(nb_threads_per_process)) script_file.write('# @ network.MPI = sn_all,not_shared,us\n') script_file.write('# @ wall_clock_limit = {0}:{1:0>2d}:00\n'.format(hours, minutes)) - assert(type(self.host_info['environment']) != type(None)) script_file.write('# @ node = {0}\n'.format(nb_nodes)) script_file.write('# @ tasks_per_node = {0}\n'.format(nb_processes_per_node)) if (first_node_tasks > 0): @@ -425,9 +454,6 @@ class _code(_base): script_file.write('module li\n') script_file.write('export OMP_NUM_THREADS={}\n'.format(nb_threads_per_process)) - script_file.write('LD_LIBRARY_PATH=' + - ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) + - ':${LD_LIBRARY_PATH}\n') script_file.write('echo "Start time is `date`"\n') script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1])) script_file.write('cd ' + self.work_dir + '\n') @@ -437,7 +463,7 @@ class _code(_base): script_file.write('mpiexec.hydra ' + ' -np {} '.format(nb_mpi_processes) + ' -ppn {} '.format(nb_processes_per_node) - + ' -ordered-output -prepend-rank ' + #+ ' -ordered-output -prepend-rank ' + os.path.join( self.work_dir, command_atoms[0]) + @@ -460,8 +486,9 @@ class _code(_base): out_file = None, err_file = None, njobs = 2, - nb_mpi_processes = None, - nb_threads_per_process = None): + nb_mpi_processes = None, + nb_threads_per_process = None, + energy_policy_tag = 'bfps'): assert(type(self.host_info['environment']) != type(None)) script_file = open(file_name, 'w') script_file.write('# @ shell=/bin/bash\n') @@ -475,16 +502,23 @@ class _code(_base): script_file.write('# @ output = ' + os.path.join(self.work_dir, out_file) + '\n') # If Ibm is used should be : script_file.write('# @ job_type = parallel\n') script_file.write('# @ job_type = MPICH\n') + assert(type(self.host_info['environment']) != type(None)) + script_file.write('# @ class = {0}\n'.format(self.host_info['environment'])) script_file.write('# @ node_usage = not_shared\n') + + script_file.write('# @ notification = error\n') + script_file.write('# @ notify_user = {0}\n'.format(self.host_info['mail_address'])) script_file.write('#\n') nb_cpus_per_node = self.host_info['deltanprocs'] - assert(isinstance(nb_cpus_per_node, int) and nb_cpus_per_node >= 1, 'nb_cpus_per_node is {}'.format(nb_cpus_per_node)) + assert isinstance(nb_cpus_per_node, int) and \ + nb_cpus_per_node >= 1, \ + 'nb_cpus_per_node is {}'.format(nb_cpus_per_node) # No more threads than the number of cores - assert(nb_threads_per_process <= nb_cpus_per_node, + assert nb_threads_per_process <= nb_cpus_per_node, \ "Cannot use more threads ({} asked) than the number of cores 
({})".format( - nb_threads_per_process, nb_cpus_per_node)) + nb_threads_per_process, nb_cpus_per_node) # Warn if some core will not be ued if nb_cpus_per_node%nb_threads_per_process != 0: warnings.warn("The number of threads is smaller than the number of cores (machine will be underused)", @@ -503,11 +537,15 @@ class _code(_base): first_node_tasks = int(nb_mpi_processes - (nb_nodes-1)*nb_processes_per_node) for job in range(njobs): - script_file.write('# @ step_name = {0}.$(stepid)\n'.format(self.simname)) + script_file.write('# @ step_name = {0}.{1}\n'.format(self.simname, job)) + if job > 0: + script_file.write('# @ dependency = {0}.{1} == 0\n'.format(self.simname, job - 1)) script_file.write('# @ resources = ConsumableCpus({})\n'.format(nb_threads_per_process)) script_file.write('# @ network.MPI = sn_all,not_shared,us\n') script_file.write('# @ wall_clock_limit = {0}:{1:0>2d}:00\n'.format(hours, minutes)) - assert(type(self.host_info['environment']) != type(None)) + script_file.write('# @ energy_policy_tag = {0}\n'.format(energy_policy_tag)) + script_file.write('# @ minimize_time_to_solution = yes\n') + assert type(self.host_info['environment']) != type(None) script_file.write('# @ node = {0}\n'.format(nb_nodes)) script_file.write('# @ tasks_per_node = {0}\n'.format(nb_processes_per_node)) if (first_node_tasks > 0): @@ -518,9 +556,6 @@ class _code(_base): script_file.write('module li\n') script_file.write('export OMP_NUM_THREADS={}\n'.format(nb_threads_per_process)) - script_file.write('LD_LIBRARY_PATH=' + - ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) + - ':${LD_LIBRARY_PATH}\n') script_file.write('echo "Start time is `date`"\n') script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1])) script_file.write('cd ' + self.work_dir + '\n') @@ -552,7 +587,8 @@ class _code(_base): hours = None, minutes = None, out_file = None, - err_file = None): + err_file = None, + nb_threads_per_process = 1): script_file = open(file_name, 'w') script_file.write('#!/bin/bash\n') # export all environment variables @@ -568,18 +604,17 @@ class _code(_base): if not type(out_file) == type(None): script_file.write('#$ -o ' + out_file + '\n') if not type(self.host_info['environment']) == type(None): - envprocs = self.host_info['deltanprocs'] * int(math.ceil((nprocesses *1.0/ self.host_info['deltanprocs']))) + envprocs = nb_threads_per_process * nprocesses script_file.write('#$ -pe {0} {1}\n'.format( self.host_info['environment'], envprocs)) script_file.write('echo "got $NSLOTS slots."\n') script_file.write('echo "Start time is `date`"\n') - script_file.write('mpiexec -machinefile $TMPDIR/machines ' + - '-genv LD_LIBRARY_PATH ' + - '"' + - ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) + - '" ' + - '-n {0} {1}\n'.format(nprocesses, ' '.join(command_atoms))) + script_file.write('mpiexec \\\n' + + '\t-machinefile $TMPDIR/machines \\\n' + + '\t-genv OMP_NUM_THREADS={0} \\\n'.format(nb_threads_per_process) + + '\t-genv OMP_PLACES=cores \\\n' + + '\t-n {0} \\\n\t{1}\n'.format(nprocesses, ' '.join(command_atoms))) script_file.write('echo "End time is `date`"\n') script_file.write('exit 0\n') script_file.close() @@ -593,8 +628,8 @@ class _code(_base): minutes = None, out_file = None, err_file = None, - nb_mpi_processes = None, - nb_threads_per_process = None): + nb_mpi_processes = None, + nb_threads_per_process = None): script_file = open(file_name, 'w') script_file.write('#!/bin/bash -l\n') # job name @@ -611,13 +646,14 @@ class _code(_base): self.host_info['environment'])) 
nb_cpus_per_node = self.host_info['deltanprocs'] - assert(isinstance(nb_cpus_per_node, int) and nb_cpus_per_node >= 1, - 'nb_cpus_per_node is {}'.format(nb_cpus_per_node)) + assert isinstance(nb_cpus_per_node, int) \ + and nb_cpus_per_node >= 1, \ + 'nb_cpus_per_node is {}'.format(nb_cpus_per_node) # No more threads than the number of cores - assert(nb_threads_per_process <= nb_cpus_per_node, + assert nb_threads_per_process <= nb_cpus_per_node, \ "Cannot use more threads ({} asked) than the number of cores ({})".format( - nb_threads_per_process, nb_cpus_per_node)) + nb_threads_per_process, nb_cpus_per_node) # Warn if some core will not be ued if nb_cpus_per_node%nb_threads_per_process != 0: warnings.warn( @@ -646,9 +682,6 @@ class _code(_base): script_file.write('export OMP_NUM_THREADS={0}\n'.format(nb_threads_per_process)) script_file.write('export OMP_PLACES=cores\n') - script_file.write('LD_LIBRARY_PATH=' + - ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) + - ':${LD_LIBRARY_PATH}\n') script_file.write('echo "Start time is `date`"\n') script_file.write('cd ' + self.work_dir + '\n') script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1])) diff --git a/bfps/_fluid_base.py b/bfps/_fluid_base.py deleted file mode 100644 index 757e6cb81e6c605cbcb3c2e9d19bd7487add115f..0000000000000000000000000000000000000000 --- a/bfps/_fluid_base.py +++ /dev/null @@ -1,503 +0,0 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -from ._code import _code -from bfps import tools - -import os -import numpy as np -import h5py - -class _fluid_particle_base(_code): - """This class is meant to put together all common code between the - different C++ solvers/postprocessing tools, so that development of - specific functionalities is not overwhelming. 
- """ - def __init__( - self, - name = 'solver', - work_dir = './', - simname = 'test', - dtype = np.float32, - use_fftw_wisdom = True): - _code.__init__( - self, - work_dir = work_dir, - simname = simname) - self.use_fftw_wisdom = use_fftw_wisdom - self.name = name - self.particle_species = 0 - if dtype in [np.float32, np.float64]: - self.dtype = dtype - elif dtype in ['single', 'double']: - if dtype == 'single': - self.dtype = np.dtype(np.float32) - elif dtype == 'double': - self.dtype = np.dtype(np.float64) - self.rtype = self.dtype - if self.rtype == np.float32: - self.ctype = np.dtype(np.complex64) - self.C_dtype = 'float' - elif self.rtype == np.float64: - self.ctype = np.dtype(np.complex128) - self.C_dtype = 'double' - self.parameters['dealias_type'] = 1 - self.parameters['dkx'] = 1.0 - self.parameters['dky'] = 1.0 - self.parameters['dkz'] = 1.0 - self.parameters['niter_todo'] = 8 - self.parameters['niter_part'] = 1 - self.parameters['niter_stat'] = 1 - self.parameters['niter_out'] = 1024 - self.parameters['nparticles'] = 0 - self.parameters['dt'] = 0.01 - self.fluid_includes = '#include "fluid_solver.hpp"\n' - self.fluid_includes = '#include "field.hpp"\n' - self.fluid_variables = '' - self.fluid_definitions = '' - self.fluid_start = '' - self.fluid_loop = '' - self.fluid_end = '' - self.fluid_output = '' - self.stat_src = '' - self.particle_includes = '' - self.particle_variables = '' - self.particle_definitions = '' - self.particle_start = '' - self.particle_loop = '' - self.particle_output = '' - self.particle_end = '' - self.particle_stat_src = '' - self.file_datasets_grow = '' - self.store_kspace = """ - //begincpp - if (myrank == 0 && iteration == 0) - { - TIMEZONE("fuild_base::store_kspace"); - hsize_t dims[4]; - hid_t space, dset; - // store kspace information - hid_t parameter_file = stat_file; - //char fname[256]; - //sprintf(fname, "%s.h5", simname); - //parameter_file = H5Fopen(fname, H5F_ACC_RDWR, H5P_DEFAULT); - dset = H5Dopen(parameter_file, "/kspace/kshell", H5P_DEFAULT); - space = H5Dget_space(dset); - H5Sget_simple_extent_dims(space, dims, NULL); - H5Sclose(space); - if (fs->nshells != dims[0]) - { - DEBUG_MSG( - "ERROR: computed nshells %d not equal to data file nshells %d\\n", - fs->nshells, dims[0]); - } - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->kshell); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/kspace/nshell", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->nshell); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/kspace/kM", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kMspec); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/kspace/dk", H5P_DEFAULT); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->dk); - H5Dclose(dset); - //H5Fclose(parameter_file); - } - //endcpp - """ - return None - def get_data_file_name(self): - return os.path.join(self.work_dir, self.simname + '.h5') - def get_data_file(self): - return h5py.File(self.get_data_file_name(), 'r') - def get_particle_file_name(self): - return os.path.join(self.work_dir, self.simname + '_particles.h5') - def get_particle_file(self): - return h5py.File(self.get_particle_file_name(), 'r') - def finalize_code( - self, - postprocess_mode = False): - self.includes += self.fluid_includes - self.includes += '#include <ctime>\n' - self.variables += self.fluid_variables - self.definitions += ('int grow_single_dataset(hid_t dset, int tincrement)\n{\n' + - 'int ndims;\n' + 
- 'hsize_t space;\n' + - 'space = H5Dget_space(dset);\n' + - 'ndims = H5Sget_simple_extent_ndims(space);\n' + - 'hsize_t *dims = new hsize_t[ndims];\n' + - 'H5Sget_simple_extent_dims(space, dims, NULL);\n' + - 'dims[0] += tincrement;\n' + - 'H5Dset_extent(dset, dims);\n' + - 'H5Sclose(space);\n' + - 'delete[] dims;\n' + - 'return EXIT_SUCCESS;\n}\n') - self.definitions+= self.fluid_definitions - if self.particle_species > 0: - self.includes += self.particle_includes - self.variables += self.particle_variables - self.definitions += self.particle_definitions - self.definitions += ('herr_t grow_statistics_dataset(hid_t o_id, const char *name, const H5O_info_t *info, void *op_data)\n{\n' + - 'if (info->type == H5O_TYPE_DATASET)\n{\n' + - 'hsize_t dset = H5Dopen(o_id, name, H5P_DEFAULT);\n' + - 'grow_single_dataset(dset, niter_todo/niter_stat);\n' - 'H5Dclose(dset);\n}\n' + - 'return 0;\n}\n') - self.definitions += ('herr_t grow_particle_datasets(hid_t g_id, const char *name, const H5L_info_t *info, void *op_data)\n{\n' + - 'hsize_t dset;\n') - for key in ['state', 'velocity', 'acceleration']: - self.definitions += ('if (H5Lexists(g_id, "{0}", H5P_DEFAULT))\n'.format(key) + - '{\n' + - 'dset = H5Dopen(g_id, "{0}", H5P_DEFAULT);\n'.format(key) + - 'grow_single_dataset(dset, niter_todo/niter_part);\n' + - 'H5Dclose(dset);\n}\n') - self.definitions += ('if (H5Lexists(g_id, "rhs", H5P_DEFAULT))\n{\n' + - 'dset = H5Dopen(g_id, "rhs", H5P_DEFAULT);\n' + - 'grow_single_dataset(dset, 1);\n' + - 'H5Dclose(dset);\n}\n' + - 'return 0;\n}\n') - self.definitions += ('int grow_file_datasets()\n{\n' + - 'int file_problems = 0;\n' + - self.file_datasets_grow + - 'return file_problems;\n' - '}\n') - self.definitions += 'void do_stats()\n{\n' + self.stat_src + '}\n' - self.definitions += 'void do_particle_stats()\n{\n' + self.particle_stat_src + '}\n' - # take care of wisdom - if self.use_fftw_wisdom: - if self.dtype == np.float32: - fftw_prefix = 'fftwf_' - elif self.dtype == np.float64: - fftw_prefix = 'fftw_' - self.main_start += """ - //begincpp - if (myrank == 0) - {{ - char fname[256]; - sprintf(fname, "%s_fftw_wisdom.txt", simname); - {0}import_wisdom_from_filename(fname); - }} - {0}mpi_broadcast_wisdom(MPI_COMM_WORLD); - //endcpp - """.format(fftw_prefix) - self.main_end = """ - //begincpp - {0}mpi_gather_wisdom(MPI_COMM_WORLD); - MPI_Barrier(MPI_COMM_WORLD); - if (myrank == 0) - {{ - char fname[256]; - sprintf(fname, "%s_fftw_wisdom.txt", simname); - {0}export_wisdom_to_filename(fname); - }} - //endcpp - """.format(fftw_prefix) + self.main_end - self.main = """ - //begincpp - int data_file_problem; - clock_t time0, time1; - double time_difference, local_time_difference; - time0 = clock(); - if (myrank == 0) data_file_problem = grow_file_datasets(); - MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (data_file_problem > 0) - { - std::cerr << data_file_problem << " problems growing file datasets.\\ntrying to exit now." 
<< std::endl; - MPI_Finalize(); - return EXIT_SUCCESS; - } - //endcpp - """ - self.main += self.fluid_start - if self.particle_species > 0: - self.main += self.particle_start - output_time_difference = ('time1 = clock();\n' + - 'local_time_difference = ((unsigned int)(time1 - time0))/((double)CLOCKS_PER_SEC);\n' + - 'time_difference = 0.0;\n' + - 'MPI_Allreduce(&local_time_difference, &time_difference, ' + - '1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);\n' + - 'if (myrank == 0) std::cout << "iteration " ' + - '<< {0} << " took " ' + - '<< time_difference/nprocs << " seconds" << std::endl;\n' + - 'if (myrank == 0) std::cerr << "iteration " ' + - '<< {0} << " took " ' + - '<< time_difference/nprocs << " seconds" << std::endl;\n' + - 'time0 = time1;\n') - if not postprocess_mode: - self.main += 'for (int max_iter = iteration+niter_todo-iteration%niter_todo; iteration < max_iter; iteration++)\n' - self.main += '{\n' - - self.main += """ - #ifdef USE_TIMINGOUTPUT - const std::string loopLabel = "code::main_start::loop-" + std::to_string(iteration); - TIMEZONE(loopLabel.c_str()); - #endif - """ - self.main += 'if (iteration % niter_stat == 0) do_stats();\n' - if self.particle_species > 0: - self.main += 'if (iteration % niter_part == 0) do_particle_stats();\n' - self.main += self.particle_loop - self.main += self.fluid_loop - self.main += output_time_difference.format('iteration') - self.main += '}\n' - self.main += 'do_stats();\n' - self.main += 'do_particle_stats();\n' - self.main += output_time_difference.format('iteration') - else: - self.main += 'for (int frame_index = iter0; frame_index <= iter1; frame_index += niter_out)\n' - self.main += '{\n' - self.main += """ - #ifdef USE_TIMINGOUTPUT - const std::string loopLabel = "code::main_start::loop-" + std::to_string(frame_index); - TIMEZONE(loopLabel.c_str()); - #endif - """ - if self.particle_species > 0: - self.main += self.particle_loop - self.main += self.fluid_loop - self.main += output_time_difference.format('frame_index') - self.main += '}\n' - self.main += self.fluid_end - if self.particle_species > 0: - self.main += self.particle_end - return None - def read_rfield( - self, - field = 'velocity', - iteration = 0, - filename = None): - """ - :note: assumes field is a vector field - """ - if type(filename) == type(None): - filename = os.path.join( - self.work_dir, - self.simname + '_r' + field + '_i{0:0>5x}'.format(iteration)) - return np.memmap( - filename, - dtype = self.dtype, - mode = 'r', - shape = (self.parameters['nz'], - self.parameters['ny'], - self.parameters['nx'], 3)) - def transpose_frame( - self, - field = 'velocity', - iteration = 0, - filename = None, - ofile = None): - Rdata = self.read_rfield( - field = field, - iteration = iteration, - filename = filename) - new_data = np.zeros( - (3, - self.parameters['nz'], - self.parameters['ny'], - self.parameters['nx']), - dtype = self.dtype) - for i in range(3): - new_data[i] = Rdata[..., i] - if type(ofile) == type(None): - ofile = os.path.join( - self.work_dir, - self.simname + '_r' + field + '_i{0:0>5x}_3xNZxNYxNX'.format(iteration)) - else: - new_data.tofile(ofile) - return new_data - def plot_vel_cut( - self, - axis, - field = 'velocity', - iteration = 0, - yval = 13, - filename = None): - axis.set_axis_off() - Rdata0 = self.read_rfield(field = field, iteration = iteration, filename = filename) - energy = np.sum(Rdata0[:, yval, :, :]**2, axis = 2)*.5 - axis.imshow(energy, interpolation='none') - axis.set_title('{0}'.format(np.average(Rdata0[..., 0]**2 + - Rdata0[..., 1]**2 + - 
Rdata0[..., 2]**2)*.5)) - return Rdata0 - def generate_vector_field( - self, - rseed = 7547, - spectra_slope = 1., - amplitude = 1., - iteration = 0, - field_name = 'vorticity', - write_to_file = False, - # to switch to constant field, use generate_data_3D_uniform - # for scalar_generator - scalar_generator = tools.generate_data_3D): - """generate vector field. - - The generated field is not divergence free, but it has the proper - shape. - - :param rseed: seed for random number generator - :param spectra_slope: spectrum of field will look like k^(-p) - :param amplitude: all amplitudes are multiplied with this value - :param iteration: the field is written at this iteration - :param field_name: the name of the field being generated - :param write_to_file: should we write the field to file? - :param scalar_generator: which function to use for generating the - individual components. - Possible values: bfps.tools.generate_data_3D, - bfps.tools.generate_data_3D_uniform - :type rseed: int - :type spectra_slope: float - :type amplitude: float - :type iteration: int - :type field_name: str - :type write_to_file: bool - :type scalar_generator: function - - :returns: ``Kdata``, a complex valued 4D ``numpy.array`` that uses the - transposed FFTW layout. - Kdata[ky, kz, kx, i] is the amplitude of mode (kx, ky, kz) for - the i-th component of the field. - (i.e. x is the fastest index and z the slowest index in the - real-space representation). - """ - np.random.seed(rseed) - Kdata00 = scalar_generator( - self.parameters['nz']//2, - self.parameters['ny']//2, - self.parameters['nx']//2, - p = spectra_slope, - amplitude = amplitude).astype(self.ctype) - Kdata01 = scalar_generator( - self.parameters['nz']//2, - self.parameters['ny']//2, - self.parameters['nx']//2, - p = spectra_slope, - amplitude = amplitude).astype(self.ctype) - Kdata02 = scalar_generator( - self.parameters['nz']//2, - self.parameters['ny']//2, - self.parameters['nx']//2, - p = spectra_slope, - amplitude = amplitude).astype(self.ctype) - Kdata0 = np.zeros( - Kdata00.shape + (3,), - Kdata00.dtype) - Kdata0[..., 0] = Kdata00 - Kdata0[..., 1] = Kdata01 - Kdata0[..., 2] = Kdata02 - Kdata1 = tools.padd_with_zeros( - Kdata0, - self.parameters['nz'], - self.parameters['ny'], - self.parameters['nx']) - if write_to_file: - Kdata1.tofile( - os.path.join(self.work_dir, - self.simname + "_c{0}_i{1:0>5x}".format(field_name, iteration))) - return Kdata1 - def generate_tracer_state( - self, - rseed = None, - iteration = 0, - species = 0, - write_to_file = False, - ncomponents = 3, - testing = False, - data = None): - if (type(data) == type(None)): - if not type(rseed) == type(None): - np.random.seed(rseed) - #point with problems: 5.37632864e+00, 6.10414710e+00, 6.25256493e+00] - data = np.zeros(self.parameters['nparticles']*ncomponents).reshape(-1, ncomponents) - data[:, :3] = np.random.random((self.parameters['nparticles'], 3))*2*np.pi - if testing: - #data[0] = np.array([3.26434, 4.24418, 3.12157]) - data[0] = np.array([ 0.72086101, 2.59043666, 6.27501953]) - with h5py.File(self.get_particle_file_name(), 'r+') as data_file: - data_file['tracers{0}/state'.format(species)][0] = data - if write_to_file: - data.tofile( - os.path.join( - self.work_dir, - "tracers{0}_state_i{1:0>5x}".format(species, iteration))) - return data - def generate_initial_condition(self): - self.generate_vector_field(write_to_file = True) - for species in range(self.particle_species): - self.generate_tracer_state( - species = species, - write_to_file = False) - return None - def 
get_kspace(self): - kspace = {} - if self.parameters['dealias_type'] == 1: - kMx = self.parameters['dkx']*(self.parameters['nx']//2 - 1) - kMy = self.parameters['dky']*(self.parameters['ny']//2 - 1) - kMz = self.parameters['dkz']*(self.parameters['nz']//2 - 1) - else: - kMx = self.parameters['dkx']*(self.parameters['nx']//3 - 1) - kMy = self.parameters['dky']*(self.parameters['ny']//3 - 1) - kMz = self.parameters['dkz']*(self.parameters['nz']//3 - 1) - kspace['kM'] = max(kMx, kMy, kMz) - kspace['dk'] = min(self.parameters['dkx'], - self.parameters['dky'], - self.parameters['dkz']) - nshells = int(kspace['kM'] / kspace['dk']) + 2 - kspace['nshell'] = np.zeros(nshells, dtype = np.int64) - kspace['kshell'] = np.zeros(nshells, dtype = np.float64) - kspace['kx'] = np.arange( 0, - self.parameters['nx']//2 + 1).astype(np.float64)*self.parameters['dkx'] - kspace['ky'] = np.arange(-self.parameters['ny']//2 + 1, - self.parameters['ny']//2 + 1).astype(np.float64)*self.parameters['dky'] - kspace['ky'] = np.roll(kspace['ky'], self.parameters['ny']//2+1) - kspace['kz'] = np.arange(-self.parameters['nz']//2 + 1, - self.parameters['nz']//2 + 1).astype(np.float64)*self.parameters['dkz'] - kspace['kz'] = np.roll(kspace['kz'], self.parameters['nz']//2+1) - return kspace - def write_par(self, iter0 = 0): - assert (self.parameters['niter_todo'] % self.parameters['niter_stat'] == 0) - assert (self.parameters['niter_todo'] % self.parameters['niter_out'] == 0) - assert (self.parameters['niter_todo'] % self.parameters['niter_part'] == 0) - assert (self.parameters['niter_out'] % self.parameters['niter_stat'] == 0) - assert (self.parameters['niter_out'] % self.parameters['niter_part'] == 0) - _code.write_par(self, iter0 = iter0) - with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r+') as ofile: - ofile['bfps_info/exec_name'] = self.name - ofile['field_dtype'] = np.dtype(self.dtype).str - kspace = self.get_kspace() - for k in kspace.keys(): - ofile['kspace/' + k] = kspace[k] - nshells = kspace['nshell'].shape[0] - ofile.close() - return None - def specific_parser_arguments( - self, - parser): - _code.specific_parser_arguments(self, parser) - return None - diff --git a/bfps/cpp/distributed_particles.cpp b/bfps/cpp/distributed_particles.cpp deleted file mode 100644 index 73fd0275d8138d41bb4ee7fbc28e2d41e8017661..0000000000000000000000000000000000000000 --- a/bfps/cpp/distributed_particles.cpp +++ /dev/null @@ -1,472 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -//#define NDEBUG - -#include <cmath> -#include <cassert> -#include <cstring> -#include <string> -#include <sstream> -#include <array> - -#include "base.hpp" -#include "distributed_particles.hpp" -#include "fftw_tools.hpp" -#include "scope_timer.hpp" - - -extern int myrank, nprocs; - -template <particle_types particle_type, class rnumber, int interp_neighbours> -distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles( - const char *NAME, - const hid_t data_file_id, - interpolator<rnumber, interp_neighbours> *VEL, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS) : particles_io_base<particle_type>( - NAME, - TRAJ_SKIP, - data_file_id, - VEL->descriptor->comm) -{ - assert((INTEGRATION_STEPS <= 6) && - (INTEGRATION_STEPS >= 1)); - this->vel = VEL; - this->rhs.resize(INTEGRATION_STEPS); - this->integration_steps = INTEGRATION_STEPS; - this->state.reserve(2*this->nparticles / this->nprocs); - for (unsigned int i=0; i<this->rhs.size(); i++) - this->rhs[i].reserve(2*this->nparticles / this->nprocs); -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -distributed_particles<particle_type, rnumber, interp_neighbours>::~distributed_particles() -{ -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::sample( - interpolator<rnumber, interp_neighbours> *field, - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, single_particle_state<POINT3D>> &y) -{ - std::array<double, 3> yy; - y.clear(); - for (auto &pp: x) - { - (*field)(pp.second.data, &yy.front()); - y[pp.first] = &yy.front(); - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, single_particle_state<particle_type>> &y) -{ - std::unordered_map<int, single_particle_state<POINT3D>> yy; - switch(particle_type) - { - case VELOCITY_TRACER: - this->sample(this->vel, this->state, yy); - y.clear(); - for (auto &pp: x) - y[pp.first] = yy[pp.first].data; - break; - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::sample( - interpolator<rnumber, interp_neighbours> *field, - const char *dset_name) -{ - std::unordered_map<int, single_particle_state<POINT3D>> y; - this->sample(field, this->state, y); - this->write(dset_name, y); -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs() -{ - for (int i=this->integration_steps-2; i>=0; i--) - rhs[i+1] = rhs[i]; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute( - std::unordered_map<int, single_particle_state<particle_type>> &x, - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals) -{ - TIMEZONE("distributed_particles::redistribute"); - //DEBUG_MSG("entered redistribute\n"); - /* neighbouring rank offsets */ - int ro[2]; - ro[0] = -1; - ro[1] = 1; - /* 
neighbouring ranks */ - int nr[2]; - nr[0] = MOD(this->myrank+ro[0], this->nprocs); - nr[1] = MOD(this->myrank+ro[1], this->nprocs); - /* particles to send, particles to receive */ - std::vector<int> ps[2], pr[2]; - /* number of particles to send, number of particles to receive */ - int nps[2], npr[2]; - int rsrc, rdst; - /* get list of id-s to send */ - for (auto &pp: x) - for (unsigned int i=0; i<2; i++) - if (this->vel->get_rank(pp.second.data[2]) == nr[i]) - ps[i].push_back(pp.first); - /* prepare data for send recv */ - for (unsigned int i=0; i<2; i++) - nps[i] = ps[i].size(); - for (rsrc = 0; rsrc<this->nprocs; rsrc++) - for (unsigned int i=0; i<2; i++) - { - rdst = MOD(rsrc+ro[i], this->nprocs); - if (this->myrank == rsrc) - MPI_Send( - nps+i, - 1, - MPI_INTEGER, - rdst, - 2*(rsrc*this->nprocs + rdst)+i, - this->comm); - if (this->myrank == rdst) - MPI_Recv( - npr+1-i, - 1, - MPI_INTEGER, - rsrc, - 2*(rsrc*this->nprocs + rdst)+i, - this->comm, - MPI_STATUS_IGNORE); - } - //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]); - //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]); - for (unsigned int i=0; i<2; i++) - pr[i].resize(npr[i]); - - int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1]; - buffer_size = (buffer_size > npr[0])? buffer_size : npr[0]; - buffer_size = (buffer_size > npr[1])? buffer_size : npr[1]; - //DEBUG_MSG("buffer size is %d\n", buffer_size); - double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())]; - for (rsrc = 0; rsrc<this->nprocs; rsrc++) - for (unsigned int i=0; i<2; i++) - { - rdst = MOD(rsrc+ro[i], this->nprocs); - if (this->myrank == rsrc && nps[i] > 0) - { - MPI_Send( - &ps[i].front(), - nps[i], - MPI_INTEGER, - rdst, - 2*(rsrc*this->nprocs + rdst), - this->comm); - int pcounter = 0; - for (int p: ps[i]) - { - std::copy(x[p].data, - x[p].data + state_dimension(particle_type), - buffer + pcounter*(1+vals.size())*state_dimension(particle_type)); - x.erase(p); - for (unsigned int tindex=0; tindex<vals.size(); tindex++) - { - std::copy(vals[tindex][p].data, - vals[tindex][p].data + state_dimension(particle_type), - buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type)); - vals[tindex].erase(p); - } - pcounter++; - } - MPI_Send( - buffer, - nps[i]*(1+vals.size())*state_dimension(particle_type), - MPI_DOUBLE, - rdst, - 2*(rsrc*this->nprocs + rdst)+1, - this->comm); - } - if (this->myrank == rdst && npr[1-i] > 0) - { - MPI_Recv( - &pr[1-i].front(), - npr[1-i], - MPI_INTEGER, - rsrc, - 2*(rsrc*this->nprocs + rdst), - this->comm, - MPI_STATUS_IGNORE); - MPI_Recv( - buffer, - npr[1-i]*(1+vals.size())*state_dimension(particle_type), - MPI_DOUBLE, - rsrc, - 2*(rsrc*this->nprocs + rdst)+1, - this->comm, - MPI_STATUS_IGNORE); - unsigned int pcounter = 0; - for (int p: pr[1-i]) - { - x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type); - for (unsigned int tindex=0; tindex<vals.size(); tindex++) - { - vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type); - } - pcounter++; - } - } - } - delete[] buffer; - - -#ifndef NDEBUG - /* check that all particles at x are local */ - for (auto &pp: x) - if (this->vel->get_rank(pp.second.data[2]) != this->myrank) - { - DEBUG_MSG("found particle %d with rank %d\n", - pp.first, - this->vel->get_rank(pp.second.data[2])); - assert(false); - } -#endif - //DEBUG_MSG("exiting redistribute\n"); -} - - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void 
distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( - const int nsteps) -{ - this->get_rhs(this->state, this->rhs[0]); - for (auto &pp: this->state) - for (unsigned int i=0; i<state_dimension(particle_type); i++) - switch(nsteps) - { - case 1: - pp.second[i] += this->dt*this->rhs[0][pp.first][i]; - break; - case 2: - pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i] - - this->rhs[1][pp.first][i])/2; - break; - case 3: - pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i] - - 16*this->rhs[1][pp.first][i] - + 5*this->rhs[2][pp.first][i])/12; - break; - case 4: - pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i] - - 59*this->rhs[1][pp.first][i] - + 37*this->rhs[2][pp.first][i] - - 9*this->rhs[3][pp.first][i])/24; - break; - case 5: - pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i] - - 2774*this->rhs[1][pp.first][i] - + 2616*this->rhs[2][pp.first][i] - - 1274*this->rhs[3][pp.first][i] - + 251*this->rhs[4][pp.first][i])/720; - break; - case 6: - pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i] - - 7923*this->rhs[1][pp.first][i] - + 9982*this->rhs[2][pp.first][i] - - 7298*this->rhs[3][pp.first][i] - + 2877*this->rhs[4][pp.first][i] - - 475*this->rhs[5][pp.first][i])/1440; - break; - } - this->redistribute(this->state, this->rhs); - this->roll_rhs(); -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::step() -{ - TIMEZONE("distributed_particles::step"); - this->AdamsBashforth((this->iteration < this->integration_steps) ? - this->iteration+1 : - this->integration_steps); - this->iteration++; -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::read() -{ - double *temp = new double[this->chunk_size*state_dimension(particle_type)]; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - //read state - if (this->myrank == 0) - this->read_state_chunk(cindex, temp); - MPI_Bcast( - temp, - this->chunk_size*state_dimension(particle_type), - MPI_DOUBLE, - 0, - this->comm); - for (unsigned int p=0; p<this->chunk_size; p++) - { - if (this->vel->get_rank(temp[state_dimension(particle_type)*p+2]) == this->myrank) - this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; - } - //read rhs - if (this->iteration > 0) - for (int i=0; i<this->integration_steps; i++) - { - if (this->myrank == 0) - this->read_rhs_chunk(cindex, i, temp); - MPI_Bcast( - temp, - this->chunk_size*state_dimension(particle_type), - MPI_DOUBLE, - 0, - this->comm); - for (unsigned int p=0; p<this->chunk_size; p++) - { - auto pp = this->state.find(p+cindex*this->chunk_size); - if (pp != this->state.end()) - this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; - } - } - } - DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size()); - delete[] temp; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::write( - const char *dset_name, - std::unordered_map<int, single_particle_state<POINT3D>> &y) -{ - TIMEZONE("distributed_particles::write"); - double *data = new double[this->nparticles*3]; - double *yy = new double[this->nparticles*3]; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - std::fill_n(yy, this->chunk_size*3, 0); - for (unsigned int p=0; 
p<this->chunk_size; p++) - { - auto pp = y.find(p+cindex*this->chunk_size); - if (pp != y.end()) - std::copy(pp->second.data, - pp->second.data + 3, - yy + pp->first*3); - } - MPI_Allreduce( - yy, - data, - 3*this->nparticles, - MPI_DOUBLE, - MPI_SUM, - this->comm); - if (this->myrank == 0) - this->write_point3D_chunk(dset_name, cindex, data); - } - delete[] yy; - delete[] data; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void distributed_particles<particle_type, rnumber, interp_neighbours>::write( - const bool write_rhs) -{ - TIMEZONE("distributed_particles::write2"); - double *temp0 = new double[this->chunk_size*state_dimension(particle_type)]; - double *temp1 = new double[this->chunk_size*state_dimension(particle_type)]; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - //write state - std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - for (unsigned int p=0; p<this->chunk_size; p++) - { - auto pp = this->state.find(p + cindex*this->chunk_size); - if (pp != this->state.end()) - std::copy(pp->second.data, - pp->second.data + state_dimension(particle_type), - temp0 + p*state_dimension(particle_type)); - } - MPI_Allreduce( - temp0, - temp1, - state_dimension(particle_type)*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - if (this->myrank == 0) - this->write_state_chunk(cindex, temp1); - //write rhs - if (write_rhs) - for (int i=0; i<this->integration_steps; i++) - { - std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - for (unsigned int p=0; p<this->chunk_size; p++) - { - auto pp = this->rhs[i].find(p + cindex*this->chunk_size); - if (pp != this->rhs[i].end()) - std::copy(pp->second.data, - pp->second.data + state_dimension(particle_type), - temp0 + p*state_dimension(particle_type)); - } - MPI_Allreduce( - temp0, - temp1, - state_dimension(particle_type)*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - if (this->myrank == 0) - this->write_rhs_chunk(cindex, i, temp1); - } - } - delete[] temp0; - delete[] temp1; -} - - -/*****************************************************************************/ -template class distributed_particles<VELOCITY_TRACER, float, 1>; -template class distributed_particles<VELOCITY_TRACER, float, 2>; -template class distributed_particles<VELOCITY_TRACER, float, 3>; -template class distributed_particles<VELOCITY_TRACER, float, 4>; -template class distributed_particles<VELOCITY_TRACER, float, 5>; -template class distributed_particles<VELOCITY_TRACER, float, 6>; -template class distributed_particles<VELOCITY_TRACER, double, 1>; -template class distributed_particles<VELOCITY_TRACER, double, 2>; -template class distributed_particles<VELOCITY_TRACER, double, 3>; -template class distributed_particles<VELOCITY_TRACER, double, 4>; -template class distributed_particles<VELOCITY_TRACER, double, 5>; -template class distributed_particles<VELOCITY_TRACER, double, 6>; -/*****************************************************************************/ diff --git a/bfps/cpp/distributed_particles.hpp b/bfps/cpp/distributed_particles.hpp deleted file mode 100644 index cf6e124a7744c049b6fcf0c84c1618a0a214c30e..0000000000000000000000000000000000000000 --- a/bfps/cpp/distributed_particles.hpp +++ /dev/null @@ -1,105 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. 
* -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include <unordered_map> -#include <vector> -#include <hdf5.h> -#include "base.hpp" -#include "particles_base.hpp" -#include "fluid_solver_base.hpp" -#include "interpolator.hpp" - -#ifndef DISTRIBUTED_PARTICLES - -#define DISTRIBUTED_PARTICLES - -template <particle_types particle_type, class rnumber, int interp_neighbours> -class distributed_particles: public particles_io_base<particle_type> -{ - private: - std::unordered_map<int, single_particle_state<particle_type> > state; - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs; - - public: - int integration_steps; - // this class only works with buffered interpolator - interpolator<rnumber, interp_neighbours> *vel; - - /* simulation parameters */ - double dt; - - /* methods */ - - /* constructor and destructor. - * allocate and deallocate: - * this->state - * this->rhs - * */ - distributed_particles( - const char *NAME, - const hid_t data_file_id, - interpolator<rnumber, interp_neighbours> *FIELD, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS = 2); - ~distributed_particles(); - - void sample( - interpolator<rnumber, interp_neighbours> *field, - const char *dset_name); - void sample( - interpolator<rnumber, interp_neighbours> *field, - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, single_particle_state<POINT3D>> &y); - void get_rhs( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, single_particle_state<particle_type>> &y); - - void redistribute( - std::unordered_map<int, single_particle_state<particle_type>> &x, - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals); - - - /* input/output */ - void read(); - void write( - const char *dset_name, - std::unordered_map<int, single_particle_state<POINT3D>> &y); - void write( - const char *dset_name, - std::unordered_map<int, single_particle_state<particle_type>> &y); - void write(const bool write_rhs = true); - - /* solvers */ - void step(); - void roll_rhs(); - void AdamsBashforth(const int nsteps); -}; - -#endif//DISTRIBUTED_PARTICLES - diff --git a/bfps/cpp/fftw_interface.hpp b/bfps/cpp/fftw_interface.hpp deleted file mode 100644 index 495ec9fa3712153df4d31faf7dfb3046637b5483..0000000000000000000000000000000000000000 --- a/bfps/cpp/fftw_interface.hpp +++ /dev/null @@ -1,173 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. 
* -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - -#ifndef FFTW_INTERFACE_HPP -#define FFTW_INTERFACE_HPP - -#include <fftw3-mpi.h> - -#ifdef USE_FFTWESTIMATE -#define DEFAULT_FFTW_FLAG FFTW_ESTIMATE -#warning You are using FFTW estimate -#else -#define DEFAULT_FFTW_FLAG FFTW_PATIENT -#endif - -template <class realtype> -class fftw_interface; - -template <> -class fftw_interface<float> -{ -public: - using real = float; - using complex = fftwf_complex; - using plan = fftwf_plan; - using iodim = fftwf_iodim; - - static complex* alloc_complex(const size_t in_size){ - return fftwf_alloc_complex(in_size); - } - - static real* alloc_real(const size_t in_size){ - return fftwf_alloc_real(in_size); - } - - static void free(void* ptr){ - fftwf_free(ptr); - } - - static void execute(plan in_plan){ - fftwf_execute(in_plan); - } - - static void destroy_plan(plan in_plan){ - fftwf_destroy_plan(in_plan); - } - - template <class ... Params> - static plan mpi_plan_transpose(Params ... params){ - return fftwf_mpi_plan_transpose(params...); - } - - template <class ... Params> - static plan mpi_plan_many_transpose(Params ... params){ - return fftwf_mpi_plan_many_transpose(params...); - } - - template <class ... Params> - static plan plan_guru_r2r(Params ... params){ - return fftwf_plan_guru_r2r(params...); - } - - template <class ... Params> - static plan plan_guru_dft(Params ... params){ - return fftwf_plan_guru_dft(params...); - } - - template <class ... Params> - static plan mpi_plan_many_dft_c2r(Params ... params){ - return fftwf_mpi_plan_many_dft_c2r(params...); - } - - template <class ... Params> - static plan mpi_plan_many_dft_r2c(Params ... params){ - return fftwf_mpi_plan_many_dft_r2c(params...); - } - - template <class ... Params> - static plan mpi_plan_dft_c2r_3d(Params ... params){ - return fftwf_mpi_plan_dft_c2r_3d(params...); - } -}; - -template <> -class fftw_interface<double> -{ -public: - using real = double; - using complex = fftw_complex; - using plan = fftw_plan; - using iodim = fftw_iodim; - - static complex* alloc_complex(const size_t in_size){ - return fftw_alloc_complex(in_size); - } - - static real* alloc_real(const size_t in_size){ - return fftw_alloc_real(in_size); - } - - static void free(void* ptr){ - fftw_free(ptr); - } - - static void execute(plan in_plan){ - fftw_execute(in_plan); - } - - static void destroy_plan(plan in_plan){ - fftw_destroy_plan(in_plan); - } - - template <class ... Params> - static plan mpi_plan_transpose(Params ... params){ - return fftw_mpi_plan_transpose(params...); - } - - template <class ... Params> - static plan mpi_plan_many_transpose(Params ... params){ - return fftw_mpi_plan_many_transpose(params...); - } - - template <class ... Params> - static plan plan_guru_r2r(Params ... 
params){ - return fftw_plan_guru_r2r(params...); - } - - template <class ... Params> - static plan plan_guru_dft(Params ... params){ - return fftw_plan_guru_dft(params...); - } - - template <class ... Params> - static plan mpi_plan_many_dft_c2r(Params ... params){ - return fftw_mpi_plan_many_dft_c2r(params...); - } - - template <class ... Params> - static plan mpi_plan_many_dft_r2c(Params ... params){ - return fftw_mpi_plan_many_dft_r2c(params...); - } - - template <class ... Params> - static plan mpi_plan_dft_c2r_3d(Params ... params){ - return fftw_mpi_plan_dft_c2r_3d(params...); - } -}; - - - -#endif // FFTW_INTERFACE_HPP - diff --git a/bfps/cpp/fftw_tools.cpp b/bfps/cpp/fftw_tools.cpp deleted file mode 100644 index 61e03d292f81aed1fa4b2dfcab880fb7105b676e..0000000000000000000000000000000000000000 --- a/bfps/cpp/fftw_tools.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - -#include <stdlib.h> -#include <algorithm> -#include <iostream> -#include "base.hpp" -#include "fftw_tools.hpp" -#include "fftw_interface.hpp" - -#define NDEBUG - -template <class rnumber> -int clip_zero_padding( - field_descriptor<rnumber> *f, - rnumber *a, - int howmany) -{ - if (f->ndims < 3) - return EXIT_FAILURE; - rnumber *b = a; - ptrdiff_t copy_size = f->sizes[2] * howmany; - ptrdiff_t skip_size = copy_size + 2*howmany; - for (int i0 = 0; i0 < f->subsizes[0]; i0++) - for (int i1 = 0; i1 < f->sizes[1]; i1++) - { - std::copy(a, a + copy_size, b); - a += skip_size; - b += copy_size; - } - return EXIT_SUCCESS; -} - -template -int clip_zero_padding<float>( - field_descriptor<float> *f, - float *a, - int howmany); - -template -int clip_zero_padding<double>( - field_descriptor<double> *f, - double *a, - int howmany); - - - -template <class rnumber> -int copy_complex_array( - field_descriptor<rnumber> *fi, - rnumber (*ai)[2], -field_descriptor<rnumber> *fo, -rnumber (*ao)[2], -int howmany) -{ - DEBUG_MSG("entered copy_complex_array\n"); - typename fftw_interface<rnumber>::complex *buffer; - buffer = fftw_interface<rnumber>::alloc_complex(fi->slice_size*howmany); - - int min_fast_dim; - min_fast_dim = - (fi->sizes[2] > fo->sizes[2]) ? 
- fo->sizes[2] : fi->sizes[2]; - - /* clean up destination, in case we're padding with zeros - (even if only for one dimension) */ - std::fill_n((rnumber*)ao, fo->local_size*2, 0.0); - - int64_t ii0, ii1; - int64_t oi0, oi1; - int64_t delta1, delta0; - int irank, orank; - delta0 = (fo->sizes[0] - fi->sizes[0]); - delta1 = (fo->sizes[1] - fi->sizes[1]); - for (ii0=0; ii0 < fi->sizes[0]; ii0++) - { - if (ii0 <= fi->sizes[0]/2) - { - oi0 = ii0; - if (oi0 > fo->sizes[0]/2) - continue; - } - else - { - oi0 = ii0 + delta0; - if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) - continue; - } - irank = fi->rank[ii0]; - orank = fo->rank[oi0]; - if ((irank == orank) && - (irank == fi->myrank)) - { - std::copy( - (rnumber*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), - (rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), - (rnumber*)buffer); - } - else - { - if (fi->myrank == irank) - { - MPI_Send( - (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), - fi->slice_size, - mpi_real_type<rnumber>::complex(), - orank, - ii0, - fi->comm); - } - if (fi->myrank == orank) - { - MPI_Recv( - (void*)(buffer), - fi->slice_size, - mpi_real_type<rnumber>::complex(), - irank, - ii0, - fi->comm, - MPI_STATUS_IGNORE); - } - } - if (fi->myrank == orank) - { - for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) - { - if (ii1 <= fi->sizes[1]/2) - { - oi1 = ii1; - if (oi1 > fo->sizes[1]/2) - continue; - } - else - { - oi1 = ii1 + delta1; - if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) - continue; - } - std::copy( - (rnumber*)(buffer + (ii1*fi->sizes[2]*howmany)), - (rnumber*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), - (rnumber*)(ao + - ((oi0 - fo->starts[0])*fo->sizes[1] + - oi1)*fo->sizes[2]*howmany)); - } - } - } - fftw_interface<rnumber>::free(buffer); - MPI_Barrier(fi->comm); - - DEBUG_MSG("exiting copy_complex_array\n"); - return EXIT_SUCCESS; -} - -template -int copy_complex_array<float>( - field_descriptor<float> *fi, - float (*ai)[2], - field_descriptor<float> *fo, - float (*ao)[2], - int howmany); - -template -int copy_complex_array<double>( - field_descriptor<double> *fi, - double (*ai)[2], - field_descriptor<double> *fo, - double (*ao)[2], - int howmany); - - -template <class rnumber> -int get_descriptors_3D( - int n0, int n1, int n2, - field_descriptor<rnumber> **fr, - field_descriptor<rnumber> **fc) -{ - int ntmp[3]; - ntmp[0] = n0; - ntmp[1] = n1; - ntmp[2] = n2; - *fr = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD); - ntmp[0] = n0; - ntmp[1] = n1; - ntmp[2] = n2/2+1; - *fc = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::complex(), MPI_COMM_WORLD); - return EXIT_SUCCESS; -} - -template -int get_descriptors_3D<float>( - int n0, int n1, int n2, - field_descriptor<float> **fr, - field_descriptor<float> **fc); - -template -int get_descriptors_3D<double>( - int n0, int n1, int n2, - field_descriptor<double> **fr, - field_descriptor<double> **fc); - diff --git a/bfps/cpp/field_descriptor.cpp b/bfps/cpp/field_descriptor.cpp deleted file mode 100644 index 20c634262dbb45ad4c2bb5a1b5640b6df23d4d2c..0000000000000000000000000000000000000000 --- a/bfps/cpp/field_descriptor.cpp +++ /dev/null @@ -1,543 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. 
* -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <stdlib.h> -#include <algorithm> -#include <iostream> -#include "base.hpp" -#include "field_descriptor.hpp" -#include "fftw_interface.hpp" -#include "scope_timer.hpp" - -/*****************************************************************************/ -/* macro for specializations to numeric types compatible with FFTW */ - - -template <class rnumber> -field_descriptor<rnumber>::field_descriptor( - int ndims, - int *n, - MPI_Datatype element_type, - MPI_Comm COMM_TO_USE) -{ - TIMEZONE("field_descriptor"); - DEBUG_MSG("entered field_descriptor::field_descriptor\n"); - this->comm = COMM_TO_USE; - MPI_Comm_rank(this->comm, &this->myrank); - MPI_Comm_size(this->comm, &this->nprocs); - this->ndims = ndims; - this->sizes = new int[ndims]; - this->subsizes = new int[ndims]; - this->starts = new int[ndims]; - int tsizes [ndims]; - int tsubsizes[ndims]; - int tstarts [ndims]; - std::vector<ptrdiff_t> nfftw; - nfftw.resize(ndims); - ptrdiff_t local_n0, local_0_start; - for (int i = 0; i < this->ndims; i++) - nfftw[i] = n[i]; - this->local_size = fftw_mpi_local_size_many( - this->ndims, - &nfftw.front(), - 1, - FFTW_MPI_DEFAULT_BLOCK, - this->comm, - &local_n0, - &local_0_start); - this->sizes[0] = n[0]; - this->subsizes[0] = (int)local_n0; - this->starts[0] = (int)local_0_start; - DEBUG_MSG_WAIT( - this->comm, - "first subsizes[0] = %d %d %d\n", - this->subsizes[0], - tsubsizes[0], - (int)local_n0); - tsizes[0] = n[0]; - tsubsizes[0] = (int)local_n0; - tstarts[0] = (int)local_0_start; - DEBUG_MSG_WAIT( - this->comm, - "second subsizes[0] = %d %d %d\n", - this->subsizes[0], - tsubsizes[0], - (int)local_n0); - this->mpi_dtype = element_type; - this->slice_size = 1; - this->full_size = this->sizes[0]; - for (int i = 1; i < this->ndims; i++) - { - this->sizes[i] = n[i]; - this->subsizes[i] = n[i]; - this->starts[i] = 0; - this->slice_size *= this->subsizes[i]; - this->full_size *= this->sizes[i]; - tsizes[i] = this->sizes[i]; - tsubsizes[i] = this->subsizes[i]; - tstarts[i] = this->starts[i]; - } - tsizes[ndims-1] *= sizeof(rnumber); - tsubsizes[ndims-1] *= sizeof(rnumber); - tstarts[ndims-1] *= sizeof(rnumber); - if (this->mpi_dtype == mpi_real_type<rnumber>::complex()) - { - tsizes[ndims-1] *= 2; - tsubsizes[ndims-1] *= 2; - tstarts[ndims-1] *= 2; - } - int local_zero_array[this->nprocs], zero_array[this->nprocs]; - for (int i=0; i<this->nprocs; i++) - local_zero_array[i] = 0; - local_zero_array[this->myrank] = (this->subsizes[0] == 0) ? 
1 : 0; - MPI_Allreduce( - local_zero_array, - zero_array, - this->nprocs, - MPI_INT, - MPI_SUM, - this->comm); - int no_of_excluded_ranks = 0; - for (int i = 0; i<this->nprocs; i++) - no_of_excluded_ranks += zero_array[i]; - DEBUG_MSG_WAIT( - this->comm, - "subsizes[0] = %d %d\n", - this->subsizes[0], - tsubsizes[0]); - if (no_of_excluded_ranks == 0) - { - this->io_comm = this->comm; - this->io_nprocs = this->nprocs; - this->io_myrank = this->myrank; - } - else - { - int excluded_rank[no_of_excluded_ranks]; - for (int i=0, j=0; i<this->nprocs; i++) - if (zero_array[i]) - { - excluded_rank[j] = i; - j++; - } - MPI_Group tgroup0, tgroup; - MPI_Comm_group(this->comm, &tgroup0); - MPI_Group_excl(tgroup0, no_of_excluded_ranks, excluded_rank, &tgroup); - MPI_Comm_create(this->comm, tgroup, &this->io_comm); - MPI_Group_free(&tgroup0); - MPI_Group_free(&tgroup); - if (this->subsizes[0] > 0) - { - MPI_Comm_rank(this->io_comm, &this->io_myrank); - MPI_Comm_size(this->io_comm, &this->io_nprocs); - } - else - { - this->io_myrank = MPI_PROC_NULL; - this->io_nprocs = -1; - } - } - DEBUG_MSG_WAIT( - this->comm, - "inside field_descriptor constructor, about to call " - "MPI_Type_create_subarray " - "%d %d %d\n", - this->sizes[0], - this->subsizes[0], - this->starts[0]); - for (int i=0; i<this->ndims; i++) - DEBUG_MSG_WAIT( - this->comm, - "tsizes " - "%d %d %d\n", - tsizes[i], - tsubsizes[i], - tstarts[i]); - if (this->subsizes[0] > 0) - { - DEBUG_MSG("creating subarray\n"); - MPI_Type_create_subarray( - ndims, - tsizes, - tsubsizes, - tstarts, - MPI_ORDER_C, - MPI_UNSIGNED_CHAR, - &this->mpi_array_dtype); - MPI_Type_commit(&this->mpi_array_dtype); - } - this->rank = new int[this->sizes[0]]; - int *local_rank = new int[this->sizes[0]]; - std::fill_n(local_rank, this->sizes[0], 0); - for (int i = 0; i < this->sizes[0]; i++) - if (i >= this->starts[0] && i < this->starts[0] + this->subsizes[0]) - local_rank[i] = this->myrank; - MPI_Allreduce( - local_rank, - this->rank, - this->sizes[0], - MPI_INT, - MPI_SUM, - this->comm); - delete[] local_rank; - this->all_start0 = new int[this->nprocs]; - int *local_start0 = new int[this->nprocs]; - std::fill_n(local_start0, this->nprocs, 0); - for (int i = 0; i < this->nprocs; i++) - if (this->myrank == i) - local_start0[i] = this->starts[0]; - MPI_Allreduce( - local_start0, - this->all_start0, - this->nprocs, - MPI_INT, - MPI_SUM, - this->comm); - delete[] local_start0; - this->all_size0 = new int[this->nprocs]; - int *local_size0 = new int[this->nprocs]; - std::fill_n(local_size0, this->nprocs, 0); - for (int i = 0; i < this->nprocs; i++) - if (this->myrank == i) - local_size0[i] = this->subsizes[0]; - MPI_Allreduce( - local_size0, - this->all_size0, - this->nprocs, - MPI_INT, - MPI_SUM, - this->comm); - delete[] local_size0; - DEBUG_MSG("exiting field_descriptor constructor\n"); -} - -template <class rnumber> -int field_descriptor<rnumber>::read( - const char *fname, - void *buffer) -{ - TIMEZONE("field_descriptor::read"); - DEBUG_MSG("entered field_descriptor::read\n"); - char representation[] = "native"; - if (this->subsizes[0] > 0) - { - MPI_Info info; - MPI_Info_create(&info); - MPI_File f; - ptrdiff_t read_size = this->local_size*sizeof(rnumber); - DEBUG_MSG("read size is %ld\n", read_size); - char ffname[200]; - if (this->mpi_dtype == mpi_real_type<rnumber>::complex()) - read_size *= 2; - DEBUG_MSG("read size is %ld\n", read_size); - sprintf(ffname, "%s", fname); - - MPI_File_open( - this->io_comm, - ffname, - MPI_MODE_RDONLY, - info, - &f); - DEBUG_MSG("opened 
file\n"); - MPI_File_set_view( - f, - 0, - MPI_UNSIGNED_CHAR, - this->mpi_array_dtype, - representation, - info); - DEBUG_MSG("view is set\n"); - MPI_File_read_all( - f, - buffer, - read_size, - MPI_UNSIGNED_CHAR, - MPI_STATUS_IGNORE); - DEBUG_MSG("info is read\n"); - MPI_File_close(&f); - } - DEBUG_MSG("finished with field_descriptor::read\n"); - return EXIT_SUCCESS; -} - -template <class rnumber> -int field_descriptor<rnumber>::write( - const char *fname, - void *buffer) -{ - TIMEZONE("field_descriptor::write"); - char representation[] = "native"; - if (this->subsizes[0] > 0) - { - MPI_Info info; - MPI_Info_create(&info); - MPI_File f; - ptrdiff_t read_size = this->local_size*sizeof(rnumber); - char ffname[200]; - if (this->mpi_dtype == mpi_real_type<rnumber>::complex()) - read_size *= 2; - sprintf(ffname, "%s", fname); - - MPI_File_open( - this->io_comm, - ffname, - MPI_MODE_CREATE | MPI_MODE_WRONLY, - info, - &f); - MPI_File_set_view( - f, - 0, - MPI_UNSIGNED_CHAR, - this->mpi_array_dtype, - representation, - info); - MPI_File_write_all( - f, - buffer, - read_size, - MPI_UNSIGNED_CHAR, - MPI_STATUS_IGNORE); - MPI_File_close(&f); - } - - return EXIT_SUCCESS; -} - -template <class rnumber> -int field_descriptor<rnumber>::transpose( - rnumber *input, - rnumber *output) -{ - TIMEZONE("field_descriptor::transpose"); - /* IMPORTANT NOTE: - for 3D transposition, the input data is messed up */ - typename fftw_interface<rnumber>::plan tplan; - if (this->ndims == 3) - { - /* transpose the two local dimensions 1 and 2 */ - rnumber *atmp; - atmp = fftw_interface<rnumber>::alloc_real(this->slice_size); - for (int k = 0; k < this->subsizes[0]; k++) - { - /* put transposed slice in atmp */ - for (int j = 0; j < this->sizes[1]; j++) - for (int i = 0; i < this->sizes[2]; i++) - atmp[i*this->sizes[1] + j] = - input[(k*this->sizes[1] + j)*this->sizes[2] + i]; - /* copy back transposed slice */ - std::copy( - atmp, - atmp + this->slice_size, - input + k*this->slice_size); - } - fftw_interface<rnumber>::free(atmp); - } - tplan = fftw_interface<rnumber>::mpi_plan_transpose( - this->sizes[0], this->slice_size, - input, output, - this->comm, - DEFAULT_FFTW_FLAG); - fftw_interface<rnumber>::execute(tplan); - fftw_interface<rnumber>::destroy_plan(tplan); - return EXIT_SUCCESS; -} - -template <class rnumber> -int field_descriptor<rnumber>::transpose( - typename fftw_interface<rnumber>::complex *input, - typename fftw_interface<rnumber>::complex *output) -{ - TIMEZONE("field_descriptor::transpose2"); - switch (this->ndims) - { - case 2: - /* do a global transpose over the 2 dimensions */ - if (output == NULL) - { - std::cerr << "bad arguments for transpose.\n" << std::endl; - return EXIT_FAILURE; - } - typename fftw_interface<rnumber>::plan tplan; - tplan = fftw_interface<rnumber>::mpi_plan_many_transpose( - this->sizes[0], this->sizes[1], 2, - FFTW_MPI_DEFAULT_BLOCK, - FFTW_MPI_DEFAULT_BLOCK, - (rnumber*)input, (rnumber*)output, - this->comm, - DEFAULT_FFTW_FLAG); - fftw_interface<rnumber>::execute(tplan); - fftw_interface<rnumber>::destroy_plan(tplan); - break; - case 3: - /* transpose the two local dimensions 1 and 2 */ - typename fftw_interface<rnumber>::complex *atmp; - atmp = fftw_interface<rnumber>::alloc_complex(this->slice_size); - for (int k = 0; k < this->subsizes[0]; k++) - { - /* put transposed slice in atmp */ - for (int j = 0; j < this->sizes[1]; j++) - for (int i = 0; i < this->sizes[2]; i++) - { - atmp[i*this->sizes[1] + j][0] = - input[(k*this->sizes[1] + j)*this->sizes[2] + i][0]; - 
atmp[i*this->sizes[1] + j][1] = - input[(k*this->sizes[1] + j)*this->sizes[2] + i][1]; - } - /* copy back transposed slice */ - std::copy( - (rnumber*)(atmp), - (rnumber*)(atmp + this->slice_size), - (rnumber*)(input + k*this->slice_size)); - } - fftw_interface<rnumber>::free(atmp); - break; - default: - return EXIT_FAILURE; - break; - } - return EXIT_SUCCESS; -} - -template <class rnumber> -int field_descriptor<rnumber>::interleave( - rnumber *a, - int dim) -{ - TIMEZONE("field_descriptor::interleav"); - /* the following is copied from - * http://agentzlerich.blogspot.com/2010/01/using-fftw-for-in-place-matrix.html - * */ - typename fftw_interface<rnumber>::iodim howmany_dims[2]; - howmany_dims[0].n = dim; - howmany_dims[0].is = this->local_size; - howmany_dims[0].os = 1; - howmany_dims[1].n = this->local_size; - howmany_dims[1].is = 1; - howmany_dims[1].os = dim; - const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); - - typename fftw_interface<rnumber>::plan tmp = fftw_interface<rnumber>::plan_guru_r2r( - /*rank*/0, - /*dims*/nullptr, - howmany_rank, - howmany_dims, - a, - a, - /*kind*/nullptr, - DEFAULT_FFTW_FLAG); - fftw_interface<rnumber>::execute(tmp); - fftw_interface<rnumber>::destroy_plan(tmp); - return EXIT_SUCCESS; -} - -template <class rnumber> -int field_descriptor<rnumber>::interleave( - typename fftw_interface<rnumber>::complex *a, - int dim) -{ - TIMEZONE("field_descriptor::interleave2"); - typename fftw_interface<rnumber>::iodim howmany_dims[2]; - howmany_dims[0].n = dim; - howmany_dims[0].is = this->local_size; - howmany_dims[0].os = 1; - howmany_dims[1].n = this->local_size; - howmany_dims[1].is = 1; - howmany_dims[1].os = dim; - const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); - - typename fftw_interface<rnumber>::plan tmp = fftw_interface<rnumber>::plan_guru_dft( - /*rank*/0, - /*dims*/nullptr, - howmany_rank, - howmany_dims, - a, - a, - +1, - DEFAULT_FFTW_FLAG); - fftw_interface<rnumber>::execute(tmp); - fftw_interface<rnumber>::destroy_plan(tmp); - return EXIT_SUCCESS; -} - -template <class rnumber> -field_descriptor<rnumber>* field_descriptor<rnumber>::get_transpose() -{ - TIMEZONE("field_descriptor::get_transpose"); - int n[this->ndims]; - for (int i=0; i<this->ndims; i++) - n[i] = this->sizes[this->ndims - i - 1]; - return new field_descriptor<rnumber>(this->ndims, n, this->mpi_dtype, this->comm); -} - -/*****************************************************************************/ -/*****************************************************************************/ - - - -/*****************************************************************************/ -/* destructor looks the same for both float and double */ -template <class rnumber> -field_descriptor<rnumber>::~field_descriptor() -{ - DEBUG_MSG_WAIT( - MPI_COMM_WORLD, - this->io_comm == MPI_COMM_NULL ? 
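// interleave() above is the FFTW "guru" trick referenced in the comment: a
// rank-0 transform with two howmany dimensions is just a strided copy, here an
// in-place reshuffle from [component][point] to [point][component] ordering.
// Double-precision sketch (n and dim are illustrative parameters):
#include <fftw3.h>
void interleave_inplace(double *a, int n, int dim)
{
    fftw_iodim howmany[2];
    howmany[0].n = dim; howmany[0].is = n; howmany[0].os = 1;
    howmany[1].n = n;   howmany[1].is = 1; howmany[1].os = dim;
    fftw_plan p = fftw_plan_guru_r2r(0, nullptr, 2, howmany,
                                     a, a, nullptr, FFTW_ESTIMATE);
    fftw_execute(p);
    fftw_destroy_plan(p);
}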
"null\n" : "not null\n"); - DEBUG_MSG_WAIT( - MPI_COMM_WORLD, - "subsizes[0] = %d \n", this->subsizes[0]); - if (this->subsizes[0] > 0) - { - DEBUG_MSG_WAIT( - this->io_comm, - "deallocating mpi_array_dtype\n"); - MPI_Type_free(&this->mpi_array_dtype); - } - if (this->nprocs != this->io_nprocs && this->io_myrank != MPI_PROC_NULL) - { - DEBUG_MSG_WAIT( - this->io_comm, - "freeing io_comm\n"); - MPI_Comm_free(&this->io_comm); - } - delete[] this->sizes; - delete[] this->subsizes; - delete[] this->starts; - delete[] this->rank; - delete[] this->all_start0; - delete[] this->all_size0; -} -/*****************************************************************************/ - - - -/*****************************************************************************/ -/* finally, force generation of code */ -template class field_descriptor<float>; -template class field_descriptor<double>; -/*****************************************************************************/ - diff --git a/bfps/cpp/field_descriptor.hpp b/bfps/cpp/field_descriptor.hpp deleted file mode 100644 index 2fb491bca7c130704fc5de5d22c3393cb196eec7..0000000000000000000000000000000000000000 --- a/bfps/cpp/field_descriptor.hpp +++ /dev/null @@ -1,114 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <mpi.h> -#include <fftw3-mpi.h> -#include "fftw_interface.hpp" - -#ifndef FIELD_DESCRIPTOR - -#define FIELD_DESCRIPTOR - -extern int myrank, nprocs; - -template <class rnumber> -class field_descriptor -{ - private: - typedef rnumber cnumber[2]; - public: - - /* data */ - int *sizes; - int *subsizes; - int *starts; - int ndims; - int *rank; - int *all_start0; - int *all_size0; - ptrdiff_t slice_size, local_size, full_size; - MPI_Datatype mpi_array_dtype, mpi_dtype; - int myrank, nprocs, io_myrank, io_nprocs; - MPI_Comm comm, io_comm; - - - /* methods */ - field_descriptor( - int ndims, - int *n, - MPI_Datatype element_type, - MPI_Comm COMM_TO_USE); - ~field_descriptor(); - - /* io is performed using MPI_File stuff, and our - * own mpi_array_dtype that was defined in the constructor. - * */ - int read( - const char *fname, - void *buffer); - int write( - const char *fname, - void *buffer); - - /* a function that generates the transposed descriptor. - * don't forget to delete the result once you're done with it. - * the transposed descriptor is useful for io operations. - * */ - field_descriptor<rnumber> *get_transpose(); - - /* we don't actually need the transposed descriptor to perform - * the transpose operation: we only need the in/out fields. 
- * */ - int transpose( - rnumber *input, - rnumber *output); - int transpose( - typename fftw_interface<rnumber>::complex *input, - typename fftw_interface<rnumber>::complex *output = NULL); - - int interleave( - rnumber *input, - int dim); - int interleave( - typename fftw_interface<rnumber>::complex *input, - int dim); -}; - - -inline float btle(const float be) - { - float le; - char *befloat = (char *) & be; - char *lefloat = (char *) & le; - lefloat[0] = befloat[3]; - lefloat[1] = befloat[2]; - lefloat[2] = befloat[1]; - lefloat[3] = befloat[0]; - return le; - } - -#endif//FIELD_DESCRIPTOR - diff --git a/bfps/cpp/fluid_solver.cpp b/bfps/cpp/fluid_solver.cpp deleted file mode 100644 index 319186103797f8135d4d3e2244ed5e3a8f271b00..0000000000000000000000000000000000000000 --- a/bfps/cpp/fluid_solver.cpp +++ /dev/null @@ -1,1057 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -//#define NDEBUG - -#include <cassert> -#include <cmath> -#include <cstring> -#include "fluid_solver.hpp" -#include "fftw_tools.hpp" -#include "scope_timer.hpp" -#include "shared_array.hpp" - - -template <class rnumber> -void fluid_solver<rnumber>::impose_zero_modes() -{ - if (this->cd->myrank == this->cd->rank[0]) - { - std::fill_n((rnumber*)(this->cu), 6, 0.0); - std::fill_n((rnumber*)(this->cv[0]), 6, 0.0); - std::fill_n((rnumber*)(this->cv[1]), 6, 0.0); - std::fill_n((rnumber*)(this->cv[2]), 6, 0.0); - } -} -/*****************************************************************************/ -/* macro for specializations to numeric types compatible with FFTW */ - -template <class rnumber> -fluid_solver<rnumber>::fluid_solver( - const char *NAME, - int nx, - int ny, - int nz, - double DKX, - double DKY, - double DKZ, - int DEALIAS_TYPE, - unsigned FFTW_PLAN_RIGOR) : fluid_solver_base<rnumber>( - NAME, - nx , ny , nz, - DKX, DKY, DKZ, - DEALIAS_TYPE, - FFTW_PLAN_RIGOR) -{ - TIMEZONE("fluid_solver::fluid_solver"); - this->cvorticity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); - this->cvelocity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); - this->rvorticity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); - /*this->rvelocity = (rnumber*)(this->cvelocity);*/ - this->rvelocity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); - - this->ru = this->rvelocity; - this->cu = this->cvelocity; - - this->rv[0] = this->rvorticity; - this->rv[3] = this->rvorticity; - this->cv[0] = this->cvorticity; - this->cv[3] = this->cvorticity; - - this->cv[1] = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); - this->cv[2] = this->cv[1]; - this->rv[1] 
= fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); - this->rv[2] = this->rv[1]; - - this->c2r_vorticity = new typename fftw_interface<rnumber>::plan; - this->r2c_vorticity = new typename fftw_interface<rnumber>::plan; - this->c2r_velocity = new typename fftw_interface<rnumber>::plan; - this->r2c_velocity = new typename fftw_interface<rnumber>::plan; - - ptrdiff_t sizes[] = {nz, - ny, - nx}; - - *this->c2r_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->cvorticity, this->rvorticity, - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - - *this->r2c_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->rvorticity, this->cvorticity, - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - - *this->c2r_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->cvelocity, this->rvelocity, - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - - *this->r2c_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->rvelocity, this->cvelocity, - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - - this->uc2r = this->c2r_velocity; - this->ur2c = this->r2c_velocity; - this->vc2r[0] = this->c2r_vorticity; - this->vr2c[0] = this->r2c_vorticity; - - this->vc2r[1] = new typename fftw_interface<rnumber>::plan; - this->vr2c[1] = new typename fftw_interface<rnumber>::plan; - this->vc2r[2] = new typename fftw_interface<rnumber>::plan; - this->vr2c[2] = new typename fftw_interface<rnumber>::plan; - - *(this->vc2r[1]) = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->cv[1], this->rv[1], - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - - *this->vc2r[2] = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->cv[2], this->rv[2], - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - - *this->vr2c[1] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->rv[1], this->cv[1], - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - - *this->vr2c[2] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, - this->rv[2], this->cv[2], - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); - - /* ``physical'' parameters etc, initialized here just in case */ - - this->nu = 0.1; - this->fmode = 1; - this->famplitude = 1.0; - this->fk0 = 0; - this->fk1 = 3.0; - /* initialization of fields must be done AFTER planning */ - std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0); - std::fill_n((rnumber*)this->cvelocity, this->cd->local_size*2, 0.0); - std::fill_n(this->rvelocity, this->cd->local_size*2, 0.0); - std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); - std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0); - std::fill_n(this->rv[1], this->cd->local_size*2, 0.0); - std::fill_n(this->rv[2], this->cd->local_size*2, 0.0); -} - -template <class rnumber> -fluid_solver<rnumber>::~fluid_solver() -{ - fftw_interface<rnumber>::destroy_plan(*this->c2r_vorticity); - 
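// The constructor above builds one r2c/c2r plan pair per field, always with
// howmany = 3 (vector fields) and the FFTW_MPI_TRANSPOSED_IN/OUT flags, so the
// k-space data stays in FFTW's transposed layout and one global transpose per
// transform is avoided.  Minimal double-precision sketch (assumes fftw_mpi_init()
// was already called and the arrays were allocated with the MPI local sizes):
#include <fftw3-mpi.h>
void plan_vector_fft(ptrdiff_t nz, ptrdiff_t ny, ptrdiff_t nx,
                     double *rdata, fftw_complex *cdata, MPI_Comm comm)
{
    const ptrdiff_t sizes[3] = {nz, ny, nx};
    fftw_plan r2c = fftw_mpi_plan_many_dft_r2c(
        3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
        rdata, cdata, comm, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_OUT);
    fftw_plan c2r = fftw_mpi_plan_many_dft_c2r(
        3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
        cdata, rdata, comm, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_IN);
    fftw_execute(r2c);  // real -> complex, output left in transposed order
    fftw_execute(c2r);  // complex -> real, input expected in transposed order
    fftw_destroy_plan(r2c);
    fftw_destroy_plan(c2r);
}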
fftw_interface<rnumber>::destroy_plan(*this->r2c_vorticity); - fftw_interface<rnumber>::destroy_plan(*this->c2r_velocity ); - fftw_interface<rnumber>::destroy_plan(*this->r2c_velocity ); - fftw_interface<rnumber>::destroy_plan(*this->vc2r[1]); - fftw_interface<rnumber>::destroy_plan(*this->vr2c[1]); - fftw_interface<rnumber>::destroy_plan(*this->vc2r[2]); - fftw_interface<rnumber>::destroy_plan(*this->vr2c[2]); - - delete this->c2r_vorticity; - delete this->r2c_vorticity; - delete this->c2r_velocity ; - delete this->r2c_velocity ; - delete this->vc2r[1]; - delete this->vr2c[1]; - delete this->vc2r[2]; - delete this->vr2c[2]; - - fftw_interface<rnumber>::free(this->cv[1]); - fftw_interface<rnumber>::free(this->rv[1]); - fftw_interface<rnumber>::free(this->cvorticity); - fftw_interface<rnumber>::free(this->rvorticity); - fftw_interface<rnumber>::free(this->cvelocity); - fftw_interface<rnumber>::free(this->rvelocity); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_vorticity() -{ - TIMEZONE("fluid_solver::compute_vorticity"); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - // cindex indexing is thread safe (and tindex too) + it is a write - ptrdiff_t tindex = 3*cindex; - if (k2 <= this->kM2) - { - this->cvorticity[tindex+0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); - this->cvorticity[tindex+1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); - this->cvorticity[tindex+2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); - this->cvorticity[tindex+0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); - this->cvorticity[tindex+1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); - this->cvorticity[tindex+2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); - } - else{ - std::fill_n((rnumber*)(this->cvorticity+tindex), 6, 0.0); - } - } - ); - this->symmetrize(this->cvorticity, 3); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_velocity(rnumber (*__restrict__ vorticity)[2]) -{ - TIMEZONE("fluid_solver::compute_velocity"); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - // cindex indexing is thread safe (and tindex too) + it is a write - ptrdiff_t tindex = 3*cindex; - if (k2 <= this->kM2 && k2 > 0) - { - this->cu[tindex+0][0] = -(this->ky[yindex]*vorticity[tindex+2][1] - this->kz[zindex]*vorticity[tindex+1][1]) / k2; - this->cu[tindex+1][0] = -(this->kz[zindex]*vorticity[tindex+0][1] - this->kx[xindex]*vorticity[tindex+2][1]) / k2; - this->cu[tindex+2][0] = -(this->kx[xindex]*vorticity[tindex+1][1] - this->ky[yindex]*vorticity[tindex+0][1]) / k2; - this->cu[tindex+0][1] = (this->ky[yindex]*vorticity[tindex+2][0] - this->kz[zindex]*vorticity[tindex+1][0]) / k2; - this->cu[tindex+1][1] = (this->kz[zindex]*vorticity[tindex+0][0] - this->kx[xindex]*vorticity[tindex+2][0]) / k2; - this->cu[tindex+2][1] = (this->kx[xindex]*vorticity[tindex+1][0] - this->ky[yindex]*vorticity[tindex+0][0]) / k2; - } - else - std::fill_n((rnumber*)(this->cu+tindex), 6, 0.0); - } - ); - /*this->symmetrize(this->cu, 3);*/ -} - -template <class rnumber> -void fluid_solver<rnumber>::ift_velocity() -{ - TIMEZONE("fluid_solver::ift_velocity"); - fftw_interface<rnumber>::execute(*(this->c2r_velocity )); -} - -template <class 
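// compute_vorticity() and compute_velocity() above are the spectral curl and
// its inverse (Biot-Savart): w_hat = i k x u_hat and u_hat = i k x w_hat / k^2
// for k^2 > 0, with modes beyond the dealiasing cutoff kM zeroed.  Single-mode
// sketch in the same split real/imaginary layout as cu/cv (illustrative only):
static inline void spectral_curl_mode(const double k[3],
                                      const double v[3][2],
                                      double w[3][2])
{
    for (int c = 0; c < 3; c++)
    {
        const int a = (c + 1) % 3, b = (c + 2) % 3;
        w[c][0] = -(k[a]*v[b][1] - k[b]*v[a][1]);  // Re(i (k x v)_c)
        w[c][1] =   k[a]*v[b][0] - k[b]*v[a][0];   // Im(i (k x v)_c)
    }
}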
rnumber> -void fluid_solver<rnumber>::ift_vorticity() -{ - TIMEZONE("fluid_solver::ift_vorticity"); - std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); - fftw_interface<rnumber>::execute(*(this->c2r_vorticity )); -} - -template <class rnumber> -void fluid_solver<rnumber>::dft_velocity() -{ - TIMEZONE("fluid_solver::dft_velocity"); - fftw_interface<rnumber>::execute(*(this->r2c_velocity )); -} - -template <class rnumber> -void fluid_solver<rnumber>::dft_vorticity() -{ - TIMEZONE("fluid_solver::dft_vorticity"); - std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0); - fftw_interface<rnumber>::execute(*(this->r2c_vorticity )); -} - -template <class rnumber> -void fluid_solver<rnumber>::add_forcing( - rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor) -{ - TIMEZONE("fluid_solver::add_forcing"); - if (strcmp(this->forcing_type, "none") == 0) - return; - if (strcmp(this->forcing_type, "Kolmogorov") == 0) - { - ptrdiff_t cindex; - if (this->cd->myrank == this->cd->rank[this->fmode]) - { - cindex = ((this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; - acc_field[cindex+2][0] -= this->famplitude*factor/2; - } - if (this->cd->myrank == this->cd->rank[this->cd->sizes[0] - this->fmode]) - { - cindex = ((this->cd->sizes[0] - this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; - acc_field[cindex+2][0] -= this->famplitude*factor/2; - } - return; - } - if (strcmp(this->forcing_type, "linear") == 0) - { - CLOOP( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ - // cindex indexing is thread safe (and cindex*3+c too) - double knorm = sqrt(this->kx[xindex]*this->kx[xindex] + - this->ky[yindex]*this->ky[yindex] + - this->kz[zindex]*this->kz[zindex]); - if ((this->fk0 <= knorm) && (this->fk1 >= knorm)) - for (int c=0; c<3; c++) - for (int i=0; i<2; i++) - acc_field[cindex*3+c][i] += this->famplitude*vort_field[cindex*3+c][i]*factor; - } - ); - return; - } -} - -template <class rnumber> -void fluid_solver<rnumber>::omega_nonlin( - int src) -{ - TIMEZONE("fluid_solver::omega_nonlin"); - assert(src >= 0 && src < 3); - this->compute_velocity(this->cv[src]); - /* get fields from Fourier space to real space */ - { - TIMEZONE("fluid_solver::omega_nonlin::fftw"); - fftw_interface<rnumber>::execute(*(this->c2r_velocity )); - fftw_interface<rnumber>::execute(*(this->vc2r[src])); - } - /* compute cross product $u \times \omega$, and normalize */ - { - TIMEZONE("fluid_solver::omega_nonlin::RLOOP"); - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - ptrdiff_t tindex = 3*rindex; - rnumber tmp[3][2]; - for (int cc=0; cc<3; cc++) - tmp[cc][0] = (this->ru[tindex+(cc+1)%3]*this->rv[src][tindex+(cc+2)%3] - - this->ru[tindex+(cc+2)%3]*this->rv[src][tindex+(cc+1)%3]); - // Access to rindex is thread safe so there is no overlap between threads - for (int cc=0; cc<3; cc++) - this->ru[(3*rindex)+cc] = tmp[cc][0] / this->normalization_factor; - } - ); - } - /* go back to Fourier space */ - this->clean_up_real_space(this->ru, 3); - { - TIMEZONE("fluid_solver::omega_nonlin::fftw-2"); - fftw_interface<rnumber>::execute(*(this->r2c_velocity )); - } - this->dealias(this->cu, 3); - /* $\imath k \times Fourier(u \times \omega)$ */ - { - TIMEZONE("fluid_solver::omega_nonlin::CLOOP"); - CLOOP( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ - rnumber tmp[3][2]; - ptrdiff_t tindex = 
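// omega_nonlin() implements the rotational form of the vorticity equation,
// d(omega)/dt = curl(u x omega) + nu laplacian(omega): the cross product is
// formed point-wise in real space, normalized (FFTW transforms are
// unnormalized), transformed back, dealiased, and only then the outer curl is
// applied in k-space.  Point-wise sketch of the middle step (illustrative):
static inline void cross_normalized(const double u[3], const double w[3],
                                    double norm, double out[3])
{
    for (int c = 0; c < 3; c++)
    {
        const int a = (c + 1) % 3, b = (c + 2) % 3;
        out[c] = (u[a]*w[b] - u[b]*w[a]) / norm;  // (u x omega)_c / N
    }
}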
3*cindex; - { - tmp[0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); - tmp[1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); - tmp[2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); - tmp[0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); - tmp[1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); - tmp[2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); - } - // cindex indexing is thread safe so it is 3*cindex so there is no overlap between threads - for (int cc=0; cc<3; cc++) - for (int i=0; i<2; i++) - this->cu[tindex+cc][i] = tmp[cc][i]; - } - ); - } - { - TIMEZONE("fluid_solver::omega_nonlin::add_forcing"); - this->add_forcing(this->cu, this->cv[src], 1.0); - } - { - TIMEZONE("fluid_solver::omega_nonlin::force_divfree"); - this->force_divfree(this->cu); - } -} - -template <class rnumber> -void fluid_solver<rnumber>::step(double dt) -{ - TIMEZONE("fluid_solver::step"); - std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0); - this->omega_nonlin(0); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ - if (k2 <= this->kM2) - { - double factor0 = exp(-this->nu * k2 * dt); - // cindex indexing is thread safe so there is no overlap between threads - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) - this->cv[1][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i] + - dt*this->cu[3*cindex+cc][i])*factor0; - } - } - ); - - this->omega_nonlin(1); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ - if (k2 <= this->kM2) - { - double factor0 = exp(-this->nu * k2 * dt/2); - double factor1 = exp( this->nu * k2 * dt/2); - // cindex indexing is thread safe so there is no overlap between threads - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) - this->cv[2][3*cindex+cc][i] = (3*this->cv[0][3*cindex+cc][i]*factor0 + - (this->cv[1][3*cindex+cc][i] + - dt*this->cu[3*cindex+cc][i])*factor1)*0.25; - } - } - ); - - this->omega_nonlin(2); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ - if (k2 <= this->kM2) - { - double factor0 = exp(-this->nu * k2 * dt * 0.5); - // cindex indexing is thread safe so there is no overlap between threads - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) - this->cv[3][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i]*factor0 + - 2*(this->cv[2][3*cindex+cc][i] + - dt*this->cu[3*cindex+cc][i]))*factor0/3; - } - } - ); - - this->force_divfree(this->cvorticity); - this->symmetrize(this->cvorticity, 3); - this->iteration++; -} - -template <class rnumber> -int fluid_solver<rnumber>::read(char field, char representation) -{ - TIMEZONE("fluid_solver::read"); - char fname[512]; - int read_result; - if (field == 'v') - { - if (representation == 'c') - { - this->fill_up_filename("cvorticity", fname); - read_result = this->cd->read(fname, (void*)this->cvorticity); - if (read_result != EXIT_SUCCESS) - return read_result; - } - if (representation == 'r') - { - read_result = this->read_base("rvorticity", this->rvorticity); - if (read_result != EXIT_SUCCESS) - return read_result; - else - fftw_interface<rnumber>::execute(*(this->r2c_vorticity )); - } - this->low_pass_Fourier(this->cvorticity, 3, this->kM); - 
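// step() above is a three-stage Runge-Kutta scheme with an integrating factor
// exp(-nu k^2 dt) that absorbs the viscous term exactly.  Scalar sketch for a
// single Fourier mode (w0 = current mode, N0..N2 = the nonlinear term evaluated
// at each stage, nuk2 = nu * k^2); purely illustrative:
#include <cmath>
static inline double rk3_if_mode(double w0, double dt, double nuk2,
                                 double N0, double N1, double N2)
{
    const double e  = std::exp(-nuk2 * dt);        // full-step viscous decay
    const double eh = std::exp(-nuk2 * dt * 0.5);  // half-step viscous decay
    const double w1 = (w0 + dt*N0) * e;                      // stage 1
    const double w2 = (3.0*w0*eh + (w1 + dt*N1)/eh) * 0.25;  // stage 2
    return (w0*eh + 2.0*(w2 + dt*N2)) * eh / 3.0;            // stage 3
}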
this->force_divfree(this->cvorticity); - this->symmetrize(this->cvorticity, 3); - return EXIT_SUCCESS; - } - if ((field == 'u') && (representation == 'c')) - { - read_result = this->read_base("cvelocity", this->cvelocity); - this->low_pass_Fourier(this->cvelocity, 3, this->kM); - this->force_divfree(this->cvorticity); - this->symmetrize(this->cvorticity, 3); - return read_result; - } - if ((field == 'u') && (representation == 'r')) - return this->read_base("rvelocity", this->rvelocity); - return EXIT_FAILURE; -} - -template <class rnumber> -int fluid_solver<rnumber>::write(char field, char representation) -{ - TIMEZONE("fluid_solver::write"); - char fname[512]; - if ((field == 'v') && (representation == 'c')) - { - this->fill_up_filename("cvorticity", fname); - return this->cd->write(fname, (void*)this->cvorticity); - } - if ((field == 'v') && (representation == 'r')) - { - fftw_interface<rnumber>::execute(*(this->c2r_vorticity )); - clip_zero_padding<rnumber>(this->rd, this->rvorticity, 3); - this->fill_up_filename("rvorticity", fname); - return this->rd->write(fname, this->rvorticity); - } - this->compute_velocity(this->cvorticity); - if ((field == 'u') && (representation == 'c')) - { - this->fill_up_filename("cvelocity", fname); - return this->cd->write(fname, this->cvelocity); - } - if ((field == 'u') && (representation == 'r')) - { - this->ift_velocity(); - clip_zero_padding<rnumber>(this->rd, this->rvelocity, 3); - this->fill_up_filename("rvelocity", fname); - return this->rd->write(fname, this->rvelocity); - } - return EXIT_FAILURE; -} - -template <class rnumber> -int fluid_solver<rnumber>::write_rTrS2() -{ - TIMEZONE("fluid_solver::write_rTrS2"); - char fname[512]; - this->fill_up_filename("rTrS2", fname); - typename fftw_interface<rnumber>::complex *ca; - rnumber *ra; - ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3); - ra = (rnumber*)(ca); - this->compute_velocity(this->cvorticity); - this->compute_vector_gradient(ca, this->cvelocity); - for (int cc=0; cc<3; cc++) - { - std::copy( - (rnumber*)(ca + cc*this->cd->local_size), - (rnumber*)(ca + (cc+1)*this->cd->local_size), - (rnumber*)this->cv[1]); - fftw_interface<rnumber>::execute(*(this->vc2r[1])); - std::copy( - this->rv[1], - this->rv[1] + this->cd->local_size*2, - ra + cc*this->cd->local_size*2); - } - /* velocity gradient is now stored, in real space, in ra */ - rnumber *dx_u, *dy_u, *dz_u; - dx_u = ra; - dy_u = ra + 2*this->cd->local_size; - dz_u = ra + 4*this->cd->local_size; - rnumber *trS2 = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); - shared_array<double> average_local(1, [&](double* data){ - data[0] = 0; - }); - - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - rnumber AxxAxx; - rnumber AyyAyy; - rnumber AzzAzz; - rnumber Sxy; - rnumber Syz; - rnumber Szx; - ptrdiff_t tindex = 3*rindex; - AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; - AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; - AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; - Sxy = dx_u[tindex+1]+dy_u[tindex+0]; - Syz = dy_u[tindex+2]+dz_u[tindex+1]; - Szx = dz_u[tindex+0]+dx_u[tindex+2]; - // rindex is thread safe + No overlap between thread it is a write - trS2[rindex] = (AxxAxx + AyyAyy + AzzAzz + - (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); - average_local.getMine()[0] += trS2[rindex]; - } - ); - average_local.mergeParallel(); - double average; - MPI_Allreduce( - average_local.getMasterData(), - &average, - 1, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - DEBUG_MSG("average TrS2 is %g\n", 
average); - fftw_interface<rnumber>::free(ca); - /* output goes here */ - int ntmp[3]; - ntmp[0] = this->rd->sizes[0]; - ntmp[1] = this->rd->sizes[1]; - ntmp[2] = this->rd->sizes[2]; - field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); - clip_zero_padding<rnumber>(scalar_descriptor, trS2, 1); - int return_value = scalar_descriptor->write(fname, trS2); - delete scalar_descriptor; - fftw_interface<rnumber>::free(trS2); - return return_value; -} - -template <class rnumber> -int fluid_solver<rnumber>::write_renstrophy() -{ - TIMEZONE("fluid_solver::write_renstrophy"); - char fname[512]; - this->fill_up_filename("renstrophy", fname); - rnumber *enstrophy = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); - this->ift_vorticity(); - shared_array<double> average_local(1, [&](double* data){ - data[0] = 0; - }); - - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - ptrdiff_t tindex = 3*rindex; - // rindex indexing is thread safe so there is no overlap between threads - enstrophy[rindex] = ( - this->rvorticity[tindex+0]*this->rvorticity[tindex+0] + - this->rvorticity[tindex+1]*this->rvorticity[tindex+1] + - this->rvorticity[tindex+2]*this->rvorticity[tindex+2] - )/2; - average_local.getMine()[0] += enstrophy[rindex]; - } - ); - average_local.mergeParallel(); - double average; - MPI_Allreduce( - average_local.getMasterData(), - &average, - 1, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - DEBUG_MSG("average enstrophy is %g\n", average); - /* output goes here */ - int ntmp[3]; - ntmp[0] = this->rd->sizes[0]; - ntmp[1] = this->rd->sizes[1]; - ntmp[2] = this->rd->sizes[2]; - field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); - clip_zero_padding<rnumber>(scalar_descriptor, enstrophy, 1); - int return_value = scalar_descriptor->write(fname, enstrophy); - delete scalar_descriptor; - fftw_interface<rnumber>::free(enstrophy); - return return_value; -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_pressure(rnumber (*__restrict__ pressure)[2]) -{ - TIMEZONE("fluid_solver::compute_pressure"); - /* assume velocity is already in real space representation */ - /* diagonal terms 11 22 33 */ - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - // rindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*rindex; - for (int cc=0; cc<3; cc++) - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc]; - } - ); - this->clean_up_real_space(this->rv[1], 3); - { - TIMEZONE("fftw_interface<rnumber>::execute"); - fftw_interface<rnumber>::execute(*(this->vr2c[1])); - } - this->dealias(this->cv[1], 3); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2 && k2 > 0) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - for (int i=0; i<2; i++) - { - pressure[cindex][i] = -(this->kx[xindex]*this->kx[xindex]*this->cv[1][tindex+0][i] + - this->ky[yindex]*this->ky[yindex]*this->cv[1][tindex+1][i] + - this->kz[zindex]*this->kz[zindex]*this->cv[1][tindex+2][i]); - } - } - else - std::fill_n((rnumber*)(pressure+cindex), 2, 0.0); - } - ); - /* off-diagonal terms 12 23 31 */ - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t 
/*yindex*/, ptrdiff_t /*zindex*/){ - // rindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*rindex; - for (int cc=0; cc<3; cc++) - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3]; - } - ); - this->clean_up_real_space(this->rv[1], 3); - { - TIMEZONE("fftw_interface<rnumber>::execute"); - fftw_interface<rnumber>::execute(*(this->vr2c[1])); - } - this->dealias(this->cv[1], 3); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2 && k2 > 0) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - for (int i=0; i<2; i++) - { - pressure[cindex][i] -= 2*(this->kx[xindex]*this->ky[yindex]*this->cv[1][tindex+0][i] + - this->ky[yindex]*this->kz[zindex]*this->cv[1][tindex+1][i] + - this->kz[zindex]*this->kx[xindex]*this->cv[1][tindex+2][i]); - pressure[cindex][i] /= this->normalization_factor*k2; - } - } - } - ); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_gradient_statistics( - rnumber (*__restrict__ vec)[2], -double *gradu_moments, -double *trS2QR_moments, -ptrdiff_t *gradu_hist, -ptrdiff_t *trS2QR_hist, -ptrdiff_t *QR2D_hist, -double trS2QR_max_estimates[], -double gradu_max_estimates[], -int nbins, -int QR2D_nbins) -{ - TIMEZONE("fluid_solver::compute_gradient_statistics"); - typename fftw_interface<rnumber>::complex *ca; - rnumber *ra; - ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3); - ra = (rnumber*)(ca); - this->compute_vector_gradient(ca, vec); - for (int cc=0; cc<3; cc++) - { - std::copy( - (rnumber*)(ca + cc*this->cd->local_size), - (rnumber*)(ca + (cc+1)*this->cd->local_size), - (rnumber*)this->cv[1]); - fftw_interface<rnumber>::execute(*(this->vc2r[1])); - std::copy( - this->rv[1], - this->rv[1] + this->cd->local_size*2, - ra + cc*this->cd->local_size*2); - } - /* velocity gradient is now stored, in real space, in ra */ - std::fill_n(this->rv[1], 2*this->cd->local_size, 0.0); - rnumber *dx_u, *dy_u, *dz_u; - dx_u = ra; - dy_u = ra + 2*this->cd->local_size; - dz_u = ra + 4*this->cd->local_size; - double binsize[2]; - double tmp_max_estimate[3]; - tmp_max_estimate[0] = trS2QR_max_estimates[0]; - tmp_max_estimate[1] = trS2QR_max_estimates[1]; - tmp_max_estimate[2] = trS2QR_max_estimates[2]; - binsize[0] = 2*tmp_max_estimate[2] / QR2D_nbins; - binsize[1] = 2*tmp_max_estimate[1] / QR2D_nbins; - ptrdiff_t *local_hist = new ptrdiff_t[QR2D_nbins*QR2D_nbins]; - std::fill_n(local_hist, QR2D_nbins*QR2D_nbins, 0); - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - rnumber AxxAxx; - rnumber AyyAyy; - rnumber AzzAzz; - rnumber AxyAyx; - rnumber AyzAzy; - rnumber AzxAxz; - rnumber Sxy; - rnumber Syz; - rnumber Szx; - // rindex indexing is thread safe so there is no overlap between threads - // tindex[0:2] is thread safe too - ptrdiff_t tindex = 3*rindex; - AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; - AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; - AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; - AxyAyx = dx_u[tindex+1]*dy_u[tindex+0]; - AyzAzy = dy_u[tindex+2]*dz_u[tindex+1]; - AzxAxz = dz_u[tindex+0]*dx_u[tindex+2]; - this->rv[1][tindex+1] = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx - AyzAzy - AzxAxz; - this->rv[1][tindex+2] = - (dx_u[tindex+0]*(AxxAxx/3 + AxyAyx + AzxAxz) + - dy_u[tindex+1]*(AyyAyy/3 + AxyAyx + AyzAzy) + - dz_u[tindex+2]*(AzzAzz/3 + AzxAxz + AyzAzy) + - dx_u[tindex+1]*dy_u[tindex+2]*dz_u[tindex+0] + - 
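// compute_pressure() above solves the spectral pressure Poisson equation:
// taking the divergence of the momentum equation gives
//   laplacian(p) = -d_i d_j (u_i u_j),   i.e.   p_hat = -k_i k_j W_ij / k^2,
// with W_ij the (unnormalized) transform of u_i u_j.  Single-mode sketch, with
// T holding the six independent products in the order 11,22,33,12,23,31
// (illustrative only):
static inline double pressure_mode(const double k[3], const double T[6], double norm)
{
    const double k2 = k[0]*k[0] + k[1]*k[1] + k[2]*k[2];
    const double p = -(k[0]*k[0]*T[0] + k[1]*k[1]*T[1] + k[2]*k[2]*T[2])
                     - 2.0*(k[0]*k[1]*T[3] + k[1]*k[2]*T[4] + k[2]*k[0]*T[5]);
    return p / (norm * k2);  // assumes k2 > 0; the k = 0 mode is set to zero
}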
dx_u[tindex+2]*dy_u[tindex+0]*dz_u[tindex+1]); - int bin0 = int(floor((this->rv[1][tindex+2] + tmp_max_estimate[2]) / binsize[0])); - int bin1 = int(floor((this->rv[1][tindex+1] + tmp_max_estimate[1]) / binsize[1])); - if ((bin0 >= 0 && bin0 < QR2D_nbins) && - (bin1 >= 0 && bin1 < QR2D_nbins)) - local_hist[bin1*QR2D_nbins + bin0]++; - Sxy = dx_u[tindex+1]+dy_u[tindex+0]; - Syz = dy_u[tindex+2]+dz_u[tindex+1]; - Szx = dz_u[tindex+0]+dx_u[tindex+2]; - this->rv[1][tindex] = (AxxAxx + AyyAyy + AzzAzz + - (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); - } - ); - MPI_Allreduce( - local_hist, - QR2D_hist, - QR2D_nbins * QR2D_nbins, - MPI_INT64_T, MPI_SUM, this->cd->comm); - delete[] local_hist; - this->compute_rspace_stats3( - this->rv[1], - trS2QR_moments, - trS2QR_hist, - tmp_max_estimate, - nbins); - double *tmp_moments = new double[10*3]; - ptrdiff_t *tmp_hist = new ptrdiff_t[nbins*3]; - for (int cc=0; cc<3; cc++) - { - tmp_max_estimate[0] = gradu_max_estimates[cc*3 + 0]; - tmp_max_estimate[1] = gradu_max_estimates[cc*3 + 1]; - tmp_max_estimate[2] = gradu_max_estimates[cc*3 + 2]; - this->compute_rspace_stats3( - dx_u + cc*2*this->cd->local_size, - tmp_moments, - tmp_hist, - tmp_max_estimate, - nbins); - for (int n = 0; n < 10; n++) - for (int i = 0; i < 3 ; i++) - { - gradu_moments[(n*3 + cc)*3 + i] = tmp_moments[n*3 + i]; - } - for (int n = 0; n < nbins; n++) - for (int i = 0; i < 3; i++) - { - gradu_hist[(n*3 + cc)*3 + i] = tmp_hist[n*3 + i]; - } - } - delete[] tmp_moments; - delete[] tmp_hist; - fftw_interface<rnumber>::free(ca); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber (*acceleration)[2]) -{ - TIMEZONE("fluid_solver::compute_Lagrangian_acceleration"); - typename fftw_interface<rnumber>::complex *pressure; - pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3); - this->compute_velocity(this->cvorticity); - this->ift_velocity(); - this->compute_pressure(pressure); - this->compute_velocity(this->cvorticity); - std::fill_n((rnumber*)this->cv[1], 2*this->cd->local_size, 0.0); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - for (int cc=0; cc<3; cc++) - for (int i=0; i<2; i++) - this->cv[1][tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; - if (strcmp(this->forcing_type, "linear") == 0) - { - double knorm = sqrt(k2); - if ((this->fk0 <= knorm) && - (this->fk1 >= knorm)) - for (int c=0; c<3; c++) - for (int i=0; i<2; i++) - this->cv[1][tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; - } - this->cv[1][tindex+0][0] += this->kx[xindex]*pressure[cindex][1]; - this->cv[1][tindex+1][0] += this->ky[yindex]*pressure[cindex][1]; - this->cv[1][tindex+2][0] += this->kz[zindex]*pressure[cindex][1]; - this->cv[1][tindex+0][1] -= this->kx[xindex]*pressure[cindex][0]; - this->cv[1][tindex+1][1] -= this->ky[yindex]*pressure[cindex][0]; - this->cv[1][tindex+2][1] -= this->kz[zindex]*pressure[cindex][0]; - } - } - ); - std::copy( - (rnumber*)this->cv[1], - (rnumber*)(this->cv[1] + this->cd->local_size), - (rnumber*)acceleration); - fftw_interface<rnumber>::free(pressure); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_Eulerian_acceleration(rnumber (*__restrict__ acceleration)[2]) -{ - TIMEZONE("fluid_solver::compute_Eulerian_acceleration"); - std::fill_n((rnumber*)(acceleration), 2*this->cd->local_size, 0.0); - 
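// The gradient statistics above bin the standard velocity-gradient invariants,
//   Q = -tr(A^2)/2   and   R = -tr(A^3)/3  (= -det A for a traceless gradient),
// together with tr(S^2), where S is the symmetric strain rate.  Generic sketch
// for a 3x3 gradient a[i][j] = d_i u_j (illustrative, not the in-place loop used above):
static inline void qr_invariants(const double a[3][3], double *Q, double *R)
{
    double tr2 = 0.0, tr3 = 0.0;
    for (int i = 0; i < 3; i++)
        for (int j = 0; j < 3; j++)
        {
            tr2 += a[i][j]*a[j][i];
            for (int k = 0; k < 3; k++)
                tr3 += a[i][j]*a[j][k]*a[k][i];
        }
    *Q = -0.5*tr2;
    *R = -tr3/3.0;
}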
this->compute_velocity(this->cvorticity); - /* put in linear terms */ - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){ - if (k2 <= this->kM2) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - for (int cc=0; cc<3; cc++) - for (int i=0; i<2; i++) - acceleration[tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; - if (strcmp(this->forcing_type, "linear") == 0) - { - double knorm = sqrt(k2); - if ((this->fk0 <= knorm) && - (this->fk1 >= knorm)) - { - for (int c=0; c<3; c++) - for (int i=0; i<2; i++) - acceleration[tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; - } - } - } - } - ); - this->ift_velocity(); - /* compute uu */ - /* 11 22 33 */ - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*rindex; - for (int cc=0; cc<3; cc++) - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc] / this->normalization_factor; - } - ); - this->clean_up_real_space(this->rv[1], 3); - fftw_interface<rnumber>::execute(*(this->vr2c[1])); - this->dealias(this->cv[1], 3); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - acceleration[tindex+0][0] += - this->kx[xindex]*this->cv[1][tindex+0][1]; - acceleration[tindex+0][1] += - -this->kx[xindex]*this->cv[1][tindex+0][0]; - acceleration[tindex+1][0] += - this->ky[yindex]*this->cv[1][tindex+1][1]; - acceleration[tindex+1][1] += - -this->ky[yindex]*this->cv[1][tindex+1][0]; - acceleration[tindex+2][0] += - this->kz[zindex]*this->cv[1][tindex+2][1]; - acceleration[tindex+2][1] += - -this->kz[zindex]*this->cv[1][tindex+2][0]; - } - } - ); - /* 12 23 31 */ - RLOOP ( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*rindex; - for (int cc=0; cc<3; cc++) - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3] / this->normalization_factor; - } - ); - this->clean_up_real_space(this->rv[1], 3); - fftw_interface<rnumber>::execute(*(this->vr2c[1])); - this->dealias(this->cv[1], 3); - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2) - { - // cindex indexing is thread safe so there is no overlap between threads - ptrdiff_t tindex = 3*cindex; - acceleration[tindex+0][0] += - (this->ky[yindex]*this->cv[1][tindex+0][1] + - this->kz[zindex]*this->cv[1][tindex+2][1]); - acceleration[tindex+0][1] += - - (this->ky[yindex]*this->cv[1][tindex+0][0] + - this->kz[zindex]*this->cv[1][tindex+2][0]); - acceleration[tindex+1][0] += - (this->kz[zindex]*this->cv[1][tindex+1][1] + - this->kx[xindex]*this->cv[1][tindex+0][1]); - acceleration[tindex+1][1] += - - (this->kz[zindex]*this->cv[1][tindex+1][0] + - this->kx[xindex]*this->cv[1][tindex+0][0]); - acceleration[tindex+2][0] += - (this->kx[xindex]*this->cv[1][tindex+2][1] + - this->ky[yindex]*this->cv[1][tindex+1][1]); - acceleration[tindex+2][1] += - - (this->kx[xindex]*this->cv[1][tindex+2][0] + - this->ky[yindex]*this->cv[1][tindex+1][0]); - } - } - ); - if (this->cd->myrank == this->cd->rank[0]) - 
std::fill_n((rnumber*)(acceleration), 6, 0.0); - this->force_divfree(acceleration); -} - -template <class rnumber> -void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber *__restrict__ acceleration) -{ - TIMEZONE("fluid_solver::compute_Lagrangian_acceleration"); - this->compute_Lagrangian_acceleration((typename fftw_interface<rnumber>::complex*)acceleration); - fftw_interface<rnumber>::execute(*(this->vc2r[1])); - std::copy( - this->rv[1], - this->rv[1] + 2*this->cd->local_size, - acceleration); -} - -template <class rnumber> -int fluid_solver<rnumber>::write_rpressure() -{ - TIMEZONE("fluid_solver::write_rpressure"); - char fname[512]; - typename fftw_interface<rnumber>::complex *pressure; - pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3); - this->compute_velocity(this->cvorticity); - this->ift_velocity(); - this->compute_pressure(pressure); - this->fill_up_filename("rpressure", fname); - rnumber *rpressure = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); - typename fftw_interface<rnumber>::plan c2r; - c2r = fftw_interface<rnumber>::mpi_plan_dft_c2r_3d( - this->rd->sizes[0], this->rd->sizes[1], this->rd->sizes[2], - pressure, rpressure, this->cd->comm, - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); - fftw_interface<rnumber>::execute(c2r); - /* output goes here */ - int ntmp[3]; - ntmp[0] = this->rd->sizes[0]; - ntmp[1] = this->rd->sizes[1]; - ntmp[2] = this->rd->sizes[2]; - field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); - clip_zero_padding<rnumber>(scalar_descriptor, rpressure, 1); - int return_value = scalar_descriptor->write(fname, rpressure); - delete scalar_descriptor; - fftw_interface<rnumber>::destroy_plan(c2r); - fftw_interface<rnumber>::free(pressure); - fftw_interface<rnumber>::free(rpressure); - return return_value; -} - -/*****************************************************************************/ - - - - -/*****************************************************************************/ -/* finally, force generation of code for single precision */ -template class fluid_solver<float>; -template class fluid_solver<double>; -/*****************************************************************************/ - diff --git a/bfps/cpp/fluid_solver.hpp b/bfps/cpp/fluid_solver.hpp deleted file mode 100644 index 4cc75cee4385353f64dc9bc9e7d34c6efba9ad48..0000000000000000000000000000000000000000 --- a/bfps/cpp/fluid_solver.hpp +++ /dev/null @@ -1,120 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include "field_descriptor.hpp" -#include "fluid_solver_base.hpp" - -#ifndef FLUID_SOLVER - -#define FLUID_SOLVER - -extern int myrank, nprocs; - - -/* container for field descriptor, fields themselves, parameters, etc - * using the same big macro idea that they're using in fftw3.h - * I feel like I should quote: Ugh. - * */ - -template <class rnumber> -class fluid_solver:public fluid_solver_base<rnumber> -{ - public: - /* fields */ - rnumber *rvorticity; - rnumber *rvelocity ; - typename fluid_solver_base<rnumber>::cnumber *cvorticity; - typename fluid_solver_base<rnumber>::cnumber *cvelocity ; - - /* short names for velocity, and 4 vorticity fields */ - rnumber *ru, *rv[4]; - typename fluid_solver_base<rnumber>::cnumber *cu, *cv[4]; - - /* plans */ - typename fftw_interface<rnumber>::plan *c2r_vorticity; - typename fftw_interface<rnumber>::plan *r2c_vorticity; - typename fftw_interface<rnumber>::plan *c2r_velocity; - typename fftw_interface<rnumber>::plan *r2c_velocity; - typename fftw_interface<rnumber>::plan *uc2r, *ur2c; - typename fftw_interface<rnumber>::plan *vr2c[3], *vc2r[3]; - - /* physical parameters */ - double nu; - int fmode; // for Kolmogorov flow - double famplitude; // both for Kflow and band forcing - double fk0, fk1; // for band forcing - char forcing_type[128]; - - /* methods */ - fluid_solver( - const char *NAME, - int nx, - int ny, - int nz, - double DKX = 1.0, - double DKY = 1.0, - double DKZ = 1.0, - int DEALIAS_TYPE = 1, - unsigned FFTW_PLAN_RIGOR = FFTW_MEASURE); - ~fluid_solver(void); - - void compute_gradient_statistics( - rnumber (*__restrict__ vec)[2], - double *__restrict__ gradu_moments, - double *__restrict__ trS2_Q_R_moments, - ptrdiff_t *__restrict__ gradu_histograms, - ptrdiff_t *__restrict__ trS2_Q_R_histograms, - ptrdiff_t *__restrict__ QR2D_histogram, - double trS2_Q_R_max_estimates[3], - double gradu_max_estimates[9], - const int nbins_1D = 256, - const int nbins_2D = 64); - - void compute_vorticity(void); - void compute_velocity(rnumber (*__restrict__ vorticity)[2]); - void compute_pressure(rnumber (*__restrict__ pressure)[2]); - void compute_Eulerian_acceleration(rnumber (*__restrict__ dst)[2]); - void compute_Lagrangian_acceleration(rnumber (*__restrict__ dst)[2]); - void compute_Lagrangian_acceleration(rnumber *__restrict__ dst); - void ift_velocity(); - void dft_velocity(); - void ift_vorticity(); - void dft_vorticity(); - void omega_nonlin(int src); - void step(double dt); - void impose_zero_modes(void); - void add_forcing(rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor); - - int read(char field, char representation); - int write(char field, char representation); - int write_rTrS2(); - int write_renstrophy(); - int write_rpressure(); -}; - -#endif//FLUID_SOLVER - diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp deleted file mode 100644 index 6e4fd3335238218bad0b78462d3506ca9b48c721..0000000000000000000000000000000000000000 --- a/bfps/cpp/fluid_solver_base.cpp +++ /dev/null @@ -1,834 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. 
* -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <cassert> -#include <cmath> -#include <cstring> -#include "base.hpp" -#include "fluid_solver_base.hpp" -#include "fftw_tools.hpp" -#include "scope_timer.hpp" -#include "shared_array.hpp" - -template <class rnumber> -void fluid_solver_base<rnumber>::fill_up_filename(const char *base_name, char *destination) -{ - sprintf(destination, "%s_%s_i%.5x", this->name, base_name, this->iteration); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::clean_up_real_space(rnumber *a, int howmany) -{ - TIMEZONE("fluid_solver_base::clean_up_real_space"); - for (ptrdiff_t rindex = 0; rindex < this->cd->local_size*2; rindex += howmany*(this->rd->subsizes[2]+2)) - std::fill_n(a+rindex+this->rd->subsizes[2]*howmany, 2*howmany, 0.0); -} - -template <class rnumber> -double fluid_solver_base<rnumber>::autocorrel(cnumber *a) -{ - double *spec = fftw_alloc_real(this->nshells*9); - double sum_local; - this->cospectrum(a, a, spec); - sum_local = 0.0; - for (unsigned int n = 0; n < this->nshells; n++) - { - sum_local += spec[n*9] + spec[n*9 + 4] + spec[n*9 + 8]; - } - fftw_free(spec); - return sum_local; -} - -template <class rnumber> -void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec) -{ - TIMEZONE("fluid_solver_base::cospectrum"); - shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){ - std::fill_n(cospec_local, this->nshells*9, 0); - }); - - CLOOP_K2_NXMODES( - this, - - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, - ptrdiff_t /*zindex*/, double k2, int nxmodes){ - if (k2 <= this->kMspec2) - { - int tmp_int = int(sqrt(k2)/this->dk)*9; - double* cospec_local = cospec_local_thread.getMine(); - for (int i=0; i<3; i++) - for (int j=0; j<3; j++) - { - cospec_local[tmp_int+i*3+j] += nxmodes * ( - (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + - (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); - } - }} - ); - cospec_local_thread.mergeParallel(); - MPI_Allreduce( - cospec_local_thread.getMasterData(), - (void*)spec, - this->nshells*9, - MPI_DOUBLE, MPI_SUM, this->cd->comm); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec, const double k2exponent) -{ - TIMEZONE("fluid_solver_base::cospectrum2"); - shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){ - std::fill_n(cospec_local, this->nshells*9, 0); - }); - - CLOOP_K2_NXMODES( - this, - - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, - ptrdiff_t /*zindex*/, double k2, int nxmodes){ - if (k2 <= this->kMspec2) - { - double factor = nxmodes*pow(k2, k2exponent); - int tmp_int = int(sqrt(k2)/this->dk)*9; - double* cospec_local = cospec_local_thread.getMine(); - for (int i=0; 
i<3; i++) - for (int j=0; j<3; j++) - { - cospec_local[tmp_int+i*3+j] += factor * ( - (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + - (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); - } - }} - ); - cospec_local_thread.mergeParallel(); - MPI_Allreduce( - cospec_local_thread.getMasterData(), - (void*)spec, - this->nshells*9, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - //for (int n=0; n<this->nshells; n++) - //{ - // spec[n] *= 12.5663706144*pow(this->kshell[n], 2) / this->nshell[n]; - // /*is normalization needed? - // * spec[n] /= this->normalization_factor*/ - //} -} - -template <class rnumber> -void fluid_solver_base<rnumber>::compute_rspace_stats( - const rnumber *a, - const hid_t group, - const std::string dset_name, - const hsize_t toffset, - const std::vector<double> max_estimate) -{ - TIMEZONE("fluid_solver_base::compute_rspace_stats"); - const int nmoments = 10; - int nvals, nbins; - if (this->rd->myrank == 0) - { - hid_t dset, wspace; - hsize_t dims[3]; - int ndims; - dset = H5Dopen(group, ("moments/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - ndims = H5Sget_simple_extent_dims(wspace, dims, NULL); - assert(ndims == 3); - variable_used_only_in_assert(ndims); - assert(dims[1] == nmoments); - nvals = dims[2]; - H5Sclose(wspace); - H5Dclose(dset); - dset = H5Dopen(group, ("histograms/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - ndims = H5Sget_simple_extent_dims(wspace, dims, NULL); - assert(ndims == 3); - nbins = dims[1]; - assert(nvals == dims[2]); - H5Sclose(wspace); - H5Dclose(dset); - } - MPI_Bcast(&nvals, 1, MPI_INT, 0, this->rd->comm); - MPI_Bcast(&nbins, 1, MPI_INT, 0, this->rd->comm); - assert(nvals == max_estimate.size()); - shared_array<double> threaded_local_moments(nmoments*nvals, [&](double* local_moments){ - std::fill_n(local_moments, nmoments*nvals, 0); - if (nvals == 4) local_moments[3] = max_estimate[3]; - }); - - shared_array<double> threaded_val_tmp(nvals); - - shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){ - std::fill_n(local_hist, nbins*nvals, 0); - }); - - // Not written by threads - double *binsize = new double[nvals]; - for (int i=0; i<nvals; i++) - binsize[i] = 2*max_estimate[i] / nbins; - - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - double *val_tmp = threaded_val_tmp.getMine(); - ptrdiff_t* local_hist = threaded_local_hist.getMine(); - double *local_moments = threaded_local_moments.getMine(); - - if (nvals == 4) val_tmp[3] = 0.0; - for (int i=0; i<3; i++) - { - val_tmp[i] = a[rindex*3+i]; - if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i]; - } - if (nvals == 4) - { - val_tmp[3] = sqrt(val_tmp[3]); - if (val_tmp[3] < local_moments[0*nvals+3]) - local_moments[0*nvals+3] = val_tmp[3]; - if (val_tmp[3] > local_moments[9*nvals+3]) - local_moments[9*nvals+3] = val_tmp[3]; - int bin = int(floor(val_tmp[3]*2/binsize[3])); - if (bin >= 0 && bin < nbins) - local_hist[bin*nvals+3]++; - } - for (int i=0; i<3; i++) - { - if (val_tmp[i] < local_moments[0*nvals+i]) - local_moments[0*nvals+i] = val_tmp[i]; - if (val_tmp[i] > local_moments[(nmoments-1)*nvals+i]) - local_moments[(nmoments-1)*nvals+i] = val_tmp[i]; - int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); - if (bin >= 0 && bin < nbins) - local_hist[bin*nvals+i]++; - } - for (int n=1; n < nmoments-1; n++){ - double pow_tmp = 1.; - for (int i=0; i<nvals; i++){ - local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp); - } - } - } - ); - - 
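// cospectrum() above accumulates, for every shell int(|k|/dk), the 3x3 tensor
// of co-spectra Re(a_i conj(b_j)) = a_i[0]*b_j[0] + a_i[1]*b_j[1], weighted by
// the number of x-modes each entry represents, and finishes with an
// MPI_Allreduce.  Per-mode sketch of the binning step (illustrative only):
#include <cmath>
static inline void accumulate_shell(double *spec, int nshells, double dk,
                                    double k2, int nxmodes,
                                    const double a[3][2], const double b[3][2])
{
    const int shell = int(std::sqrt(k2) / dk);
    if (shell >= nshells) return;   // the original tests k2 against a cutoff instead
    for (int i = 0; i < 3; i++)
        for (int j = 0; j < 3; j++)
            spec[shell*9 + i*3 + j] += nxmodes * (a[i][0]*b[j][0] + a[i][1]*b[j][1]);
}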
threaded_local_hist.mergeParallel(); - threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double { - if(nvals == int(4) && idx == 0*nvals+3){ - return std::min(v1, v2); - } - if(nvals == int(4) && idx == 9*nvals+3){ - return std::max(v1, v2); - } - if(idx < 3){ - return std::min(v1, v2); - } - if((nmoments-1)*nvals <= idx && idx < (nmoments-1)*nvals+3){ - return std::max(v1, v2); - } - return v1 + v2; - }); - - - double *moments = new double[nmoments*nvals]; - MPI_Allreduce( - threaded_local_moments.getMasterData(), - (void*)moments, - nvals, - MPI_DOUBLE, MPI_MIN, this->cd->comm); - MPI_Allreduce( - (threaded_local_moments.getMasterData() + nvals), - (void*)(moments+nvals), - (nmoments-2)*nvals, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - MPI_Allreduce( - (threaded_local_moments.getMasterData() + (nmoments-1)*nvals), - (void*)(moments+(nmoments-1)*nvals), - nvals, - MPI_DOUBLE, MPI_MAX, this->cd->comm); - ptrdiff_t *hist = new ptrdiff_t[nbins*nvals]; - MPI_Allreduce( - threaded_local_hist.getMasterData(), - (void*)hist, - nbins*nvals, - MPI_INT64_T, MPI_SUM, this->cd->comm); - for (int n=1; n < nmoments-1; n++) - for (int i=0; i<nvals; i++) - moments[n*nvals + i] /= this->normalization_factor; - delete[] binsize; - if (this->rd->myrank == 0) - { - hid_t dset, wspace, mspace; - hsize_t count[3], offset[3], dims[3]; - dset = H5Dopen(group, ("moments/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - H5Sget_simple_extent_dims(wspace, dims, NULL); - offset[0] = toffset; - offset[1] = 0; - offset[2] = 0; - count[0] = 1; - count[1] = nmoments; - count[2] = nvals; - mspace = H5Screate_simple(3, count, NULL); - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, moments); - H5Sclose(wspace); - H5Sclose(mspace); - H5Dclose(dset); - dset = H5Dopen(group, ("histograms/" + dset_name).c_str(), H5P_DEFAULT); - wspace = H5Dget_space(dset); - count[1] = nbins; - mspace = H5Screate_simple(3, count, NULL); - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(dset, H5T_NATIVE_INT64, mspace, wspace, H5P_DEFAULT, hist); - H5Sclose(wspace); - H5Sclose(mspace); - H5Dclose(dset); - } - delete[] moments; - delete[] hist; -} - - - -template <class rnumber> -template<int nvals> -void fluid_solver_base<rnumber>::compute_rspace_stats( - rnumber *a, - double *moments, - ptrdiff_t *hist, - double max_estimate[], - const int nbins) -{ - TIMEZONE("fluid_solver_base::compute_rspace_stats"); - shared_array<double> threaded_local_moments(10*nvals,[&](double* local_moments){ - std::fill_n(local_moments, 10*nvals, 0); - if (nvals == 4) local_moments[3] = max_estimate[3]; - }); - - shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){ - std::fill_n(local_hist, nbins*nvals, 0); - }); - - // Will not be modified by the threads - double binsize[nvals]; - for (int i=0; i<nvals; i++) - binsize[i] = 2*max_estimate[i] / nbins; - - RLOOP( - this, - [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){ - ptrdiff_t *local_hist = threaded_local_hist.getMine(); - double *local_moments = threaded_local_moments.getMine(); - - double val_tmp[nvals]; - if (nvals == 4) val_tmp[3] = 0.0; - for (int i=0; i<3; i++) - { - val_tmp[i] = a[rindex*3+i]; - if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i]; - } - if (nvals == 4) - { - val_tmp[3] = sqrt(val_tmp[3]); - if (val_tmp[3] < local_moments[0*nvals+3]) - 
local_moments[0*nvals+3] = val_tmp[3]; - if (val_tmp[3] > local_moments[9*nvals+3]) - local_moments[9*nvals+3] = val_tmp[3]; - int bin = int(floor(val_tmp[3]*2/binsize[3])); - if (bin >= 0 && bin < nbins) - local_hist[bin*nvals+3]++; - } - for (int i=0; i<3; i++) - { - if (val_tmp[i] < local_moments[0*nvals+i]) - local_moments[0*nvals+i] = val_tmp[i]; - if (val_tmp[i] > local_moments[9*nvals+i]) - local_moments[9*nvals+i] = val_tmp[i]; - int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); - if (bin >= 0 && bin < nbins) - local_hist[bin*nvals+i]++; - } - for (int n=1; n<9; n++){ - double pow_tmp = 1; - for (int i=0; i<nvals; i++){ - local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp); - } - } - } - ); - - threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double { - if(nvals == int(4) && idx == 0*nvals+3){ - return std::min(v1, v2); - } - if(nvals == int(4) && idx == 9*nvals+3){ - return std::max(v1, v2); - } - if(idx < 3){ - return std::min(v1, v2); - } - if(9*nvals <= idx && idx < 9*nvals+3){ - return std::max(v1, v2); - } - return v1 + v2; - }); - threaded_local_hist.mergeParallel(); - - MPI_Allreduce( - threaded_local_moments.getMasterData(), - (void*)moments, - nvals, - MPI_DOUBLE, MPI_MIN, this->cd->comm); - MPI_Allreduce( - (threaded_local_moments.getMasterData() + nvals), - (void*)(moments+nvals), - 8*nvals, - MPI_DOUBLE, MPI_SUM, this->cd->comm); - MPI_Allreduce( - (threaded_local_moments.getMasterData() + 9*nvals), - (void*)(moments+9*nvals), - nvals, - MPI_DOUBLE, MPI_MAX, this->cd->comm); - MPI_Allreduce( - (void*)threaded_local_hist.getMasterData(), - (void*)hist, - nbins*nvals, - MPI_INT64_T, MPI_SUM, this->cd->comm); - for (int n=1; n<9; n++) - for (int i=0; i<nvals; i++) - moments[n*nvals + i] /= this->normalization_factor; -} - -template <class rnumber> -void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, const double k2exponent) -{ - TIMEZONE("fluid_solver_base::write_spectrum"); - double *spec = fftw_alloc_real(this->nshells); - this->cospectrum(a, a, spec, k2exponent); - if (this->cd->myrank == 0) - { - FILE *spec_file; - char full_name[512]; - sprintf(full_name, "%s_%s_spec", this->name, fname); - spec_file = fopen(full_name, "ab"); - fwrite((void*)&this->iteration, sizeof(int), 1, spec_file); - fwrite((void*)spec, sizeof(double), this->nshells, spec_file); - fclose(spec_file); - } - fftw_free(spec); -} - -/*****************************************************************************/ -/* macro for specializations to numeric types compatible with FFTW */ - -template <class rnumber> -fluid_solver_base<rnumber>::fluid_solver_base( - const char *NAME, - int nx, - int ny, - int nz, - double DKX, - double DKY, - double DKZ, - int DEALIAS_TYPE, - unsigned FFTW_PLAN_RIGOR) -{ - TIMEZONE("fluid_solver_base::fluid_solver_base"); - strncpy(this->name, NAME, 256); - this->name[255] = '\0'; - this->iteration = 0; - this->fftw_plan_rigor = FFTW_PLAN_RIGOR; - - int ntmp[4]; - ntmp[0] = nz; - ntmp[1] = ny; - ntmp[2] = nx; - ntmp[3] = 3; - this->rd = new field_descriptor<rnumber>( - 4, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD); - this->normalization_factor = (this->rd->full_size/3); - ntmp[0] = ny; - ntmp[1] = nz; - ntmp[2] = nx/2 + 1; - ntmp[3] = 3; - this->cd = new field_descriptor<rnumber>( - 4, ntmp, mpi_real_type<rnumber>::complex(), this->rd->comm); - - this->dkx = DKX; - this->dky = DKY; - this->dkz = DKZ; - this->kx = new double[this->cd->sizes[2]]; - this->ky = new 
double[this->cd->subsizes[0]]; - this->kz = new double[this->cd->sizes[1]]; - this->dealias_type = DEALIAS_TYPE; - switch(this->dealias_type) - { - /* HL07 smooth filter */ - case 1: - this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1); - this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1); - this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1); - break; - default: - this->kMx = this->dkx*(int(this->rd->sizes[2] / 3)-1); - this->kMy = this->dky*(int(this->rd->sizes[1] / 3)-1); - this->kMz = this->dkz*(int(this->rd->sizes[0] / 3)-1); - } - int i, ii; - for (i = 0; i<this->cd->sizes[2]; i++) - this->kx[i] = i*this->dkx; - for (i = 0; i<this->cd->subsizes[0]; i++) - { - ii = i + this->cd->starts[0]; - if (ii <= this->rd->sizes[1]/2) - this->ky[i] = this->dky*ii; - else - this->ky[i] = this->dky*(ii - this->rd->sizes[1]); - } - for (i = 0; i<this->cd->sizes[1]; i++) - { - if (i <= this->rd->sizes[0]/2) - this->kz[i] = this->dkz*i; - else - this->kz[i] = this->dkz*(i - this->rd->sizes[0]); - } - this->kM = this->kMx; - if (this->kM < this->kMy) this->kM = this->kMy; - if (this->kM < this->kMz) this->kM = this->kMz; - this->kM2 = this->kM * this->kM; - this->kMspec = this->kM; - this->kMspec2 = this->kM2; - this->dk = this->dkx; - if (this->dk > this->dky) this->dk = this->dky; - if (this->dk > this->dkz) this->dk = this->dkz; - this->dk2 = this->dk*this->dk; - DEBUG_MSG( - "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n", - this->kM, this->kM2, this->dk, this->dk2); - /* spectra stuff */ - this->nshells = int(this->kMspec / this->dk) + 2; - DEBUG_MSG( - "kMspec = %g, kMspec2 = %g, nshells = %ld\n", - this->kMspec, this->kMspec2, this->nshells); - this->kshell = new double[this->nshells]; - std::fill_n(this->kshell, this->nshells, 0.0); - this->nshell = new int64_t[this->nshells]; - std::fill_n(this->nshell, this->nshells, 0); - DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n"); - - shared_array<double> kshell_local_threaded(this->nshells,[&](double* kshell_local){ - std::fill_n(kshell_local, this->nshells, 0.0); - }); - DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n"); - shared_array<int64_t> nshell_local_threaded(this->nshells,[&](int64_t* nshell_local){ - std::fill_n(nshell_local, this->nshells, 0); - }); - - std::vector<std::unordered_map<int, double>> Fourier_filter_threaded(omp_get_max_threads()); - - DEBUG_MSG("fluid_solver_base::fluid_solver_base before cloop_k2_nxmodes\n"); - CLOOP_K2_NXMODES( - this, - - [&](ptrdiff_t /*cindex*/, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, - ptrdiff_t /*zindex*/, double k2, int nxmodes){ - if (k2 < this->kM2) - { - double knorm = sqrt(k2); - nshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes; - kshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes*knorm; - } - Fourier_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));} - ); - - // Merge results - nshell_local_threaded.mergeParallel(); - kshell_local_threaded.mergeParallel(); - for(int idxMerge = 0 ; idxMerge < int(Fourier_filter_threaded.size()) ; ++idxMerge){ - for(const auto kv : Fourier_filter_threaded[idxMerge]){ - this->Fourier_filter[kv.first] = kv.second; - } - } - - MPI_Allreduce( - (void*)(nshell_local_threaded.getMasterData()), - (void*)(this->nshell), - this->nshells, - MPI_INT64_T, MPI_SUM, this->cd->comm); - MPI_Allreduce( - (void*)(kshell_local_threaded.getMasterData()), - (void*)(this->kshell), - this->nshells, - MPI_DOUBLE, MPI_SUM, this->cd->comm); 
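    // At this point kshell[n] holds the mode-count weighted sum of |k| over
    // shell n and nshell[n] the number of modes it contains; the loop below
    // turns kshell into the mean wavenumber of each shell, with -1 marking
    // empty shells.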
- for (unsigned int n=0; n<this->nshells; n++) - { - if (this->nshell[n] != 0) - this->kshell[n] /= this->nshell[n]; - else - this->kshell[n] = -1; - } - DEBUG_MSG("exiting fluid_solver_base::fluid_solver_base\n"); -} - -template <class rnumber> -fluid_solver_base<rnumber>::~fluid_solver_base() -{ - delete[] this->kshell; - delete[] this->nshell; - - delete[] this->kx; - delete[] this->ky; - delete[] this->kz; - - delete this->cd; - delete this->rd; -} - -template <class rnumber> -void fluid_solver_base<rnumber>::low_pass_Fourier(cnumber *a, const int howmany, const double kmax) -{ - TIMEZONE("fluid_solver_base::low_pass_Fourier"); - const double km2 = kmax*kmax; - const int howmany2 = 2*howmany; - /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, howmany2);*/ - CLOOP_K2( - this, - /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n", - this->kx[xindex], - this->ky[yindex], - this->kz[zindex], - k2);*/ - - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, - ptrdiff_t zindex, double k2){ - if (k2 >= km2) - std::fill_n((rnumber*)(a + howmany*cindex), howmany2, 0.0);} - ); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::dealias(cnumber *a, const int howmany) -{ - TIMEZONE("fluid_solver_base::dealias"); - if (this->dealias_type == 0) - { - this->low_pass_Fourier(a, howmany, this->kM); - return; - } - - CLOOP_K2( - this, - [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, - ptrdiff_t /*zindex*/, double k2){ - double tval = this->Fourier_filter[int(round(k2/this->dk2))]; - // It is thread safe on the index cindex - for (int tcounter = 0; tcounter < howmany; tcounter++) - for (int i=0; i<2; i++) - a[howmany*cindex+tcounter][i] *= tval; - } - ); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::force_divfree(cnumber *a) -{ - TIMEZONE("fluid_solver_base::force_divfree"); - CLOOP_K2( - this, - - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, - ptrdiff_t zindex, double k2){ - if (k2 > 0) - { - // It is thread safe on index cindex - cnumber tval; - tval[0] = (this->kx[xindex]*((*(a + cindex*3 ))[0]) + - this->ky[yindex]*((*(a + cindex*3+1))[0]) + - this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; - tval[1] = (this->kx[xindex]*((*(a + cindex*3 ))[1]) + - this->ky[yindex]*((*(a + cindex*3+1))[1]) + - this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; - for (int imag_part=0; imag_part<2; imag_part++) - { - a[cindex*3 ][imag_part] -= tval[imag_part]*this->kx[xindex]; - a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex]; - a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex]; - } - }} - ); - if (this->cd->myrank == this->cd->rank[0]) - std::fill_n((rnumber*)(a), 6, 0.0); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::compute_vector_gradient(cnumber *A, cnumber *cvec) -{ - TIMEZONE("fluid_solver_base::compute_vector_gradient"); - std::fill_n((rnumber*)A, 3*2*this->cd->local_size, 0.0); - cnumber *dx_u, *dy_u, *dz_u; - dx_u = A; - dy_u = A + this->cd->local_size; - dz_u = A + 2*this->cd->local_size; - CLOOP_K2( - this, - - [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, - ptrdiff_t zindex, double k2){ - if (k2 <= this->kM2) - { - // It is thread safe on cindex - ptrdiff_t tindex = 3*cindex; - for (int cc=0; cc<3; cc++) - { - dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1]; - dx_u[tindex + cc][1] = this->kx[xindex]*cvec[tindex+cc][0]; - dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1]; - dy_u[tindex + cc][1] = this->ky[yindex]*cvec[tindex+cc][0]; - 
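                        // z derivative: d/dz is a multiplication by i*k_z in
                        // Fourier space, hence the swapped real/imaginary
                        // parts and the sign flip on the real part below.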
dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1]; - dz_u[tindex + cc][1] = this->kz[zindex]*cvec[tindex+cc][0]; - } - }} - ); -} - -template <class rnumber> -void fluid_solver_base<rnumber>::symmetrize(cnumber *data, const int howmany) -{ - TIMEZONE("fluid_solver_base::symmetrize"); - ptrdiff_t ii, cc; - MPI_Status *mpistatus = new MPI_Status; - if (this->cd->myrank == this->cd->rank[0]) - { - for (cc = 0; cc < howmany; cc++) - data[cc][1] = 0.0; - for (ii = 1; ii < this->cd->sizes[1]/2; ii++) - for (cc = 0; cc < howmany; cc++) { - ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[0] = - (*(data + cc + howmany*( ii)*this->cd->sizes[2]))[0]; - ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[1] = - -(*(data + cc + howmany*( ii)*this->cd->sizes[2]))[1]; - } - } - cnumber *buffer; - buffer = fftw_interface<rnumber>::alloc_complex(howmany*this->cd->sizes[1]); - ptrdiff_t yy; - /*ptrdiff_t tindex;*/ - int ranksrc, rankdst; - for (yy = 1; yy < this->cd->sizes[0]/2; yy++) { - ranksrc = this->cd->rank[yy]; - rankdst = this->cd->rank[this->cd->sizes[0] - yy]; - if (this->cd->myrank == ranksrc) - for (ii = 0; ii < this->cd->sizes[1]; ii++) - for (cc = 0; cc < howmany; cc++) - for (int imag_comp=0; imag_comp<2; imag_comp++) - (*(buffer + howmany*ii+cc))[imag_comp] = - (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp]; - if (ranksrc != rankdst) - { - if (this->cd->myrank == ranksrc) - MPI_Send((void*)buffer, - howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy, - this->cd->comm); - if (this->cd->myrank == rankdst) - MPI_Recv((void*)buffer, - howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy, - this->cd->comm, mpistatus); - } - if (this->cd->myrank == rankdst) - { - for (ii = 1; ii < this->cd->sizes[1]; ii++) - for (cc = 0; cc < howmany; cc++) - { - (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[0] = - (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0]; - (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[1] = - -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1]; - } - for (cc = 0; cc < howmany; cc++) - { - (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[0] = (*(buffer + cc))[0]; - (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[1] = -(*(buffer + cc))[1]; - } - } - } - fftw_interface<rnumber>::free(buffer); - delete mpistatus; - /* put asymmetric data to 0 */ - /*if (this->cd->myrank == this->cd->rank[this->cd->sizes[0]/2]) - { - tindex = howmany*(this->cd->sizes[0]/2 - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]; - for (ii = 0; ii < this->cd->sizes[1]; ii++) - { - std::fill_n((rnumber*)(data + tindex), howmany*2*this->cd->sizes[2], 0.0); - tindex += howmany*this->cd->sizes[2]; - } - } - tindex = howmany*(); - std::fill_n((rnumber*)(data + tindex), howmany*2, 0.0);*/ -} - -template <class rnumber> -int fluid_solver_base<rnumber>::read_base(const char *fname, rnumber *data) -{ - char full_name[512]; - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); - return this->rd->read(full_name, (void*)data); -} - -template <class rnumber> -int fluid_solver_base<rnumber>::read_base(const char *fname, cnumber *data) -{ - char full_name[512]; - 
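    // On-disk field files are named "<solver name>_<field name>_i<iteration>",
    // with the iteration number written in at least five hexadecimal digits.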
sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); - return this->cd->read(full_name, (void*)data); -} - -template <class rnumber> -int fluid_solver_base<rnumber>::write_base(const char *fname, rnumber *data) -{ - char full_name[512]; - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); - return this->rd->write(full_name, (void*)data); -} - -template <class rnumber> -int fluid_solver_base<rnumber>::write_base(const char *fname, cnumber *data) -{ - char full_name[512]; - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); - return this->cd->write(full_name, (void*)data); -} - -/* finally, force generation of code */ -template class fluid_solver_base<float>; -template class fluid_solver_base<double>; - -/*****************************************************************************/ - - - - diff --git a/bfps/cpp/fluid_solver_base.hpp b/bfps/cpp/fluid_solver_base.hpp deleted file mode 100644 index e446956001a08fdbf0d3b11da8552e1cb6c61a45..0000000000000000000000000000000000000000 --- a/bfps/cpp/fluid_solver_base.hpp +++ /dev/null @@ -1,272 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - -#include <stdio.h> -#include <stdlib.h> -#include <hdf5.h> -#include <iostream> -#include <unordered_map> -#include <vector> -#include "base.hpp" -#include "field_descriptor.hpp" -#include "scope_timer.hpp" -#include "omputils.hpp" - -#ifndef FLUID_SOLVER_BASE - -#define FLUID_SOLVER_BASE - -extern int myrank, nprocs; - - -/* container for field descriptor, fields themselves, parameters, etc - * using the same big macro idea that they're using in fftw3.h - * I feel like I should quote: Ugh. 
- * */ - -template <class rnumber> -class fluid_solver_base -{ - protected: - typedef rnumber cnumber[2]; - public: - field_descriptor<rnumber> *cd, *rd; - ptrdiff_t normalization_factor; - unsigned fftw_plan_rigor; - - /* simulation parameters */ - char name[256]; - int iteration; - - /* physical parameters */ - double dkx, dky, dkz, dk, dk2; - - /* mode and dealiasing information */ - int dealias_type; - double kMx, kMy, kMz, kM, kM2; - double kMspec, kMspec2; - double *kx, *ky, *kz; - std::unordered_map<int, double> Fourier_filter; - double *kshell; - int64_t *nshell; - unsigned int nshells; - - - /* methods */ - fluid_solver_base( - const char *NAME, - int nx, - int ny, - int nz, - double DKX = 1.0, - double DKY = 1.0, - double DKZ = 1.0, - int DEALIAS_TYPE = 0, - unsigned FFTW_PLAN_RIGOR = DEFAULT_FFTW_FLAG); - ~fluid_solver_base(); - - void low_pass_Fourier(cnumber *__restrict__ a, int howmany, double kmax); - void dealias(cnumber *__restrict__ a, int howmany); - void force_divfree(cnumber *__restrict__ a); - void symmetrize(cnumber *__restrict__ a, int howmany); - void clean_up_real_space(rnumber *__restrict__ a, int howmany); - void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec); - void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec, const double k2exponent); - double autocorrel(cnumber *__restrict__ a); - void compute_rspace_stats( - const rnumber *__restrict__ a, - const hid_t group, - const std::string dset_name, - const hsize_t toffset, - const std::vector<double> max_estimate); - template <int nvals> - void compute_rspace_stats(rnumber *__restrict__ a, - double *__restrict__ moments, - ptrdiff_t *__restrict__ hist, - double max_estimate[nvals], - const int nbins = 256); - inline void compute_rspace_stats3(rnumber *__restrict__ a, - double *__restrict__ moments, - ptrdiff_t *__restrict__ hist, - double max_estimate[3], - const int nbins = 256) - { - this->compute_rspace_stats<3>(a, moments, hist, max_estimate, nbins); - } - inline void compute_rspace_stats4(rnumber *__restrict__ a, - double *__restrict__ moments, - ptrdiff_t *__restrict__ hist, - double max_estimate[4], - const int nbins = 256) - { - this->compute_rspace_stats<4>(a, moments, hist, max_estimate, nbins); - } - void compute_vector_gradient(rnumber (*__restrict__ A)[2], rnumber(*__restrict__ source)[2]); - void write_spectrum(const char *fname, cnumber *a, const double k2exponent = 0.0); - void fill_up_filename(const char *base_name, char *full_name); - int read_base(const char *fname, rnumber *data); - int read_base(const char *fname, cnumber *data); - int write_base(const char *fname, rnumber *data); - int write_base(const char *fname, cnumber *data); -}; - - - -/*****************************************************************************/ -/* macros for loops */ - -/* Fourier space loop */ -template <class ObjectType, class FuncType> -void CLOOP(ObjectType* obj, FuncType expression) -{ - TIMEZONE("CLOOP"); - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[0]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[0]); - for (ptrdiff_t yindex = start; yindex < ptrdiff_t(end); yindex++){ - ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]; - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) - for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) - { - expression(cindex, xindex, yindex, zindex); - cindex++; - } - } - } -} - 
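// A minimal usage sketch of the loop helpers in this header, assuming `solver`
// points to a fluid_solver_base<float> and `cdata` is an fftwf_complex array
// holding one of its scalar Fourier-space fields (both names are hypothetical):
//
//     CLOOP(solver, [&](ptrdiff_t cindex, ptrdiff_t xindex,
//                       ptrdiff_t yindex, ptrdiff_t zindex){
//         cdata[cindex][0] = 0.0; // real part of mode (xindex, yindex, zindex)
//         cdata[cindex][1] = 0.0; // imaginary part
//     });
//
// The CLOOP_K2 variant defined below additionally passes k^2 for the current
// mode, and the *_NXMODES variants pass the multiplicity of each x-mode so
// that shell sums account for the Hermitian symmetry of the real-to-complex
// transform.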
-template <class ObjectType, class FuncType> -void CLOOP_NXMODES(ObjectType* obj, FuncType expression) -{ - TIMEZONE("CLOOP_NXMODES"); - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]); - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){ - for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++) - { - ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2] - + zindex*obj->cd->subsizes[2]; - int nxmodes = 1; - ptrdiff_t xindex = 0; - expression(); - cindex++; - nxmodes = 2; - for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) - { - expression(); - cindex++; - } - } - } - } -} - - -template <class ObjectType, class FuncType> -void CLOOP_K2(ObjectType* obj, FuncType expression) -{ - TIMEZONE("CLOOP_K2"); - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]); - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){ - for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++){ - ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2] - + zindex*obj->cd->subsizes[2]; - for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) - { - double k2 = (obj->kx[xindex]*obj->kx[xindex] + - obj->ky[yindex]*obj->ky[yindex] + - obj->kz[zindex]*obj->kz[zindex]); - expression(cindex, xindex, yindex, zindex, k2); - cindex++; - } - } - } - } -} - - -template <class ObjectType, class FuncType> -void CLOOP_K2_NXMODES(ObjectType* obj, FuncType expression) -{ - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]); - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){ - for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++) - { - ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2] - + zindex*obj->cd->subsizes[2]; - int nxmodes = 1; - ptrdiff_t xindex = 0; - double k2 = (obj->kx[xindex]*obj->kx[xindex] + - obj->ky[yindex]*obj->ky[yindex] + - obj->kz[zindex]*obj->kz[zindex]); - expression(cindex, xindex, yindex, zindex, k2, nxmodes); - cindex++; - nxmodes = 2; - for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) - { - double k2 = (obj->kx[xindex]*obj->kx[xindex] + - obj->ky[yindex]*obj->ky[yindex] + - obj->kz[zindex]*obj->kz[zindex]); - expression(cindex, xindex, yindex, zindex, k2, nxmodes); - cindex++; - } - } - } - } -} - - -template <class ObjectType, class FuncType> -void RLOOP(ObjectType* obj, FuncType expression) -{ - #pragma omp parallel - { - const hsize_t start = OmpUtils::ForIntervalStart(obj->rd->subsizes[1]); - const hsize_t end = OmpUtils::ForIntervalEnd(obj->rd->subsizes[1]); - for (int zindex = 0; zindex < obj->rd->subsizes[0] ; zindex++) - for (int yindex = start; yindex < ptrdiff_t(end); yindex++) - { - ptrdiff_t rindex = (zindex * obj->rd->subsizes[1] + yindex)*(obj->rd->subsizes[2]+2); - for (int xindex = 0; xindex < obj->rd->subsizes[2]; xindex++) - { - expression(rindex, xindex, yindex, zindex); - rindex++; - } - } - } -} - -/*****************************************************************************/ - -#endif//FLUID_SOLVER_BASE - diff --git a/bfps/cpp/full_code/NSVE.cpp b/bfps/cpp/full_code/NSVE.cpp deleted file mode 100644 index 1e24c7af531e7184f75b1f14257d42b822db7a9c..0000000000000000000000000000000000000000 
--- a/bfps/cpp/full_code/NSVE.cpp +++ /dev/null @@ -1,139 +0,0 @@ -#include <string> -#include <cmath> -#include "NSVE.hpp" -#include "scope_timer.hpp" - - -template <typename rnumber> -int NSVE<rnumber>::initialize(void) -{ - this->read_iteration(); - this->read_parameters(); - if (this->myrank == 0) - { - // set caching parameters - hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); - herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0); - DEBUG_MSG("when setting stat_file cache I got %d\n", cache_err); - this->stat_file = H5Fopen( - (this->simname + ".h5").c_str(), - H5F_ACC_RDWR, - fapl); - } - int data_file_problem; - if (this->myrank == 0) - data_file_problem = this->grow_file_datasets(); - MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, this->comm); - if (data_file_problem > 0) - { - std::cerr << - data_file_problem << - " problems growing file datasets.\ntrying to exit now." << - std::endl; - return EXIT_FAILURE; - } - this->fs = new vorticity_equation<rnumber, FFTW>( - simname.c_str(), - nx, ny, nz, - dkx, dky, dkz, - DEFAULT_FFTW_FLAG); - this->tmp_vec_field = new field<rnumber, FFTW, THREE>( - nx, ny, nz, - this->comm, - DEFAULT_FFTW_FLAG); - - - this->fs->checkpoints_per_file = checkpoints_per_file; - this->fs->nu = nu; - this->fs->fmode = fmode; - this->fs->famplitude = famplitude; - this->fs->fk0 = fk0; - this->fs->fk1 = fk1; - strncpy(this->fs->forcing_type, forcing_type, 128); - this->fs->iteration = this->iteration; - this->fs->checkpoint = this->checkpoint; - - this->fs->cvorticity->real_space_representation = false; - this->fs->io_checkpoint(); - - if (this->myrank == 0 && this->iteration == 0) - this->fs->kk->store(stat_file); - return EXIT_SUCCESS; -} - -template <typename rnumber> -int NSVE<rnumber>::step(void) -{ - this->fs->step(this->dt); - this->iteration = this->fs->iteration; - return EXIT_SUCCESS; -} - -template <typename rnumber> -int NSVE<rnumber>::write_checkpoint(void) -{ - this->fs->io_checkpoint(false); - this->checkpoint = this->fs->checkpoint; - this->write_iteration(); - return EXIT_SUCCESS; -} - -template <typename rnumber> -int NSVE<rnumber>::finalize(void) -{ - if (this->myrank == 0) - H5Fclose(this->stat_file); - delete this->fs; - delete this->tmp_vec_field; - return EXIT_SUCCESS; -} - -/** \brief Compute standard statistics for velocity and vorticity fields. - * - * IMPORTANT: at the end of this subroutine, `this->fs->cvelocity` contains - * the Fourier space representation of the velocity field, and - * `this->tmp_vec_field` contains the real space representation of the - * velocity field. - * This behavior is relied upon in the `NSVEparticles` class, so please - * don't break it. 
- */ - -template <typename rnumber> -int NSVE<rnumber>::do_stats() -{ - if (!(this->iteration % this->niter_stat == 0)) - return EXIT_SUCCESS; - hid_t stat_group; - if (this->myrank == 0) - stat_group = H5Gopen( - this->stat_file, - "statistics", - H5P_DEFAULT); - else - stat_group = 0; - - *tmp_vec_field = fs->cvorticity->get_cdata(); - tmp_vec_field->compute_stats( - fs->kk, - stat_group, - "vorticity", - fs->iteration / niter_stat, - max_vorticity_estimate/sqrt(3)); - - fs->compute_velocity(fs->cvorticity); - *tmp_vec_field = fs->cvelocity->get_cdata(); - tmp_vec_field->compute_stats( - fs->kk, - stat_group, - "velocity", - fs->iteration / niter_stat, - max_velocity_estimate/sqrt(3)); - - if (this->myrank == 0) - H5Gclose(stat_group); - return EXIT_SUCCESS; -} - -template class NSVE<float>; -template class NSVE<double>; - diff --git a/bfps/cpp/full_code/NSVE_no_output.hpp b/bfps/cpp/full_code/NSVE_no_output.hpp deleted file mode 100644 index 0047a45a02dd58ae8934f78fdd8d804424ae817c..0000000000000000000000000000000000000000 --- a/bfps/cpp/full_code/NSVE_no_output.hpp +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef NSVE_NO_OUTPUT_HPP -#define NSVE_NO_OUTPUT_HPP - -#include "full_code/NSVE.hpp" - -template <typename rnumber> -class NSVE_no_output: public NSVE<rnumber> -{ - public: - NSVE_no_output( - const MPI_Comm COMMUNICATOR, - const std::string &simulation_name): - NSVE<rnumber>( - COMMUNICATOR, - simulation_name){} - ~NSVE_no_output(){} - int write_checkpoint(void) - { - return 0; - } - int read_parameters(void); -}; - -#endif//NSVE_NO_OUTPUT_HPP - diff --git a/bfps/cpp/full_code/NSVEparticles.cpp b/bfps/cpp/full_code/NSVEparticles.cpp deleted file mode 100644 index ba84b3943d579965836f05af2447722e273f2dc3..0000000000000000000000000000000000000000 --- a/bfps/cpp/full_code/NSVEparticles.cpp +++ /dev/null @@ -1,102 +0,0 @@ -#include <string> -#include <cmath> -#include "NSVEparticles.hpp" -#include "scope_timer.hpp" -#include "particles/particles_sampling.hpp" - -template <typename rnumber> -int NSVEparticles<rnumber>::initialize(void) -{ - this->NSVE<rnumber>::initialize(); - - this->ps = particles_system_builder( - this->fs->cvelocity, // (field object) - this->fs->kk, // (kspace object, contains dkx, dky, dkz) - tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) - (long long int)nparticles, // to check coherency between parameters and hdf input file - this->fs->get_current_fname(), // particles input filename - std::string("/tracers0/state/") + std::to_string(this->fs->iteration), // dataset name for initial input - std::string("/tracers0/rhs/") + std::to_string(this->fs->iteration), // dataset name for initial input - tracers0_neighbours, // parameter (interpolation no neighbours) - tracers0_smoothness, // parameter - this->comm, - this->fs->iteration+1); - this->particles_output_writer_mpi = new particles_output_hdf5< - long long int, double, 3, 3>( - MPI_COMM_WORLD, - "tracers0", - nparticles, - tracers0_integration_steps); - return EXIT_SUCCESS; -} - -template <typename rnumber> -int NSVEparticles<rnumber>::step(void) -{ - this->fs->compute_velocity(this->fs->cvorticity); - this->fs->cvelocity->ift(); - this->ps->completeLoop(this->dt); - this->NSVE<rnumber>::step(); - return EXIT_SUCCESS; -} - -template <typename rnumber> -int NSVEparticles<rnumber>::write_checkpoint(void) -{ - this->NSVE<rnumber>::write_checkpoint(); - this->particles_output_writer_mpi->open_file(this->fs->get_current_fname()); - this->particles_output_writer_mpi->save( - 
this->ps->getParticlesPositions(), - this->ps->getParticlesRhs(), - this->ps->getParticlesIndexes(), - this->ps->getLocalNbParticles(), - this->fs->iteration); - this->particles_output_writer_mpi->close_file(); - return EXIT_SUCCESS; -} - -template <typename rnumber> -int NSVEparticles<rnumber>::finalize(void) -{ - this->NSVE<rnumber>::finalize(); - this->ps.release(); - delete this->particles_output_writer_mpi; - return EXIT_SUCCESS; -} - -/** \brief Compute fluid stats and sample fields at particle locations. - */ - -template <typename rnumber> -int NSVEparticles<rnumber>::do_stats() -{ - /// fluid stats go here - this->NSVE<rnumber>::do_stats(); - - - if (!(this->iteration % this->niter_part == 0)) - return EXIT_SUCCESS; - - /// sample velocity - sample_from_particles_system(*this->tmp_vec_field, // field to save - this->ps, - (this->simname + "_particles.h5"), // filename - "tracers0", // hdf5 parent group - "velocity" // dataset basename TODO - ); - - /// compute acceleration and sample it - this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field); - this->tmp_vec_field->ift(); - sample_from_particles_system(*this->tmp_vec_field, - this->ps, - (this->simname + "_particles.h5"), - "tracers0", - "acceleration"); - - return EXIT_SUCCESS; -} - -template class NSVEparticles<float>; -template class NSVEparticles<double>; - diff --git a/bfps/cpp/full_code/NSVEparticles_no_output.hpp b/bfps/cpp/full_code/NSVEparticles_no_output.hpp deleted file mode 100644 index 264fd75ac9b0628aff167d018d888030b7029a35..0000000000000000000000000000000000000000 --- a/bfps/cpp/full_code/NSVEparticles_no_output.hpp +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef NSVEPARTICLES_NO_OUTPUT_HPP -#define NSVEPARTICLES_NO_OUTPUT_HPP - -#include "full_code/NSVEparticles.hpp" - -template <typename rnumber> -class NSVEparticles_no_output: public NSVEparticles<rnumber> -{ - public: - NSVEparticles_no_output( - const MPI_Comm COMMUNICATOR, - const std::string &simulation_name): - NSVEparticles<rnumber>( - COMMUNICATOR, - simulation_name){} - ~NSVEparticles_no_output(){} - int write_checkpoint(void) - { - return 0; - } - int read_parameters(void); -}; - -#endif//NSVEPARTICLES_NO_OUTPUT_HPP - diff --git a/bfps/cpp/full_code/code_base.cpp b/bfps/cpp/full_code/code_base.cpp deleted file mode 100644 index 1b06fe8e66a4180034b9f6a494a1a432ae5ea3f9..0000000000000000000000000000000000000000 --- a/bfps/cpp/full_code/code_base.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include "code_base.hpp" -#include "scope_timer.hpp" - -code_base::code_base( - const MPI_Comm COMMUNICATOR, - const std::string &simulation_name): - comm(COMMUNICATOR), - simname(simulation_name) -{ - MPI_Comm_rank(this->comm, &this->myrank); - MPI_Comm_size(this->comm, &this->nprocs); - this->stop_code_now = false; -} - -int code_base::check_stopping_condition(void) -{ - if (myrank == 0) - { - std::string fname = ( - std::string("stop_") + - std::string(this->simname)); - { - struct stat file_buffer; - this->stop_code_now = ( - stat(fname.c_str(), &file_buffer) == 0); - } - } - MPI_Bcast( - &this->stop_code_now, - 1, - MPI_C_BOOL, - 0, - MPI_COMM_WORLD); - return EXIT_SUCCESS; -} - diff --git a/bfps/cpp/full_code/codes_with_no_output.hpp b/bfps/cpp/full_code/codes_with_no_output.hpp deleted file mode 100644 index f4cd3b5495ecb432653a7027bcaa330954865d21..0000000000000000000000000000000000000000 --- a/bfps/cpp/full_code/codes_with_no_output.hpp +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef CODES_WITH_NO_OUTPUT_HPP -#define CODES_WITH_NO_OUTPUT_HPP - -#include 
"full_code/NSVE_no_output.hpp" -#include "full_code/NSVEparticles_no_output.hpp" - - -#endif//CODES_WITH_NO_OUTPUT_HPP - diff --git a/bfps/cpp/full_code/native_binary_to_hdf5.cpp b/bfps/cpp/full_code/native_binary_to_hdf5.cpp deleted file mode 100644 index 7774e2dea9012394c389858038e8ca82674256d7..0000000000000000000000000000000000000000 --- a/bfps/cpp/full_code/native_binary_to_hdf5.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include <string> -#include <cmath> -#include "native_binary_to_hdf5.hpp" -#include "scope_timer.hpp" - - -template <typename rnumber> -int native_binary_to_hdf5<rnumber>::initialize(void) -{ - this->read_parameters(); - this->vec_field = new field<rnumber, FFTW, THREE>( - nx, ny, nz, - this->comm, - DEFAULT_FFTW_FLAG); - this->vec_field->real_space_representation = false; - this->bin_IO = new field_binary_IO<rnumber, COMPLEX, THREE>( - this->vec_field->clayout->sizes, - this->vec_field->clayout->subsizes, - this->vec_field->clayout->starts, - this->vec_field->clayout->comm); - return EXIT_SUCCESS; -} - -template <typename rnumber> -int native_binary_to_hdf5<rnumber>::work_on_current_iteration(void) -{ - char itername[16]; - sprintf(itername, "i%.5x", this->iteration); - std::string native_binary_fname = ( - this->simname + - std::string("_cvorticity_") + - std::string(itername)); - this->bin_IO->read( - native_binary_fname, - this->vec_field->get_cdata()); - this->vec_field->io( - (native_binary_fname + - std::string(".h5")), - "vorticity", - this->iteration, - false); - return EXIT_SUCCESS; -} - -template <typename rnumber> -int native_binary_to_hdf5<rnumber>::finalize(void) -{ - delete this->bin_IO; - delete this->vec_field; - return EXIT_SUCCESS; -} - -template <typename rnumber> -int native_binary_to_hdf5<rnumber>::read_parameters(void) -{ - this->postprocess::read_parameters(); - hid_t parameter_file = H5Fopen( - (this->simname + std::string(".h5")).c_str(), - H5F_ACC_RDONLY, - H5P_DEFAULT); - this->iteration_list = hdf5_tools::read_vector<int>( - parameter_file, - "/native_binary_to_hdf5/iteration_list"); - H5Fclose(parameter_file); - return EXIT_SUCCESS; -} - -template class native_binary_to_hdf5<float>; -template class native_binary_to_hdf5<double>; - diff --git a/bfps/cpp/full_code/postprocess.cpp b/bfps/cpp/full_code/postprocess.cpp deleted file mode 100644 index edb5929f72c5197c123f8f4e20d426ca1ad9eb6f..0000000000000000000000000000000000000000 --- a/bfps/cpp/full_code/postprocess.cpp +++ /dev/null @@ -1,94 +0,0 @@ -#include <cstdlib> -#include <sys/types.h> -#include <sys/stat.h> -#include "scope_timer.hpp" -#include "hdf5_tools.hpp" -#include "full_code/postprocess.hpp" - - -int postprocess::main_loop(void) -{ - this->start_simple_timer(); - for (unsigned int iteration_counter = 0; - iteration_counter < iteration_list.size(); - iteration_counter++) - { - this->iteration = iteration_list[iteration_counter]; - #ifdef USE_TIMINGOUTPUT - const std::string loopLabel = ("postprocess::main_loop-" + - std::to_string(this->iteration)); - TIMEZONE(loopLabel.c_str()); - #endif - this->work_on_current_iteration(); - this->print_simple_timer( - "iteration " + std::to_string(this->iteration)); - - this->check_stopping_condition(); - if (this->stop_code_now) - break; - } - return EXIT_SUCCESS; -} - - -int postprocess::read_parameters() -{ - hid_t parameter_file; - hid_t dset, memtype, space; - char fname[256]; - char *string_data; - sprintf(fname, "%s.h5", this->simname.c_str()); - parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); - dset = H5Dopen(parameter_file, 
"/parameters/dealias_type", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dealias_type); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/dkx", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dkx); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/dky", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dky); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/dkz", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dkz); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/dt", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dt); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/famplitude", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->famplitude); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/fk0", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->fk0); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/fk1", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->fk1); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/fmode", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->fmode); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/forcing_type", H5P_DEFAULT); - space = H5Dget_space(dset); - memtype = H5Dget_type(dset); - string_data = (char*)malloc(256); - H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data); - sprintf(this->forcing_type, "%s", string_data); - free(string_data); - H5Sclose(space); - H5Tclose(memtype); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/nu", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nu); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/nx", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nx); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/ny", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->ny); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/nz", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nz); - H5Dclose(dset); - H5Fclose(parameter_file); - return 0; -} - diff --git a/bfps/cpp/full_code/test.cpp b/bfps/cpp/full_code/test.cpp deleted file mode 100644 index 4f7a402c44c2a2999975881929c2582107897c5c..0000000000000000000000000000000000000000 --- a/bfps/cpp/full_code/test.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include <cstdlib> -#include <sys/types.h> -#include <sys/stat.h> -#include "scope_timer.hpp" -#include "hdf5_tools.hpp" -#include "full_code/test.hpp" - - -int test::main_loop(void) -{ - #ifdef USE_TIMINGOUTPUT - TIMEZONE("test::main_loop"); - #endif - this->start_simple_timer(); - this->do_work(); - this->print_simple_timer( - "do_work required " + std::to_string(this->iteration)); - return EXIT_SUCCESS; -} - - -int test::read_parameters() -{ - hid_t parameter_file; - hid_t dset, memtype, space; - char fname[256]; - char *string_data; - sprintf(fname, "%s.h5", this->simname.c_str()); - parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); - dset = H5Dopen(parameter_file, "/parameters/dealias_type", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, 
H5P_DEFAULT, &this->dealias_type); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/dkx", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dkx); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/dky", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dky); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/dkz", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dkz); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/nx", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nx); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/ny", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->ny); - H5Dclose(dset); - dset = H5Dopen(parameter_file, "/parameters/nz", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nz); - H5Dclose(dset); - H5Fclose(parameter_file); - return 0; -} - diff --git a/bfps/cpp/interpolator.cpp b/bfps/cpp/interpolator.cpp deleted file mode 100644 index a0b38c4059585cc7fd58ab830b792be4f8bc193d..0000000000000000000000000000000000000000 --- a/bfps/cpp/interpolator.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include "interpolator.hpp" - -template <class rnumber, int interp_neighbours> -interpolator<rnumber, interp_neighbours>::interpolator( - fluid_solver_base<rnumber> *fs, - base_polynomial_values BETA_POLYS, - ...) 
: interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) -{ - int tdims[4]; - this->compute_beta = BETA_POLYS; - tdims[0] = (interp_neighbours+1)*2*this->descriptor->nprocs + this->descriptor->sizes[0]; - tdims[1] = this->descriptor->sizes[1]; - tdims[2] = this->descriptor->sizes[2]+2; - tdims[3] = this->descriptor->sizes[3]; - this->buffered_descriptor = new field_descriptor<rnumber>( - 4, tdims, - this->descriptor->mpi_dtype, - this->descriptor->comm); - this->buffer_size = (interp_neighbours+1)*this->buffered_descriptor->slice_size; - this->field = new rnumber[this->buffered_descriptor->local_size]; -} - -template <class rnumber, int interp_neighbours> -interpolator<rnumber, interp_neighbours>::~interpolator() -{ - delete[] this->field; - delete this->buffered_descriptor; -} - -template <class rnumber, int interp_neighbours> -int interpolator<rnumber, interp_neighbours>::read_rFFTW(const void *void_src) -{ - rnumber *src = (rnumber*)void_src; - rnumber *dst = this->field; - /* do big copy of middle stuff */ - std::copy(src, - src + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0], - dst + this->buffer_size); - MPI_Datatype MPI_RNUM = (sizeof(rnumber) == 4) ? MPI_FLOAT : MPI_DOUBLE; - int rsrc; - /* get upper slices */ - for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++) - { - rsrc = this->descriptor->rank[(this->descriptor->all_start0[rdst] + - this->descriptor->all_size0[rdst]) % - this->descriptor->sizes[0]]; - if (this->descriptor->myrank == rsrc) - MPI_Send( - src, - this->buffer_size, - MPI_RNUM, - rdst, - 2*(rsrc*this->descriptor->nprocs + rdst), - this->buffered_descriptor->comm); - if (this->descriptor->myrank == rdst) - MPI_Recv( - dst + this->buffer_size + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0], - this->buffer_size, - MPI_RNUM, - rsrc, - 2*(rsrc*this->descriptor->nprocs + rdst), - this->buffered_descriptor->comm, - MPI_STATUS_IGNORE); - } - /* get lower slices */ - for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++) - { - rsrc = this->descriptor->rank[MOD(this->descriptor->all_start0[rdst] - 1, - this->descriptor->sizes[0])]; - if (this->descriptor->myrank == rsrc) - MPI_Send( - src + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0] - this->buffer_size, - this->buffer_size, - MPI_RNUM, - rdst, - 2*(rsrc*this->descriptor->nprocs + rdst)+1, - this->descriptor->comm); - if (this->descriptor->myrank == rdst) - MPI_Recv( - dst, - this->buffer_size, - MPI_RNUM, - rsrc, - 2*(rsrc*this->descriptor->nprocs + rdst)+1, - this->descriptor->comm, - MPI_STATUS_IGNORE); - } - return EXIT_SUCCESS; -} - -template <class rnumber, int interp_neighbours> -void interpolator<rnumber, interp_neighbours>::sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv) -{ - /* get grid coordinates */ - int *xg = new int[3*nparticles]; - double *xx = new double[3*nparticles]; - double *yy = new double[3*nparticles]; - std::fill_n(yy, 3*nparticles, 0.0); - this->get_grid_coordinates(nparticles, pdimension, x, xg, xx); - /* perform interpolation */ - for (int p=0; p<nparticles; p++) - if (this->descriptor->rank[MOD(xg[p*3+2], this->descriptor->sizes[0])] == this->descriptor->myrank) - this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv); - MPI_Allreduce( - yy, - y, - 3*nparticles, - MPI_DOUBLE, - MPI_SUM, - this->descriptor->comm); - delete[] yy; - delete[] xg; - delete[] xx; -} - -template <class rnumber, int interp_neighbours> -void 
interpolator<rnumber, interp_neighbours>::operator()( - const int *xg, - const double *xx, - double *__restrict__ dest, - const int *deriv) -{ - double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2]; - if (deriv == NULL) - { - this->compute_beta(0, xx[0], bx); - this->compute_beta(0, xx[1], by); - this->compute_beta(0, xx[2], bz); - } - else - { - this->compute_beta(deriv[0], xx[0], bx); - this->compute_beta(deriv[1], xx[1], by); - this->compute_beta(deriv[2], xx[2], bz); - } - std::fill_n(dest, 3, 0); - ptrdiff_t bigiz, bigiy, bigix; - for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++) - { - bigiz = ptrdiff_t(xg[2]+iz)-this->descriptor->starts[0]; - for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++) - { - bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1])); - for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++) - { - bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2])); - ptrdiff_t tindex = ((bigiz *this->buffered_descriptor->sizes[1] + - bigiy)*this->buffered_descriptor->sizes[2] + - bigix)*3 + this->buffer_size; - for (int c=0; c<3; c++) - { - dest[c] += this->field[tindex+c]*(bz[iz+interp_neighbours]* - by[iy+interp_neighbours]* - bx[ix+interp_neighbours]); - } - } - } - } -} - -template class interpolator<float, 1>; -template class interpolator<float, 2>; -template class interpolator<float, 3>; -template class interpolator<float, 4>; -template class interpolator<float, 5>; -template class interpolator<float, 6>; -template class interpolator<float, 7>; -template class interpolator<float, 8>; -template class interpolator<float, 9>; -template class interpolator<float, 10>; -template class interpolator<double, 1>; -template class interpolator<double, 2>; -template class interpolator<double, 3>; -template class interpolator<double, 4>; -template class interpolator<double, 5>; -template class interpolator<double, 6>; -template class interpolator<double, 7>; -template class interpolator<double, 8>; -template class interpolator<double, 9>; -template class interpolator<double, 10>; - diff --git a/bfps/cpp/interpolator_base.cpp b/bfps/cpp/interpolator_base.cpp deleted file mode 100644 index 668a965c65744ac5aae31afb6bee05711a433657..0000000000000000000000000000000000000000 --- a/bfps/cpp/interpolator_base.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <cmath> -#include "interpolator_base.hpp" - -template <class rnumber, int interp_neighbours> -interpolator_base<rnumber, interp_neighbours>::interpolator_base( - fluid_solver_base<rnumber> *fs, - base_polynomial_values BETA_POLYS) -{ - this->descriptor = fs->rd; - this->compute_beta = BETA_POLYS; - - // compute dx, dy, dz; - this->dx = 4*acos(0) / (fs->dkx*this->descriptor->sizes[2]); - this->dy = 4*acos(0) / (fs->dky*this->descriptor->sizes[1]); - this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]); -} - -template <class rnumber, int interp_neighbours> -interpolator_base<rnumber, interp_neighbours>::interpolator_base( - vorticity_equation<rnumber, FFTW> *fs, - base_polynomial_values BETA_POLYS) -{ -// this->descriptor = fs->rd; -// this->compute_beta = BETA_POLYS; -// -// // compute dx, dy, dz; -// this->dx = 4*acos(0) / (fs->kk->dkx*this->descriptor->sizes[2]); -// this->dy = 4*acos(0) / (fs->kk->dky*this->descriptor->sizes[1]); -// this->dz = 4*acos(0) / (fs->kk->dkz*this->descriptor->sizes[0]); -} - -template <class rnumber, int interp_neighbours> -void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates( - const int nparticles, - const int pdimension, - const double *x, - int *xg, - double *xx) -{ - for (int p=0; p<nparticles; p++) - this->get_grid_coordinates( - x + p*pdimension, - xg + p*3, - xx + p*3); -} - -template <class rnumber, int interp_neighbours> -void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates( - const double *x, - int *xg, - double *xx) -{ - static double grid_size[] = {this->dx, this->dy, this->dz}; - double tval; - for (int c=0; c<3; c++) - { - tval = floor(x[c]/grid_size[c]); - xg[c] = MOD(int(tval), this->descriptor->sizes[2-c]); - xx[c] = (x[c] - tval*grid_size[c]) / grid_size[c]; - } -} - - - -template class interpolator_base<float, 1>; -template class interpolator_base<float, 2>; -template class interpolator_base<float, 3>; -template class interpolator_base<float, 4>; -template class interpolator_base<float, 5>; -template class interpolator_base<float, 6>; -template class interpolator_base<float, 7>; -template class interpolator_base<float, 8>; -template class interpolator_base<float, 9>; -template class interpolator_base<float, 10>; -template class interpolator_base<double, 1>; -template class interpolator_base<double, 2>; -template class interpolator_base<double, 3>; -template class interpolator_base<double, 4>; -template class interpolator_base<double, 5>; -template class interpolator_base<double, 6>; -template class interpolator_base<double, 7>; -template class interpolator_base<double, 8>; -template class interpolator_base<double, 9>; -template class interpolator_base<double, 10>; - diff --git a/bfps/cpp/interpolator_base.hpp b/bfps/cpp/interpolator_base.hpp deleted file mode 100644 index f4c28db7b9de632e8ec4977dd67f929f06080e19..0000000000000000000000000000000000000000 --- a/bfps/cpp/interpolator_base.hpp +++ /dev/null @@ -1,114 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. 
* -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include "fluid_solver_base.hpp" -#include "vorticity_equation.hpp" -#include "spline_n1.hpp" -#include "spline_n2.hpp" -#include "spline_n3.hpp" -#include "spline_n4.hpp" -#include "spline_n5.hpp" -#include "spline_n6.hpp" -#include "spline_n7.hpp" -#include "spline_n8.hpp" -#include "spline_n9.hpp" -#include "spline_n10.hpp" -#include "Lagrange_polys.hpp" - -#ifndef INTERPOLATOR_BASE - -#define INTERPOLATOR_BASE - -typedef void (*base_polynomial_values)( - const int derivative, - const double fraction, - double *__restrict__ destination); - -template <class rnumber, int interp_neighbours> -class interpolator_base -{ - public: - /* pointer to polynomial function */ - base_polynomial_values compute_beta; - - /* descriptor of field to interpolate */ - field_descriptor<rnumber> *descriptor; - - /* physical parameters of field */ - double dx, dy, dz; - - interpolator_base( - fluid_solver_base<rnumber> *FSOLVER, - base_polynomial_values BETA_POLYS); - - interpolator_base( - vorticity_equation<rnumber, FFTW> *FSOLVER, - base_polynomial_values BETA_POLYS); - virtual ~interpolator_base(){} - - /* may not destroy input */ - virtual int read_rFFTW(const void *src) = 0; - - /* map real locations to grid coordinates */ - void get_grid_coordinates( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - int *__restrict__ xg, - double *__restrict__ xx); - void get_grid_coordinates( - const double *__restrict__ x, - int *__restrict__ xg, - double *__restrict__ xx); - /* interpolate field at an array of locations */ - virtual void sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv = NULL) = 0; - /* interpolate 1 point */ - virtual void operator()( - const int *__restrict__ xg, - const double *__restrict__ xx, - double *__restrict__ dest, - const int *deriv = NULL) = 0; - - /* interpolate 1 point */ - inline void operator()( - const double *__restrict__ x, - double *__restrict__ dest, - const int *deriv = NULL) - { - int xg[3]; - double xx[3]; - this->get_grid_coordinates(x, xg, xx); - (*this)(xg, xx, dest, deriv); - } -}; - -#endif//INTERPOLATOR_BASE - diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp deleted file mode 100644 index cdaf157cb912c3074faf84bfecf1d9b3752c78a7..0000000000000000000000000000000000000000 --- a/bfps/cpp/particles.cpp +++ /dev/null @@ -1,254 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. 
* -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <cmath> -#include <cassert> -#include <cstring> -#include <string> -#include <sstream> - -#include "base.hpp" -#include "particles.hpp" -#include "fftw_tools.hpp" - - -extern int myrank, nprocs; - -template <particle_types particle_type, class rnumber, int interp_neighbours> -particles<particle_type, rnumber, interp_neighbours>::particles( - const char *NAME, - const hid_t data_file_id, - interpolator_base<rnumber, interp_neighbours> *VEL, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS) : particles_io_base<particle_type>( - NAME, - TRAJ_SKIP, - data_file_id, - VEL->descriptor->comm) -{ - assert((INTEGRATION_STEPS <= 6) && - (INTEGRATION_STEPS >= 1)); - this->vel = VEL; - this->integration_steps = INTEGRATION_STEPS; - this->array_size = this->nparticles * state_dimension(particle_type); - this->state = new double[this->array_size]; - std::fill_n(this->state, this->array_size, 0.0); - for (int i=0; i < this->integration_steps; i++) - { - this->rhs[i] = new double[this->array_size]; - std::fill_n(this->rhs[i], this->array_size, 0.0); - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -particles<particle_type, rnumber, interp_neighbours>::~particles() -{ - delete[] this->state; - for (int i=0; i < this->integration_steps; i++) - { - delete[] this->rhs[i]; - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void particles<particle_type, rnumber, interp_neighbours>::get_rhs(double *x, double *y) -{ - switch(particle_type) - { - case VELOCITY_TRACER: - this->vel->sample(this->nparticles, state_dimension(particle_type), x, y); - break; - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void particles<particle_type, rnumber, interp_neighbours>::roll_rhs() -{ - for (int i=this->integration_steps-2; i>=0; i--) - std::copy(this->rhs[i], - this->rhs[i] + this->array_size, - this->rhs[i+1]); -} - - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( - const int nsteps) -{ - ptrdiff_t ii; - this->get_rhs(this->state, this->rhs[0]); - switch(nsteps) - { - case 1: - for (unsigned int p=0; p<this->nparticles; p++) - for (unsigned int i=0; i<state_dimension(particle_type); i++) - { - ii = p*state_dimension(particle_type)+i; - this->state[ii] += this->dt*this->rhs[0][ii]; - } - break; - case 2: - for (unsigned int p=0; p<this->nparticles; p++) - for (unsigned int i=0; i<state_dimension(particle_type); i++) - { - ii = p*state_dimension(particle_type)+i; - this->state[ii] += this->dt*(3*this->rhs[0][ii] - - this->rhs[1][ii])/2; - } - break; - case 3: - for (unsigned int p=0; p<this->nparticles; p++) 
- for (unsigned int i=0; i<state_dimension(particle_type); i++) - { - ii = p*state_dimension(particle_type)+i; - this->state[ii] += this->dt*(23*this->rhs[0][ii] - - 16*this->rhs[1][ii] - + 5*this->rhs[2][ii])/12; - } - break; - case 4: - for (unsigned int p=0; p<this->nparticles; p++) - for (unsigned int i=0; i<state_dimension(particle_type); i++) - { - ii = p*state_dimension(particle_type)+i; - this->state[ii] += this->dt*(55*this->rhs[0][ii] - - 59*this->rhs[1][ii] - + 37*this->rhs[2][ii] - - 9*this->rhs[3][ii])/24; - } - break; - case 5: - for (unsigned int p=0; p<this->nparticles; p++) - for (unsigned int i=0; i<state_dimension(particle_type); i++) - { - ii = p*state_dimension(particle_type)+i; - this->state[ii] += this->dt*(1901*this->rhs[0][ii] - - 2774*this->rhs[1][ii] - + 2616*this->rhs[2][ii] - - 1274*this->rhs[3][ii] - + 251*this->rhs[4][ii])/720; - } - break; - case 6: - for (unsigned int p=0; p<this->nparticles; p++) - for (unsigned int i=0; i<state_dimension(particle_type); i++) - { - ii = p*state_dimension(particle_type)+i; - this->state[ii] += this->dt*(4277*this->rhs[0][ii] - - 7923*this->rhs[1][ii] - + 9982*this->rhs[2][ii] - - 7298*this->rhs[3][ii] - + 2877*this->rhs[4][ii] - - 475*this->rhs[5][ii])/1440; - } - break; - } - this->roll_rhs(); -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void particles<particle_type, rnumber, interp_neighbours>::step() -{ - this->AdamsBashforth((this->iteration < this->integration_steps) ? - this->iteration+1 : - this->integration_steps); - this->iteration++; -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void particles<particle_type, rnumber, interp_neighbours>::read() -{ - if (this->myrank == 0) - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - this->read_state_chunk(cindex, this->state+cindex*this->chunk_size*state_dimension(particle_type)); - if (this->iteration > 0) - for (int i=0; i<this->integration_steps; i++) - this->read_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*state_dimension(particle_type)); - } - MPI_Bcast( - this->state, - this->array_size, - MPI_DOUBLE, - 0, - this->comm); - if (this->iteration > 0) - for (int i = 0; i<this->integration_steps; i++) - MPI_Bcast( - this->rhs[i], - this->array_size, - MPI_DOUBLE, - 0, - this->comm); -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void particles<particle_type, rnumber, interp_neighbours>::write( - const bool write_rhs) -{ - if (this->myrank == 0) - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - this->write_state_chunk(cindex, this->state+cindex*this->chunk_size*state_dimension(particle_type)); - if (write_rhs) - for (int i=0; i<this->integration_steps; i++) - this->write_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*state_dimension(particle_type)); - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void particles<particle_type, rnumber, interp_neighbours>::sample( - interpolator_base<rnumber, interp_neighbours> *field, - const char *dset_name) -{ - double *y = new double[this->nparticles*3]; - field->sample(this->nparticles, state_dimension(particle_type), this->state, y); - if (this->myrank == 0) - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - this->write_point3D_chunk(dset_name, cindex, y+cindex*this->chunk_size*3); - delete[] y; -} - - 
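For reference, the explicit Adams-Bashforth updates above (orders 1 through 6) all follow one pattern: the new state is the old state plus dt times a fixed linear combination of the stored right-hand-side evaluations, with rhs[0] the most recent. Below is a minimal standalone sketch using the same coefficient tables (function and variable names are made up, not the bfps API):

#include <cstddef>
#include <cstdio>
#include <vector>

// One explicit Adams-Bashforth step of order nsteps (1..6) for a flattened
// state array; rhs[0] holds the newest right-hand-side evaluation.
void adams_bashforth_step(
        std::vector<double> &state,
        const std::vector<std::vector<double>> &rhs,
        const double dt,
        const int nsteps)
{
    static const double coeff[6][6] = {
        {   1.,     0.,    0.,     0.,    0.,    0.},
        {   3.,    -1.,    0.,     0.,    0.,    0.},
        {  23.,   -16.,    5.,     0.,    0.,    0.},
        {  55.,   -59.,   37.,    -9.,    0.,    0.},
        {1901., -2774., 2616., -1274.,  251.,    0.},
        {4277., -7923., 9982., -7298., 2877., -475.}};
    static const double denom[6] = {1., 2., 12., 24., 720., 1440.};
    for (std::size_t i = 0; i < state.size(); i++)
    {
        double increment = 0;
        for (int s = 0; s < nsteps; s++)
            increment += coeff[nsteps-1][s]*rhs[s][i];
        state[i] += dt*increment/denom[nsteps-1];
    }
}

int main()
{
    // toy usage: dx/dt = 1 integrated with a first order (Euler) step
    std::vector<double> state(1, 0.0);
    std::vector<std::vector<double>> rhs(1, std::vector<double>(1, 1.0));
    adams_bashforth_step(state, rhs, 0.1, 1);
    std::printf("state after one step: %g\n", state[0]);
    return 0;
}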
-/*****************************************************************************/ -template class particles<VELOCITY_TRACER, float, 1>; -template class particles<VELOCITY_TRACER, float, 2>; -template class particles<VELOCITY_TRACER, float, 3>; -template class particles<VELOCITY_TRACER, float, 4>; -template class particles<VELOCITY_TRACER, float, 5>; -template class particles<VELOCITY_TRACER, float, 6>; -template class particles<VELOCITY_TRACER, double, 1>; -template class particles<VELOCITY_TRACER, double, 2>; -template class particles<VELOCITY_TRACER, double, 3>; -template class particles<VELOCITY_TRACER, double, 4>; -template class particles<VELOCITY_TRACER, double, 5>; -template class particles<VELOCITY_TRACER, double, 6>; -/*****************************************************************************/ diff --git a/bfps/cpp/particles.hpp b/bfps/cpp/particles.hpp deleted file mode 100644 index 03daf3e3fc866ac485b3649a28dfb13cf1b50ff1..0000000000000000000000000000000000000000 --- a/bfps/cpp/particles.hpp +++ /dev/null @@ -1,99 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include <hdf5.h> -#include "base.hpp" -#include "particles_base.hpp" -#include "fluid_solver_base.hpp" -#include "interpolator_base.hpp" - -#ifndef PARTICLES - -#define PARTICLES - -template <particle_types particle_type, class rnumber, int interp_neighbours> -class particles: public particles_io_base<particle_type> -{ - private: - double *state; - double *rhs[6]; - - public: - int array_size; - int integration_steps; - interpolator_base<rnumber, interp_neighbours> *vel; - - /* simulation parameters */ - double dt; - - /* methods */ - - /* constructor and destructor. 
- * allocate and deallocate: - * this->state - * this->rhs - * */ - particles( - const char *NAME, - const hid_t data_file_id, - interpolator_base<rnumber, interp_neighbours> *FIELD, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS = 2); - ~particles(); - - void sample( - interpolator_base<rnumber, interp_neighbours> *field, - const char *dset_name); - - inline void sample( - interpolator_base<rnumber, interp_neighbours> *field, - double *y) - { - field->sample(this->nparticles, state_dimension(particle_type), this->state, y); - } - - void get_rhs( - double *__restrict__ x, - double *__restrict__ rhs); - - /* input/output */ - void read(); - void write( - const char *dset_name, - const double *data); - void write(const bool write_rhs = true); - - /* solvers */ - void step(); - void roll_rhs(); - void AdamsBashforth(const int nsteps); -}; - -#endif//PARTICLES - diff --git a/bfps/cpp/particles/abstract_particles_input.hpp b/bfps/cpp/particles/abstract_particles_input.hpp deleted file mode 100644 index 77dcbc638903a668ce6e2a0084815832b0580495..0000000000000000000000000000000000000000 --- a/bfps/cpp/particles/abstract_particles_input.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef ABSTRACT_PARTICLES_INPUT_HPP -#define ABSTRACT_PARTICLES_INPUT_HPP - -#include <tuple> - -template <class partsize_t, class real_number> -class abstract_particles_input { -public: - virtual ~abstract_particles_input(){} - - virtual partsize_t getTotalNbParticles() = 0; - virtual partsize_t getLocalNbParticles() = 0; - virtual int getNbRhs() = 0; - - virtual std::unique_ptr<real_number[]> getMyParticles() = 0; - virtual std::unique_ptr<partsize_t[]> getMyParticlesIndexes() = 0; - virtual std::vector<std::unique_ptr<real_number[]>> getMyRhs() = 0; -}; - - -#endif diff --git a/bfps/cpp/particles/abstract_particles_system.hpp b/bfps/cpp/particles/abstract_particles_system.hpp deleted file mode 100644 index 1c8592f37536e5c6c6b4df8f45cc855b3f21eb3f..0000000000000000000000000000000000000000 --- a/bfps/cpp/particles/abstract_particles_system.hpp +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef ABSTRACT_PARTICLES_SYSTEM_HPP -#define ABSTRACT_PARTICLES_SYSTEM_HPP - -#include <memory> - -//- Not generic to enable sampling begin -#include "field.hpp" -#include "kspace.hpp" -//- Not generic to enable sampling end - - -template <class partsize_t, class real_number> -class abstract_particles_system { -public: - virtual void compute() = 0; - - virtual void move(const real_number dt) = 0; - - virtual void redistribute() = 0; - - virtual void inc_step_idx() = 0; - - virtual void shift_rhs_vectors() = 0; - - virtual void completeLoop(const real_number dt) = 0; - - virtual const real_number* getParticlesPositions() const = 0; - - virtual const std::unique_ptr<real_number[]>* getParticlesRhs() const = 0; - - virtual const partsize_t* getParticlesIndexes() const = 0; - - virtual partsize_t getLocalNbParticles() const = 0; - - virtual partsize_t getGlobalNbParticles() const = 0; - - virtual int getNbRhs() const = 0; - - virtual int get_step_idx() const = 0; - - //- Not generic to enable sampling begin - virtual void sample_compute_field(const field<float, FFTW, ONE>& sample_field, - real_number sample_rhs[]) = 0; - virtual void sample_compute_field(const field<float, FFTW, THREE>& sample_field, - real_number sample_rhs[]) = 0; - virtual void sample_compute_field(const field<float, FFTW, THREExTHREE>& sample_field, - real_number sample_rhs[]) = 0; - virtual void sample_compute_field(const field<double, FFTW, ONE>& sample_field, - real_number 
sample_rhs[]) = 0; - virtual void sample_compute_field(const field<double, FFTW, THREE>& sample_field, - real_number sample_rhs[]) = 0; - virtual void sample_compute_field(const field<double, FFTW, THREExTHREE>& sample_field, - real_number sample_rhs[]) = 0; - //- Not generic to enable sampling end -}; - -#endif diff --git a/bfps/cpp/particles/particles_output_sampling_hdf5.hpp b/bfps/cpp/particles/particles_output_sampling_hdf5.hpp deleted file mode 100644 index 238c9acf9a16db9c36b81d3c6eb6dc2388bbf117..0000000000000000000000000000000000000000 --- a/bfps/cpp/particles/particles_output_sampling_hdf5.hpp +++ /dev/null @@ -1,188 +0,0 @@ -#ifndef PARTICLES_OUTPUT_SAMPLING_HDF5_HPP -#define PARTICLES_OUTPUT_SAMPLING_HDF5_HPP - -#include "abstract_particles_output.hpp" - -#include <hdf5.h> - -template <class partsize_t, - class real_number, - int size_particle_positions, - int size_particle_rhs> -class particles_output_sampling_hdf5 : public abstract_particles_output<partsize_t, - real_number, - size_particle_positions, - size_particle_rhs>{ - using Parent = abstract_particles_output<partsize_t, - real_number, - size_particle_positions, - size_particle_rhs>; - - hid_t file_id, pgroup_id; - - const std::string dataset_name; - const bool use_collective_io; - -public: - static bool DatasetExistsCol(MPI_Comm in_mpi_com, - const std::string& in_filename, - const std::string& in_groupname, - const std::string& in_dataset_name){ - int my_rank; - AssertMpi(MPI_Comm_rank(in_mpi_com, &my_rank)); - - int dataset_exists = -1; - - if(my_rank == 0){ - // Parallel HDF5 write - hid_t file_id = H5Fopen( - in_filename.c_str(), - H5F_ACC_RDWR | H5F_ACC_DEBUG, - H5P_DEFAULT); - assert(file_id >= 0); - - dataset_exists = H5Lexists( - file_id, - (in_groupname + "/" + in_dataset_name).c_str(), - H5P_DEFAULT); - - int retTest = H5Fclose(file_id); - assert(retTest >= 0); - } - - AssertMpi(MPI_Bcast( &dataset_exists, 1, MPI_INT, 0, in_mpi_com )); - return dataset_exists; - } - - particles_output_sampling_hdf5(MPI_Comm in_mpi_com, - const partsize_t inTotalNbParticles, - const std::string& in_filename, - const std::string& in_groupname, - const std::string& in_dataset_name, - const bool in_use_collective_io = false) - : Parent(in_mpi_com, inTotalNbParticles, 1), - dataset_name(in_dataset_name), - use_collective_io(in_use_collective_io){ - if(Parent::isInvolved()){ - hid_t plist_id_par = H5Pcreate(H5P_FILE_ACCESS); - assert(plist_id_par >= 0); - int retTest = H5Pset_fapl_mpio( - plist_id_par, - Parent::getComWriter(), - MPI_INFO_NULL); - assert(retTest >= 0); - - // Parallel HDF5 write - file_id = H5Fopen( - in_filename.c_str(), - H5F_ACC_RDWR | H5F_ACC_DEBUG, - plist_id_par); - assert(file_id >= 0); - retTest = H5Pclose(plist_id_par); - assert(retTest >= 0); - - pgroup_id = H5Gopen( - file_id, - in_groupname.c_str(), - H5P_DEFAULT); - assert(pgroup_id >= 0); - } - } - - ~particles_output_sampling_hdf5(){ - if(Parent::isInvolved()){ - int retTest = H5Gclose(pgroup_id); - assert(retTest >= 0); - retTest = H5Fclose(file_id); - assert(retTest >= 0); - } - } - - void write( - const int /*idx_time_step*/, - const real_number* /*particles_positions*/, - const std::unique_ptr<real_number[]>* particles_rhs, - const partsize_t nb_particles, - const partsize_t particles_idx_offset) final{ - assert(Parent::isInvolved()); - - TIMEZONE("particles_output_hdf5::write"); - - assert(particles_idx_offset < Parent::getTotalNbParticles() || (particles_idx_offset == Parent::getTotalNbParticles() && nb_particles == 0)); - 
assert(particles_idx_offset+nb_particles <= Parent::getTotalNbParticles()); - - static_assert(std::is_same<real_number, double>::value || - std::is_same<real_number, float>::value, - "real_number must be double or float"); - const hid_t type_id = (sizeof(real_number) == 8 ? H5T_NATIVE_DOUBLE : H5T_NATIVE_FLOAT); - - hid_t plist_id = H5Pcreate(H5P_DATASET_XFER); - assert(plist_id >= 0); - { - int rethdf = H5Pset_dxpl_mpio(plist_id, use_collective_io ? H5FD_MPIO_COLLECTIVE : H5FD_MPIO_INDEPENDENT); - assert(rethdf >= 0); - } - { - assert(size_particle_rhs >= 0); - const hsize_t datacount[3] = {hsize_t(Parent::getNbRhs()), - hsize_t(Parent::getTotalNbParticles()), - hsize_t(size_particle_rhs)}; - hid_t dataspace = H5Screate_simple(3, datacount, NULL); - assert(dataspace >= 0); - - hid_t dataset_id = H5Dcreate( pgroup_id, - dataset_name.c_str(), - type_id, - dataspace, - H5P_DEFAULT, - H5P_DEFAULT, - H5P_DEFAULT); - assert(dataset_id >= 0); - - assert(particles_idx_offset >= 0); - const hsize_t count[3] = { - 1, - hsize_t(nb_particles), - hsize_t(size_particle_rhs)}; - const hsize_t offset[3] = { - 0, - hsize_t(particles_idx_offset), - 0}; - hid_t memspace = H5Screate_simple(3, count, NULL); - assert(memspace >= 0); - - hid_t filespace = H5Dget_space(dataset_id); - assert(filespace >= 0); - int rethdf = H5Sselect_hyperslab( - filespace, - H5S_SELECT_SET, - offset, - NULL, - count, - NULL); - assert(rethdf >= 0); - - herr_t status = H5Dwrite( - dataset_id, - type_id, - memspace, - filespace, - plist_id, - particles_rhs[0].get()); - assert(status >= 0); - rethdf = H5Sclose(filespace); - assert(rethdf >= 0); - rethdf = H5Sclose(memspace); - assert(rethdf >= 0); - rethdf = H5Dclose(dataset_id); - assert(rethdf >= 0); - } - - { - int rethdf = H5Pclose(plist_id); - assert(rethdf >= 0); - } - } -}; - -#endif diff --git a/bfps/cpp/particles/particles_sampling.hpp b/bfps/cpp/particles/particles_sampling.hpp deleted file mode 100644 index 3adc255341f3ca879d5cae1445124091f31b4394..0000000000000000000000000000000000000000 --- a/bfps/cpp/particles/particles_sampling.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef PARTICLES_SAMPLING_HPP -#define PARTICLES_SAMPLING_HPP - -#include <memory> -#include <string> - -#include "abstract_particles_system.hpp" -#include "particles_output_sampling_hdf5.hpp" - -#include "field.hpp" -#include "kspace.hpp" - - -template <class partsize_t, class particles_rnumber, class rnumber, field_backend be, field_components fc> -void sample_from_particles_system(const field<rnumber, be, fc>& in_field, // a pointer to a field<rnumber, FFTW, fc> - std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>& ps, // a pointer to an particles_system<double> - const std::string& filename, - const std::string& parent_groupname, - const std::string& fname){ - const std::string datasetname = fname + std::string("/") + std::to_string(ps->get_step_idx()); - const int size_particle_rhs = ncomp(fc); - - // Stop here if already exists - if(particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3, size_particle_rhs>::DatasetExistsCol(MPI_COMM_WORLD, - filename, - parent_groupname, - datasetname)){ - return; - } - - const partsize_t nb_particles = ps->getLocalNbParticles(); - std::unique_ptr<particles_rnumber[]> sample_rhs(new particles_rnumber[size_particle_rhs*nb_particles]); - std::fill_n(sample_rhs.get(), size_particle_rhs*nb_particles, 0); - - ps->sample_compute_field(in_field, sample_rhs.get()); - - - - particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3, 
size_particle_rhs> outputclass(MPI_COMM_WORLD, - ps->getGlobalNbParticles(), - filename, - parent_groupname, - datasetname); - outputclass.save(ps->getParticlesPositions(), - &sample_rhs, - ps->getParticlesIndexes(), - ps->getLocalNbParticles(), - ps->get_step_idx()); -} - -#endif - diff --git a/bfps/cpp/particles_base.cpp b/bfps/cpp/particles_base.cpp deleted file mode 100644 index 1410488410a429ff463a1751e86f78cc2157679b..0000000000000000000000000000000000000000 --- a/bfps/cpp/particles_base.cpp +++ /dev/null @@ -1,424 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <algorithm> -#include <cassert> -#include "particles_base.hpp" -#include "scope_timer.hpp" - -template <particle_types particle_type> -single_particle_state<particle_type>::single_particle_state() -{ - std::fill_n(this->data, state_dimension(particle_type), 0); -} - -template <particle_types particle_type> -single_particle_state<particle_type>::single_particle_state( - const single_particle_state<particle_type> &src) -{ - std::copy( - src.data, - src.data + state_dimension(particle_type), - this->data); -} - -template <particle_types particle_type> -single_particle_state<particle_type>::single_particle_state( - const double *src) -{ - std::copy( - src, - src + state_dimension(particle_type), - this->data); -} - -template <particle_types particle_type> -single_particle_state<particle_type>::~single_particle_state() -{ -} - -template <particle_types particle_type> -single_particle_state<particle_type> &single_particle_state<particle_type>::operator=( - const single_particle_state &src) -{ - std::copy( - src.data, - src.data + state_dimension(particle_type), - this->data); - return *this; -} - -template <particle_types particle_type> -single_particle_state<particle_type> &single_particle_state<particle_type>::operator=( - const double *src) -{ - std::copy( - src, - src + state_dimension(particle_type), - this->data); - return *this; -} - -int get_chunk_offsets( - std::vector<hsize_t> data_dims, - std::vector<hsize_t> chnk_dims, - std::vector<std::vector<hsize_t>> &co) -{ - TIMEZONE("get_chunk_offsets"); - std::vector<hsize_t> nchunks(data_dims); - int total_number_of_chunks = 1; - for (unsigned i=0; i<nchunks.size(); i++) - { - DEBUG_MSG("get_chunk_offset nchunks[%d] = %ld, chnk_dims[%d] = %ld\n", - i, nchunks[i], i, chnk_dims[i]); - nchunks[i] = data_dims[i] / chnk_dims[i]; - total_number_of_chunks *= nchunks[i]; - } - co.resize(total_number_of_chunks); - DEBUG_MSG("total number of chunks is %d\n", total_number_of_chunks); - for (int cindex=0; cindex < total_number_of_chunks; 
cindex++) - { - int cc = cindex; - for (unsigned i=0; i<nchunks.size(); i++) - { - int ii = nchunks.size()-1-i; - co[cindex].resize(nchunks.size()); - co[cindex][ii] = cc % nchunks[ii]; - cc = (cc - co[cindex][ii]) / nchunks[ii]; - co[cindex][ii] *= chnk_dims[ii]; - } - } - return EXIT_SUCCESS; -} - -template <particle_types particle_type> -particles_io_base<particle_type>::particles_io_base( - const char *NAME, - const int TRAJ_SKIP, - const hid_t data_file_id, - MPI_Comm COMM) -{ - TIMEZONE("particles_io_base::particles_io_base"); - this->name = std::string(NAME); - this->traj_skip = TRAJ_SKIP; - this->comm = COMM; - MPI_Comm_rank(COMM, &this->myrank); - MPI_Comm_size(COMM, &this->nprocs); - - if (this->myrank == 0) - { - hid_t dset, prop_list, dspace; - this->hdf5_group_id = H5Gopen(data_file_id, this->name.c_str(), H5P_DEFAULT); - dset = H5Dopen(this->hdf5_group_id, "state", H5P_DEFAULT); - dspace = H5Dget_space(dset); - this->hdf5_state_dims.resize(H5Sget_simple_extent_ndims(dspace)); - H5Sget_simple_extent_dims(dspace, &this->hdf5_state_dims.front(), NULL); - assert(this->hdf5_state_dims[this->hdf5_state_dims.size()-1] == state_dimension(particle_type)); - this->nparticles = 1; - for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++) - this->nparticles *= this->hdf5_state_dims[i]; - prop_list = H5Dget_create_plist(dset); - this->hdf5_state_chunks.resize(this->hdf5_state_dims.size()); - H5Pget_chunk(prop_list, this->hdf5_state_dims.size(), &this->hdf5_state_chunks.front()); - H5Pclose(prop_list); - H5Sclose(dspace); - H5Dclose(dset); - this->chunk_size = 1; - for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++) - this->chunk_size *= this->hdf5_state_chunks[i]; - dset = H5Dopen(this->hdf5_group_id, "rhs", H5P_DEFAULT); - dspace = H5Dget_space(dset); - this->hdf5_rhs_dims.resize(H5Sget_simple_extent_ndims(dspace)); - H5Sget_simple_extent_dims(dspace, &this->hdf5_rhs_dims.front(), NULL); - prop_list = H5Dget_create_plist(dset); - this->hdf5_rhs_chunks.resize(this->hdf5_rhs_dims.size()); - H5Pget_chunk(prop_list, this->hdf5_rhs_dims.size(), &this->hdf5_rhs_chunks.front()); - H5Pclose(prop_list); - H5Sclose(dspace); - H5Dclose(dset); - } - DEBUG_MSG("hello, rank 0 just read particle thingie\n"); - - int tmp; - tmp = this->hdf5_state_dims.size(); - MPI_Bcast( - &tmp, - 1, - MPI_INTEGER, - 0, - this->comm); - if (this->myrank != 0) - { - this->hdf5_state_dims.resize(tmp); - this->hdf5_state_chunks.resize(tmp); - } - DEBUG_MSG("successfully resized state_dims and state_chunks\n"); - MPI_Bcast( - &this->hdf5_state_dims.front(), - this->hdf5_state_dims.size(), - // hsize_t is in fact unsigned long long. Will this ever change...? 
- MPI_UNSIGNED_LONG_LONG, - 0, - this->comm); - MPI_Bcast( - &this->hdf5_state_chunks.front(), - this->hdf5_state_chunks.size(), - MPI_UNSIGNED_LONG_LONG, - 0, - this->comm); - DEBUG_MSG("successfully broadcasted state_dims and state_chunks\n"); - for (unsigned i=0; i<this->hdf5_state_chunks.size(); i++) - DEBUG_MSG( - "hdf5_state_dims[%d] = %ld, hdf5_state_chunks[%d] = %ld\n", - i, this->hdf5_state_dims[i], - i, this->hdf5_state_chunks[i] - ); - std::vector<hsize_t> tdims(this->hdf5_state_dims), tchnk(this->hdf5_state_chunks); - tdims.erase(tdims.begin()+0); - tchnk.erase(tchnk.begin()+0); - tdims.erase(tdims.end()-1); - tchnk.erase(tchnk.end()-1); - DEBUG_MSG("before get_chunk_offsets\n"); - get_chunk_offsets(tdims, tchnk, this->chunk_offsets); - DEBUG_MSG("after get_chunk_offsets\n"); - MPI_Bcast( - &this->chunk_size, - 1, - MPI_UNSIGNED, - 0, - this->comm); - MPI_Bcast( - &this->nparticles, - 1, - MPI_UNSIGNED, - 0, - this->comm); - DEBUG_MSG("nparticles = %d, chunk_size = %d\n", - this->nparticles, - this->chunk_size); - DEBUG_MSG("exiting particles_io_base constructor\n"); -} - -template <particle_types particle_type> -particles_io_base<particle_type>::~particles_io_base() -{ - if(this->myrank == 0) - H5Gclose(this->hdf5_group_id); -} - -template <particle_types particle_type> -void particles_io_base<particle_type>::read_state_chunk( - const int cindex, - double *data) -{ - TIMEZONE("particles_io_base::read_state_chunk"); - DEBUG_MSG("entered read_state_chunk\n"); - hid_t dset = H5Dopen(this->hdf5_group_id, "state", H5P_DEFAULT); - hid_t rspace = H5Dget_space(dset); - std::vector<hsize_t> mem_dims(this->hdf5_state_chunks); - mem_dims[0] = 1; - hid_t mspace = H5Screate_simple( - this->hdf5_state_dims.size(), - &mem_dims.front(), - NULL); - hsize_t *offset = new hsize_t[this->hdf5_state_dims.size()]; - offset[0] = this->iteration / this->traj_skip; - for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++) - offset[i] = this->chunk_offsets[cindex][i-1]; - offset[this->hdf5_state_dims.size()-1] = 0; - H5Sselect_hyperslab( - rspace, - H5S_SELECT_SET, - offset, - NULL, - &mem_dims.front(), - NULL); - H5Dread(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data); - H5Sclose(mspace); - H5Sclose(rspace); - H5Dclose(dset); - delete[] offset; - DEBUG_MSG("exiting read_state_chunk\n"); -} - -template <particle_types particle_type> -void particles_io_base<particle_type>::write_state_chunk( - const int cindex, - const double *data) -{ - TIMEZONE("particles_io_base::write_state_chunk"); - hid_t dset = H5Dopen(this->hdf5_group_id, "state", H5P_DEFAULT); - hid_t rspace = H5Dget_space(dset); - std::vector<hsize_t> mem_dims(this->hdf5_state_chunks); - mem_dims[0] = 1; - hid_t mspace = H5Screate_simple( - this->hdf5_state_dims.size(), - &mem_dims.front(), - NULL); - hsize_t *offset = new hsize_t[this->hdf5_state_dims.size()]; - offset[0] = this->iteration / this->traj_skip; - for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++) - offset[i] = this->chunk_offsets[cindex][i-1]; - offset[this->hdf5_state_dims.size()-1] = 0; - H5Sselect_hyperslab( - rspace, - H5S_SELECT_SET, - offset, - NULL, - &mem_dims.front(), - NULL); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data); - H5Sclose(mspace); - H5Sclose(rspace); - H5Dclose(dset); - delete[] offset; -} - -template <particle_types particle_type> -void particles_io_base<particle_type>::read_rhs_chunk( - const int cindex, - const int rhsindex, - double *data) -{ - TIMEZONE("particles_io_base::read_rhs_chunk"); - 
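All of the chunked reads and writes in this class follow the same HDF5 pattern: open the dataset, select a hyperslab in the file dataspace at the chunk's offset, and transfer chunk_size particles through a matching memory dataspace. Below is a minimal standalone sketch of that pattern for a [ntimes x nparticles x 3] dataset (dataset name and sizes are made up; it assumes the dataset already exists with a compatible shape):

#include <hdf5.h>

// Write one chunk of particle states into an existing 3D dataset "state"
// by selecting the corresponding hyperslab of the file dataspace.
void write_one_chunk(
        hid_t group_id,               // an open HDF5 group containing "state"
        const double *data,           // chunk_size*3 values for this chunk
        const hsize_t step,           // time slot to write into
        const hsize_t chunk_offset,   // index of the first particle in this chunk
        const hsize_t chunk_size)     // number of particles per chunk
{
    hid_t dset = H5Dopen(group_id, "state", H5P_DEFAULT);
    hid_t filespace = H5Dget_space(dset);
    const hsize_t count[3]  = {1, chunk_size, 3};
    const hsize_t offset[3] = {step, chunk_offset, 0};
    hid_t memspace = H5Screate_simple(3, count, NULL);
    H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL);
    H5Dwrite(dset, H5T_NATIVE_DOUBLE, memspace, filespace, H5P_DEFAULT, data);
    H5Sclose(memspace);
    H5Sclose(filespace);
    H5Dclose(dset);
}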
//DEBUG_MSG("entered read_rhs_chunk\n"); - hid_t dset = H5Dopen(this->hdf5_group_id, "rhs", H5P_DEFAULT); - hid_t rspace = H5Dget_space(dset); - std::vector<hsize_t> mem_dims(this->hdf5_rhs_chunks); - mem_dims[0] = 1; - mem_dims[1] = 1; - hid_t mspace = H5Screate_simple( - this->hdf5_rhs_dims.size(), - &mem_dims.front(), - NULL); - hsize_t *offset = new hsize_t[this->hdf5_rhs_dims.size()]; - offset[0] = this->hdf5_rhs_dims[0]-2; - offset[1] = rhsindex; - for (unsigned int i=2; i<this->hdf5_rhs_dims.size()-1; i++) - offset[i] = this->chunk_offsets[cindex][i-2]; - offset[this->hdf5_rhs_dims.size()-1] = 0; - //for (int i=0; i<this->hdf5_rhs_dims.size(); i++) - // DEBUG_MSG("rhs dim %d: size=%d chunk=%d offset=%d\n", - // i, this->hdf5_rhs_dims[i], this->hdf5_rhs_chunks[i], offset[i]); - H5Sselect_hyperslab( - rspace, - H5S_SELECT_SET, - offset, - NULL, - &mem_dims.front(), - NULL); - //DEBUG_MSG("selected hyperslab\n"); - H5Dread(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data); - //DEBUG_MSG("data has been read\n"); - H5Sclose(mspace); - H5Sclose(rspace); - H5Dclose(dset); - delete[] offset; - //DEBUG_MSG("exiting read_rhs_chunk\n"); -} - -template <particle_types particle_type> -void particles_io_base<particle_type>::write_rhs_chunk( - const int cindex, - const int rhsindex, - const double *data) -{ - TIMEZONE("particles_io_base::write_rhs_chunk"); - hid_t dset = H5Dopen(this->hdf5_group_id, "rhs", H5P_DEFAULT); - hid_t rspace = H5Dget_space(dset); - std::vector<hsize_t> mem_dims(this->hdf5_rhs_chunks); - mem_dims[0] = 1; - mem_dims[1] = 1; - hid_t mspace = H5Screate_simple( - this->hdf5_rhs_dims.size(), - &mem_dims.front(), - NULL); - hsize_t *offset = new hsize_t[this->hdf5_rhs_dims.size()]; - offset[0] = this->hdf5_rhs_dims[0]-1; - offset[1] = rhsindex; - for (unsigned int i=2; i<this->hdf5_rhs_dims.size()-1; i++) - offset[i] = this->chunk_offsets[cindex][i-2]; - offset[this->hdf5_rhs_dims.size()-1] = 0; - DEBUG_MSG("rhs write offsets are %d %d %d %d\n", - offset[0], offset[1], offset[2], offset[3]); - H5Sselect_hyperslab( - rspace, - H5S_SELECT_SET, - offset, - NULL, - &mem_dims.front(), - NULL); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data); - H5Sclose(mspace); - H5Sclose(rspace); - H5Dclose(dset); - delete[] offset; -} - -template <particle_types particle_type> -void particles_io_base<particle_type>::write_point3D_chunk( - const std::string dset_name, - const int cindex, - const double *data) -{ - TIMEZONE("particles_io_base::write_point3D_chunk"); - hid_t dset = H5Dopen(this->hdf5_group_id, dset_name.c_str(), H5P_DEFAULT); - hid_t rspace = H5Dget_space(dset); - std::vector<hsize_t> mem_dims(this->hdf5_state_chunks); - mem_dims[0] = 1; - mem_dims[mem_dims.size()-1] = 3; - hid_t mspace = H5Screate_simple( - this->hdf5_state_dims.size(), - &mem_dims.front(), - NULL); - hsize_t *offset = new hsize_t[this->hdf5_state_dims.size()]; - offset[0] = this->iteration / this->traj_skip; - for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++) - offset[i] = this->chunk_offsets[cindex][i-1]; - offset[this->hdf5_state_dims.size()-1] = 0; - H5Sselect_hyperslab( - rspace, - H5S_SELECT_SET, - offset, - NULL, - &mem_dims.front(), - NULL); - H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data); - H5Sclose(mspace); - H5Sclose(rspace); - H5Dclose(dset); - delete[] offset; -} - -/*****************************************************************************/ -template class single_particle_state<POINT3D>; -template class 
single_particle_state<VELOCITY_TRACER>; - -template class particles_io_base<VELOCITY_TRACER>; -/*****************************************************************************/ - diff --git a/bfps/cpp/particles_base.hpp b/bfps/cpp/particles_base.hpp deleted file mode 100644 index 8afd5d439cdc121982868b5eadc991cdc1c5abdb..0000000000000000000000000000000000000000 --- a/bfps/cpp/particles_base.hpp +++ /dev/null @@ -1,136 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <vector> -#include <hdf5.h> -#include <unordered_map> -#include "interpolator_base.hpp" - -#ifndef PARTICLES_BASE - -#define PARTICLES_BASE - -/* particle types */ -enum particle_types {POINT3D, VELOCITY_TRACER}; - -/* space dimension */ -constexpr unsigned int state_dimension(particle_types particle_type) -{ - return ((particle_type == POINT3D) ? 3 : ( - (particle_type == VELOCITY_TRACER) ? 
3 : - 3)); -} - -/* 1 particle state type */ - -template <particle_types particle_type> -class single_particle_state -{ - public: - double data[state_dimension(particle_type)]; - - single_particle_state(); - single_particle_state(const single_particle_state &src); - single_particle_state(const double *src); - ~single_particle_state(); - - single_particle_state<particle_type> &operator=(const single_particle_state &src); - single_particle_state<particle_type> &operator=(const double *src); - - inline double &operator[](const int i) - { - return this->data[i]; - } -}; - -std::vector<std::vector<hsize_t>> get_chunk_offsets( - std::vector<hsize_t> data_dims, - std::vector<hsize_t> chnk_dims); - -template <particle_types particle_type> -class particles_io_base -{ - protected: - int myrank, nprocs; - MPI_Comm comm; - - unsigned int nparticles; - - std::string name; - unsigned int chunk_size; - int traj_skip; - - hid_t hdf5_group_id; - std::vector<hsize_t> hdf5_state_dims, hdf5_state_chunks; - std::vector<hsize_t> hdf5_rhs_dims, hdf5_rhs_chunks; - - std::vector<std::vector<hsize_t>> chunk_offsets; - - particles_io_base( - const char *NAME, - const int TRAJ_SKIP, - const hid_t data_file_id, - MPI_Comm COMM); - virtual ~particles_io_base(); - - void read_state_chunk( - const int cindex, - double *__restrict__ data); - void write_state_chunk( - const int cindex, - const double *data); - void read_rhs_chunk( - const int cindex, - const int rhsindex, - double *__restrict__ data); - void write_rhs_chunk( - const int cindex, - const int rhsindex, - const double *data); - - void write_point3D_chunk( - const std::string dset_name, - const int cindex, - const double *data); - - public: - int iteration; - - inline const char *get_name() - { - return this->name.c_str(); - } - inline const unsigned int get_number_of_chunks() - { - return this->chunk_offsets.size(); - } - inline const unsigned int get_number_of_rhs_chunks(); - virtual void read() = 0; - virtual void write(const bool write_rhs = true) = 0; -}; - -#endif//PARTICLES_BASE - diff --git a/bfps/cpp/rFFTW_distributed_particles.cpp b/bfps/cpp/rFFTW_distributed_particles.cpp deleted file mode 100644 index 265975f8c817a1b40942e076bd016c2921618bbc..0000000000000000000000000000000000000000 --- a/bfps/cpp/rFFTW_distributed_particles.cpp +++ /dev/null @@ -1,804 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <cmath> -#include <cassert> -#include <cstring> -#include <string> -#include <sstream> -#include <set> -#include <algorithm> -#include <ctime> - -#include "base.hpp" -#include "rFFTW_distributed_particles.hpp" -#include "fftw_tools.hpp" -#include "scope_timer.hpp" - - -extern int myrank, nprocs; - -template <particle_types particle_type, class rnumber, int interp_neighbours> -rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_distributed_particles( - const char *NAME, - const hid_t data_file_id, - rFFTW_interpolator<rnumber, interp_neighbours> *VEL, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS) : particles_io_base<particle_type>( - NAME, - TRAJ_SKIP, - data_file_id, - VEL->descriptor->comm) -{ - TIMEZONE("rFFTW_distributed_particles::rFFTW_distributed_particles"); - /* check that integration_steps has a valid value. - * If NDEBUG is defined, "assert" doesn't do anything. - * With NDEBUG defined, and an invalid INTEGRATION_STEPS, - * the particles will simply sit still. - * */ - assert((INTEGRATION_STEPS <= 6) && - (INTEGRATION_STEPS >= 1)); - /* check that the field layout is compatible with this class. - * if it's not, the code will fail in bad ways, most likely ending up - * with various CPUs locked in some MPI send/receive. - * therefore I prefer to just kill the code at this point, - * no matter whether or not NDEBUG is present. - * */ - if (interp_neighbours*2+2 > VEL->descriptor->subsizes[0]) - { - DEBUG_MSG("parameters incompatible with rFFTW_distributed_particles.\n" - "interp kernel size is %d, local_z_size is %d\n", - interp_neighbours*2+2, VEL->descriptor->subsizes[0]); - if (VEL->descriptor->myrank == 0) - std::cerr << "parameters incompatible with rFFTW_distributed_particles." << std::endl; - exit(0); - } - this->vel = VEL; - this->rhs.resize(INTEGRATION_STEPS); - this->integration_steps = INTEGRATION_STEPS; - /* the particles are expected to be evenly distributed among processes. - * therefore allocating twice that amount of memory seems enough. 
- * */ - this->state.reserve(2*this->nparticles / this->nprocs); - for (unsigned int i=0; i<this->rhs.size(); i++) - this->rhs[i].reserve(2*this->nparticles / this->nprocs); - - /* build communicators and stuff for interpolation */ - - /* number of processors per domain */ - this->domain_nprocs[-1] = 2; // domain in common with lower z CPU - this->domain_nprocs[ 0] = 1; // local domain - this->domain_nprocs[ 1] = 2; // domain in common with higher z CPU - - /* initialize domain bins */ - this->domain_particles[-1] = std::unordered_set<int>(); - this->domain_particles[ 0] = std::unordered_set<int>(); - this->domain_particles[ 1] = std::unordered_set<int>(); - this->domain_particles[-1].reserve(unsigned( - 1.5*(interp_neighbours*2+2)* - float(this->nparticles) / - this->nprocs)); - this->domain_particles[ 1].reserve(unsigned( - 1.5*(interp_neighbours*2+2)* - float(this->nparticles) / - this->nprocs)); - this->domain_particles[ 0].reserve(unsigned( - 1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)* - float(this->nparticles) / - this->nprocs)); - - int color, key; - MPI_Comm tmpcomm; - for (int rank=0; rank<this->nprocs; rank++) - { - color = MPI_UNDEFINED; - key = MPI_UNDEFINED; - if (this->myrank == rank) - { - color = rank; - key = 0; - } - if (this->myrank == MOD(rank + 1, this->nprocs)) - { - color = rank; - key = 1; - } - MPI_Comm_split(this->comm, color, key, &tmpcomm); - if (this->myrank == rank) - this->domain_comm[ 1] = tmpcomm; - if (this->myrank == MOD(rank+1, this->nprocs)) - this->domain_comm[-1] = tmpcomm; - - } - - /* following code may be useful in the future for the general case */ - //this->interp_comm.resize(this->vel->descriptor->sizes[0]); - //this->interp_nprocs.resize(this->vel->descriptor->sizes[0]); - //for (int zg=0; zg<this->vel->descriptor->sizes[0]; zg++) - //{ - // color = (this->vel->get_rank_info( - // (zg+.5)*this->vel->dz, rminz, rmaxz) ? 
zg : MPI_UNDEFINED); - // key = zg - this->vel->descriptor->starts[0] + interp_neighbours; - // MPI_Comm_split(this->comm, color, key, &this->interp_comm[zg]); - // if (this->interp_comm[zg] != MPI_COMM_NULL) - // MPI_Comm_size(this->interp_comm[zg], &this->interp_nprocs[zg]); - // else - // this->interp_nprocs[zg] = 0; - //} -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::~rFFTW_distributed_particles() -{ -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample( - rFFTW_interpolator<rnumber, interp_neighbours> *field, - const std::unordered_map<int, single_particle_state<particle_type>> &x, - const std::unordered_map<int, std::unordered_set<int>> &dp, - std::unordered_map<int, single_particle_state<POINT3D>> &y) -{ - TIMEZONE("rFFTW_distributed_particles::sample"); - double *yyy; - double *yy; - y.clear(); - /* local z domain */ - yy = new double[3]; - for (auto p: dp.at(0)) - { - (*field)(x.find(p)->second.data, yy); - y[p] = yy; - } - delete[] yy; - /* boundary z domains */ - int domain_index; - for (int rankpair = 0; rankpair < this->nprocs; rankpair++) - { - if (this->myrank == rankpair) - domain_index = 1; - if (this->myrank == MOD(rankpair+1, this->nprocs)) - domain_index = -1; - if (this->myrank == rankpair || - this->myrank == MOD(rankpair+1, this->nprocs)) - { - yy = new double[3*dp.at(domain_index).size()]; - yyy = new double[3*dp.at(domain_index).size()]; - int tindex; - tindex = 0; - // can this sorting be done more efficiently? - std::vector<int> ordered_dp; - { - TIMEZONE("rFFTW_distributed_particles::sample::ordered_dp"); - ordered_dp.reserve(dp.at(domain_index).size()); - for (auto p: dp.at(domain_index)) - ordered_dp.push_back(p); - //std::set<int> ordered_dp(dp.at(domain_index)); - std::sort(ordered_dp.begin(), ordered_dp.end()); - } - - for (auto p: ordered_dp) - //for (auto p: dp.at(domain_index)) - { - (*field)(x.at(p).data, yy + tindex*3); - tindex++; - } - { - TIMEZONE("rFFTW_distributed_particles::sample::MPI_Allreduce"); - MPI_Allreduce( - yy, - yyy, - 3*dp.at(domain_index).size(), - MPI_DOUBLE, - MPI_SUM, - this->domain_comm[domain_index]); - } - tindex = 0; - for (auto p: ordered_dp) - //for (auto p: dp.at(domain_index)) - { - y[p] = yyy + tindex*3; - tindex++; - } - delete[] yy; - delete[] yyy; - } - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - const std::unordered_map<int, std::unordered_set<int>> &dp, - std::unordered_map<int, single_particle_state<particle_type>> &y) -{ - std::unordered_map<int, single_particle_state<POINT3D>> yy; - switch(particle_type) - { - case VELOCITY_TRACER: - this->sample(this->vel, x, dp, yy); - y.clear(); - y.reserve(yy.size()); - y.rehash(this->nparticles); - for (auto &pp: yy) - y[pp.first] = pp.second.data; - break; - } -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample( - rFFTW_interpolator<rnumber, interp_neighbours> *field, - const char *dset_name) -{ - std::unordered_map<int, single_particle_state<POINT3D>> y; - this->sample(field, this->state, this->domain_particles, y); - 
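The sampling above relies on a simple reduction idiom: every rank writes the values it can interpolate locally into a zero-initialized buffer, and an MPI_Allreduce with MPI_SUM then leaves the complete result on all ranks of the communicator. Below is a minimal standalone sketch of that idiom (the index assignment is made up for the example):

#include <mpi.h>
#include <vector>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    const int nvalues = 8;                       // made-up number of sampled values
    std::vector<double> local(nvalues, 0.0), global(nvalues, 0.0);
    for (int i = 0; i < nvalues; i++)
        if (i % nprocs == rank)                  // pretend only these indices are local
            local[i] = 100.0*rank + i;

    // sum of "local values or zero" over all ranks = the full sampled array
    MPI_Allreduce(local.data(), global.data(), nvalues,
                  MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}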
this->write(dset_name, y); -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs() -{ - for (int i=this->integration_steps-2; i>=0; i--) - rhs[i+1] = rhs[i]; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute( - std::unordered_map<int, single_particle_state<particle_type>> &x, - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals, - std::unordered_map<int, std::unordered_set<int>> &dp) -{ - TIMEZONE("rFFTW_distributed_particles::redistribute"); - //DEBUG_MSG("entered redistribute\n"); - /* get new distribution of particles */ - std::unordered_map<int, std::unordered_set<int>> newdp; - { - TIMEZONE("sort_into_domains"); - this->sort_into_domains(x, newdp); - } - /* take care of particles that are leaving the shared domains */ - int dindex[2] = {-1, 1}; - // for each D of the 2 shared domains - { - TIMEZONE("Loop1"); - for (int di=0; di<2; di++) - // for all particles previously in D - for (auto p: dp[dindex[di]]) - { - // if the particle is no longer in D - if (newdp[dindex[di]].find(p) == newdp[dindex[di]].end()) - { - // and the particle is not in the local domain - if (newdp[0].find(p) == newdp[0].end()) - { - // remove the particle from the local list - x.erase(p); - for (unsigned int i=0; i<vals.size(); i++) - vals[i].erase(p); - } - // if the particle is in the local domain, do nothing - } - } - } - /* take care of particles that are entering the shared domains */ - /* neighbouring rank offsets */ - int ro[2]; - ro[0] = -1; - ro[1] = 1; - /* particles to send, particles to receive */ - std::vector<int> ps[2], pr[2]; - for (int tcounter = 0; tcounter < 2; tcounter++) - { - ps[tcounter].reserve(newdp[dindex[tcounter]].size()); - } - /* number of particles to send, number of particles to receive */ - int nps[2], npr[2]; - int rsrc, rdst; - /* get list of id-s to send */ - { - TIMEZONE("Loop2"); - for (auto &p: dp[0]) - { - for (int di=0; di<2; di++) - { - if (newdp[dindex[di]].find(p) != newdp[dindex[di]].end()) - ps[di].push_back(p); - } - } - } - /* prepare data for send recv */ - for (int i=0; i<2; i++) - nps[i] = ps[i].size(); - for (rsrc = 0; rsrc<this->nprocs; rsrc++) - for (int i=0; i<2; i++) - { - rdst = MOD(rsrc+ro[i], this->nprocs); - if (this->myrank == rsrc){ - TIMEZONE("MPI_Send"); - MPI_Send( - nps+i, - 1, - MPI_INTEGER, - rdst, - 2*(rsrc*this->nprocs + rdst)+i, - this->comm); - } - if (this->myrank == rdst){ - TIMEZONE("MPI_Recv"); - MPI_Recv( - npr+1-i, - 1, - MPI_INTEGER, - rsrc, - 2*(rsrc*this->nprocs + rdst)+i, - this->comm, - MPI_STATUS_IGNORE); - } - } - //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]); - //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]); - for (int i=0; i<2; i++) - pr[i].resize(npr[i]); - - int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1]; - buffer_size = (buffer_size > npr[0])? buffer_size : npr[0]; - buffer_size = (buffer_size > npr[1])? 
buffer_size : npr[1]; - //DEBUG_MSG("buffer size is %d\n", buffer_size); - double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())]; - for (rsrc = 0; rsrc<this->nprocs; rsrc++) - for (int i=0; i<2; i++) - { - rdst = MOD(rsrc+ro[i], this->nprocs); - if (this->myrank == rsrc && nps[i] > 0) - { - TIMEZONE("this->myrank == rsrc && nps[i] > 0"); - MPI_Send( - &ps[i].front(), - nps[i], - MPI_INTEGER, - rdst, - 2*(rsrc*this->nprocs + rdst), - this->comm); - int pcounter = 0; - for (int p: ps[i]) - { - std::copy(x[p].data, - x[p].data + state_dimension(particle_type), - buffer + pcounter*(1+vals.size())*state_dimension(particle_type)); - for (unsigned int tindex=0; tindex<vals.size(); tindex++) - { - std::copy(vals[tindex][p].data, - vals[tindex][p].data + state_dimension(particle_type), - buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type)); - } - pcounter++; - } - MPI_Send( - buffer, - nps[i]*(1+vals.size())*state_dimension(particle_type), - MPI_DOUBLE, - rdst, - 2*(rsrc*this->nprocs + rdst)+1, - this->comm); - } - if (this->myrank == rdst && npr[1-i] > 0) - { - TIMEZONE("this->myrank == rdst && npr[1-i] > 0"); - MPI_Recv( - &pr[1-i].front(), - npr[1-i], - MPI_INTEGER, - rsrc, - 2*(rsrc*this->nprocs + rdst), - this->comm, - MPI_STATUS_IGNORE); - MPI_Recv( - buffer, - npr[1-i]*(1+vals.size())*state_dimension(particle_type), - MPI_DOUBLE, - rsrc, - 2*(rsrc*this->nprocs + rdst)+1, - this->comm, - MPI_STATUS_IGNORE); - int pcounter = 0; - for (int p: pr[1-i]) - { - x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type); - newdp[1-i].insert(p); - for (unsigned int tindex=0; tindex<vals.size(); tindex++) - { - vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type); - } - pcounter++; - } - } - } - delete[] buffer; - // x has been changed, so newdp is obsolete - // we need to sort into domains again - { - TIMEZONE("sort_into_domains2"); - this->sort_into_domains(x, dp); - } - -#ifndef NDEBUG - /* check that all particles at x are local */ - //for (auto &pp: x) - // if (this->vel->get_rank(pp.second.data[2]) != this->myrank) - // { - // DEBUG_MSG("found particle %d with rank %d\n", - // pp.first, - // this->vel->get_rank(pp.second.data[2])); - // assert(false); - // } -#endif - //DEBUG_MSG("exiting redistribute\n"); -} - - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( - const int nsteps) -{ - this->get_rhs(this->state, this->domain_particles, this->rhs[0]); -#define AdamsBashforth_LOOP_PREAMBLE \ - for (auto &pp: this->state) \ - for (unsigned int i=0; i<state_dimension(particle_type); i++) - switch(nsteps) - { - case 1: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*this->rhs[0][pp.first][i]; - break; - case 2: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i] - - this->rhs[1][pp.first][i])/2; - break; - case 3: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i] - - 16*this->rhs[1][pp.first][i] - + 5*this->rhs[2][pp.first][i])/12; - break; - case 4: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i] - - 59*this->rhs[1][pp.first][i] - + 37*this->rhs[2][pp.first][i] - - 9*this->rhs[3][pp.first][i])/24; - break; - case 5: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i] - - 2774*this->rhs[1][pp.first][i] 
- + 2616*this->rhs[2][pp.first][i] - - 1274*this->rhs[3][pp.first][i] - + 251*this->rhs[4][pp.first][i])/720; - break; - case 6: - AdamsBashforth_LOOP_PREAMBLE - pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i] - - 7923*this->rhs[1][pp.first][i] - + 9982*this->rhs[2][pp.first][i] - - 7298*this->rhs[3][pp.first][i] - + 2877*this->rhs[4][pp.first][i] - - 475*this->rhs[5][pp.first][i])/1440; - break; - } - this->redistribute(this->state, this->rhs, this->domain_particles); - this->roll_rhs(); -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::step() -{ - TIMEZONE("rFFTW_distributed_particles::step"); - this->AdamsBashforth((this->iteration < this->integration_steps) ? - this->iteration+1 : - this->integration_steps); - this->iteration++; -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sort_into_domains( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, std::unordered_set<int>> &dp) -{ - TIMEZONE("rFFTW_distributed_particles::sort_into_domains"); - int tmpint1, tmpint2; - dp.clear(); - dp[-1] = std::unordered_set<int>(); - dp[ 0] = std::unordered_set<int>(); - dp[ 1] = std::unordered_set<int>(); - dp[-1].reserve(unsigned( - 1.5*(interp_neighbours*2+2)* - float(this->nparticles) / - this->nprocs)); - dp[ 1].reserve(unsigned( - 1.5*(interp_neighbours*2+2)* - float(this->nparticles) / - this->nprocs)); - dp[ 0].reserve(unsigned( - 1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)* - float(this->nparticles) / - this->nprocs)); - for (auto &xx: x) - { - if (this->vel->get_rank_info(xx.second.data[2], tmpint1, tmpint2)) - { - if (tmpint1 == tmpint2) - dp[0].insert(xx.first); - else - { - if (this->myrank == tmpint1) - dp[-1].insert(xx.first); - else - dp[ 1].insert(xx.first); - } - } - } -} - - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::read() -{ - TIMEZONE("rFFTW_distributed_particles::read"); - double *temp = new double[this->chunk_size*state_dimension(particle_type)]; - int tmpint1, tmpint2; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - //read state - if (this->myrank == 0){ - TIMEZONE("read_state_chunk"); - this->read_state_chunk(cindex, temp); - } - { - TIMEZONE("MPI_Bcast"); - MPI_Bcast( - temp, - this->chunk_size*state_dimension(particle_type), - MPI_DOUBLE, - 0, - this->comm); - } - for (unsigned int p=0; p<this->chunk_size; p++) - { - if (this->vel->get_rank_info(temp[state_dimension(particle_type)*p+2], tmpint1, tmpint2)) - { - this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; - } - } - //read rhs - if (this->iteration > 0){ - TIMEZONE("this->iteration > 0"); - for (int i=0; i<this->integration_steps; i++) - { - if (this->myrank == 0){ - TIMEZONE("read_rhs_chunk"); - this->read_rhs_chunk(cindex, i, temp); - } - { - TIMEZONE("MPI_Bcast"); - MPI_Bcast( - temp, - this->chunk_size*state_dimension(particle_type), - MPI_DOUBLE, - 0, - this->comm); - } - for (unsigned int p=0; p<this->chunk_size; p++) - { - auto pp = this->state.find(p+cindex*this->chunk_size); - if (pp != this->state.end()) - this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; - } - } - } - } - 
this->sort_into_domains(this->state, this->domain_particles); - DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size()); - for (int domain=-1; domain<=1; domain++) - { - DEBUG_MSG("domain %d nparticles = %ld\n", domain, this->domain_particles[domain].size()); - } - delete[] temp; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write( - const char *dset_name, - std::unordered_map<int, single_particle_state<POINT3D>> &y) -{ - TIMEZONE("rFFTW_distributed_particles::write"); - double *data = new double[this->chunk_size*3]; - double *yy = new double[this->chunk_size*3]; - //int pindex = 0; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - std::fill_n(yy, this->chunk_size*3, 0); - //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) - //{ - // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - // { - // std::copy(y[pindex].data, - // y[pindex].data + 3, - // yy + p*3); - // } - //} - for (int s = -1; s <= 0; s++) - for (auto &pp: this->domain_particles[s]) - { - if (pp >= int(cindex*this->chunk_size) && - pp < int((cindex+1)*this->chunk_size)) - { - std::copy(y[pp].data, - y[pp].data + 3, - yy + (pp-cindex*this->chunk_size)*3); - } - } - { - TIMEZONE("MPI_Allreduce"); - MPI_Allreduce( - yy, - data, - 3*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - } - if (this->myrank == 0){ - TIMEZONE("write_point3D_chunk"); - this->write_point3D_chunk(dset_name, cindex, data); - } - } - delete[] yy; - delete[] data; -} - -template <particle_types particle_type, class rnumber, int interp_neighbours> -void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write( - const bool write_rhs) -{ - TIMEZONE("rFFTW_distributed_particles::write2"); - double *temp0 = new double[this->chunk_size*state_dimension(particle_type)]; - double *temp1 = new double[this->chunk_size*state_dimension(particle_type)]; - //int pindex = 0; - for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++) - { - //write state - std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); - //pindex = cindex*this->chunk_size; - //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) - //{ - // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - // { - // TIMEZONE("std::copy"); - // std::copy(this->state[pindex].data, - // this->state[pindex].data + state_dimension(particle_type), - // temp0 + p*state_dimension(particle_type)); - // } - //} - for (int s = -1; s <= 0; s++) - for (auto &pp: this->domain_particles[s]) - { - if (pp >= int(cindex*this->chunk_size) && - pp < int((cindex+1)*this->chunk_size)) - { - std::copy(this->state[pp].data, - this->state[pp].data + state_dimension(particle_type), - temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type)); - } - } - { - TIMEZONE("MPI_Allreduce"); - MPI_Allreduce( - temp0, - temp1, - state_dimension(particle_type)*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - } - if (this->myrank == 0){ - TIMEZONE("write_state_chunk"); - this->write_state_chunk(cindex, temp1); - } - //write rhs - if (write_rhs){ - TIMEZONE("write_rhs"); - for (int i=0; i<this->integration_steps; i++) - { - std::fill_n(temp0, 
state_dimension(particle_type)*this->chunk_size, 0); - //pindex = cindex*this->chunk_size; - //for (unsigned int p=0; p<this->chunk_size; p++, pindex++) - //{ - // if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || - // this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) - // { - // TIMEZONE("std::copy"); - // std::copy(this->rhs[i][pindex].data, - // this->rhs[i][pindex].data + state_dimension(particle_type), - // temp0 + p*state_dimension(particle_type)); - // } - //} - for (int s = -1; s <= 0; s++) - for (auto &pp: this->domain_particles[s]) - { - if (pp >= int(cindex*this->chunk_size) && - pp < int((cindex+1)*this->chunk_size)) - { - std::copy(this->rhs[i][pp].data, - this->rhs[i][pp].data + state_dimension(particle_type), - temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type)); - } - } - { - TIMEZONE("MPI_Allreduce"); - MPI_Allreduce( - temp0, - temp1, - state_dimension(particle_type)*this->chunk_size, - MPI_DOUBLE, - MPI_SUM, - this->comm); - } - if (this->myrank == 0){ - TIMEZONE("write_rhs_chunk"); - this->write_rhs_chunk(cindex, i, temp1); - } - } - } - } - delete[] temp0; - delete[] temp1; -} - - -/*****************************************************************************/ -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 1>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 2>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 3>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 4>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 5>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 6>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 1>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 2>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 3>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 4>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 5>; -template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 6>; -/*****************************************************************************/ - diff --git a/bfps/cpp/rFFTW_distributed_particles.hpp b/bfps/cpp/rFFTW_distributed_particles.hpp deleted file mode 100644 index 400411d5f1fd6e597714be494a72272a76e01206..0000000000000000000000000000000000000000 --- a/bfps/cpp/rFFTW_distributed_particles.hpp +++ /dev/null @@ -1,144 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include <unordered_map> -#include <unordered_set> -#include <vector> -#include <hdf5.h> -#include "base.hpp" -#include "particles_base.hpp" -#include "fluid_solver_base.hpp" -#include "rFFTW_interpolator.hpp" - -#ifndef RFFTW_DISTRIBUTED_PARTICLES - -#define RFFTW_DISTRIBUTED_PARTICLES - -template <particle_types particle_type, class rnumber, int interp_neighbours> -class rFFTW_distributed_particles: public particles_io_base<particle_type> -{ - private: - // a "domain" corresponds to a region in 3D real space where a fixed set - // of MPI processes are required to participate in the interpolation - // formula (i.e. they all contain required information). - // we need to know how many processes there are for each of the domains - // to which the local process belongs. - std::unordered_map<int, int> domain_nprocs; - // each domain has an associated communicator, and we keep a list of the - // communicators to which the local process belongs - std::unordered_map<int, MPI_Comm> domain_comm; - // for each domain, we need a list of the IDs of the particles located - // in that domain - std::unordered_map<int, std::unordered_set<int>> domain_particles; - - // for each domain, we need the state of each particle - std::unordered_map<int, single_particle_state<particle_type>> state; - // for each domain, we also need the last few values of the right hand - // side of the ODE, since we use Adams-Bashforth integration - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs; - - public: - int integration_steps; - // this class only works with rFFTW interpolator - rFFTW_interpolator<rnumber, interp_neighbours> *vel; - - /* simulation parameters */ - double dt; - - /* methods */ - - /* constructor and destructor. - * allocate and deallocate: - * this->state - * this->rhs - * */ - rFFTW_distributed_particles( - const char *NAME, - const hid_t data_file_id, - rFFTW_interpolator<rnumber, interp_neighbours> *FIELD, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS = 2); - ~rFFTW_distributed_particles(); - - void sample( - rFFTW_interpolator<rnumber, interp_neighbours> *field, - const char *dset_name); - void sample( - rFFTW_interpolator<rnumber, interp_neighbours> *field, - const std::unordered_map<int, single_particle_state<particle_type>> &x, - const std::unordered_map<int, std::unordered_set<int>> &dp, - std::unordered_map<int, single_particle_state<POINT3D>> &y); - void get_rhs( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - const std::unordered_map<int, std::unordered_set<int>> &dp, - std::unordered_map<int, single_particle_state<particle_type>> &y); - - - /* given a list of particle positions, - * figure out which go into what local domain, and construct the relevant - * map of ID lists "dp" (for domain particles). - * */ - void sort_into_domains( - const std::unordered_map<int, single_particle_state<particle_type>> &x, - std::unordered_map<int, std::unordered_set<int>> &dp); - /* suppose the particles are currently badly distributed, and some - * arbitrary quantities (stored in "vals") are associated to the particles, - * and we need to properly distribute them among processes. - * that's what this function does. 
- * In practice it's only used to redistribute the rhs values (and it - * automatically redistributes the state x being passed). - * Some more comments are present in the .cpp file, but, in brief: the - * particles are simply moved from one domain to another. - * If it turns out that the new domain contains a process which does not - * know about a particle, that information is sent from the closest process. - * */ - void redistribute( - std::unordered_map<int, single_particle_state<particle_type>> &x, - std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals, - std::unordered_map<int, std::unordered_set<int>> &dp); - - - /* input/output */ - void read(); - void write( - const char *dset_name, - std::unordered_map<int, single_particle_state<POINT3D>> &y); - void write( - const char *dset_name, - std::unordered_map<int, single_particle_state<particle_type>> &y); - void write(const bool write_rhs = true); - - /* solvers */ - void step(); - void roll_rhs(); - void AdamsBashforth(const int nsteps); -}; - -#endif//RFFTW_DISTRIBUTED_PARTICLES - diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp deleted file mode 100644 index b8b21e8811d7f5286dc4edd00833c205539ea89c..0000000000000000000000000000000000000000 --- a/bfps/cpp/rFFTW_interpolator.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - -#include <cmath> -#include "rFFTW_interpolator.hpp" -#include "scope_timer.hpp" - -template <class rnumber, int interp_neighbours> -rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( - fluid_solver_base<rnumber> *fs, - base_polynomial_values BETA_POLYS, - rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) -{ - this->field = FIELD_POINTER; - - - // generate compute array - this->compute = new bool[this->descriptor->sizes[0]]; - std::fill_n(this->compute, this->descriptor->sizes[0], false); - for (int iz = this->descriptor->starts[0]-interp_neighbours-1; - iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours; - iz++) - this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; -} - -template <class rnumber, int interp_neighbours> -rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( - vorticity_equation<rnumber, FFTW> *fs, - base_polynomial_values BETA_POLYS, - rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) -{ -// this->field = FIELD_POINTER; -// -// -// // generate compute array -// this->compute = new bool[this->descriptor->sizes[0]]; -// std::fill_n(this->compute, this->descriptor->sizes[0], false); -// for (int iz = this->descriptor->starts[0]-interp_neighbours-1; -// iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours; -// iz++) -// this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; -} - -template <class rnumber, int interp_neighbours> -rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator() -{ - delete[] this->compute; -} - -template <class rnumber, int interp_neighbours> -bool rFFTW_interpolator<rnumber, interp_neighbours>::get_rank_info(double z, int &maxz_rank, int &minz_rank) -{ - int zg = int(floor(z/this->dz)); - minz_rank = this->descriptor->rank[MOD( - zg - interp_neighbours, - this->descriptor->sizes[0])]; - maxz_rank = this->descriptor->rank[MOD( - zg + 1 + interp_neighbours, - this->descriptor->sizes[0])]; - bool is_here = false; - for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++) - is_here = (is_here || - (this->descriptor->myrank == - this->descriptor->rank[MOD(zg+iz, this->descriptor->sizes[0])])); - return is_here; -} - -template <class rnumber, int interp_neighbours> -void rFFTW_interpolator<rnumber, interp_neighbours>::sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv) -{ - TIMEZONE("rFFTW_interpolator::sample"); - /* get grid coordinates */ - int *xg = new int[3*nparticles]; - double *xx = new double[3*nparticles]; - double *yy = new double[3*nparticles]; - std::fill_n(yy, 3*nparticles, 0.0); - this->get_grid_coordinates(nparticles, pdimension, x, xg, xx); - /* perform interpolation */ - for (int p=0; p<nparticles; p++) - if (this->compute[xg[p*3+2]]) - this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv); - MPI_Allreduce( - yy, - y, - 3*nparticles, - MPI_DOUBLE, - MPI_SUM, - this->descriptor->comm); - delete[] yy; - delete[] xg; - delete[] xx; -} - -template <class rnumber, int interp_neighbours> -void rFFTW_interpolator<rnumber, interp_neighbours>::operator()( - const int *xg, - const double *xx, - double *dest, - const int *deriv) 
-{ - TIMEZONE("rFFTW_interpolator::operator()"); - double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2]; - /* please note that the polynomials in z are computed for all the different - * iz values, independently of whether or not "myrank" will perform the - * computation for all the different iz slices. - * I don't know how big a deal this really is, but it is something that we can - * optimize. - * */ - if (deriv == NULL) - { - this->compute_beta(0, xx[0], bx); - this->compute_beta(0, xx[1], by); - this->compute_beta(0, xx[2], bz); - } - else - { - this->compute_beta(deriv[0], xx[0], bx); - this->compute_beta(deriv[1], xx[1], by); - this->compute_beta(deriv[2], xx[2], bz); - } - std::fill_n(dest, 3, 0); - ptrdiff_t bigiz, bigiy, bigix; - // loop over the 2*interp_neighbours + 2 z slices - for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++) - { - // bigiz is the z index of the cell containing the particles - // this->descriptor->sizes[0] is added before taking the modulo - // because we want to be sure that "bigiz" is a positive number. - // I'm no longer sure why I don't use the MOD function here. - bigiz = ptrdiff_t(((xg[2]+iz) + this->descriptor->sizes[0]) % this->descriptor->sizes[0]); - // once we know bigiz, we know whether "myrank" has the relevant slice. - // if not, go to next value of bigiz - if (this->descriptor->myrank == this->descriptor->rank[bigiz]) - { - for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++) - { - // bigiy is the y index of the cell - // since we have all the y indices in myrank, we can safely use the - // modulo value - bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1])); - for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++) - { - // bigix is the x index of the cell - bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2])); - // here we create the index to the current grid node - // note the removal of local_z_start from bigiz. 
- ptrdiff_t tindex = (((bigiz-this->descriptor->starts[0])*this->descriptor->sizes[1] + - bigiy)*(this->descriptor->sizes[2]+2) + - bigix)*3; - for (int c=0; c<3; c++) - dest[c] += this->field[tindex+c]*(bz[iz+interp_neighbours]* - by[iy+interp_neighbours]* - bx[ix+interp_neighbours]); - } - } - } - } -} - -template class rFFTW_interpolator<float, 1>; -template class rFFTW_interpolator<float, 2>; -template class rFFTW_interpolator<float, 3>; -template class rFFTW_interpolator<float, 4>; -template class rFFTW_interpolator<float, 5>; -template class rFFTW_interpolator<float, 6>; -template class rFFTW_interpolator<float, 7>; -template class rFFTW_interpolator<float, 8>; -template class rFFTW_interpolator<float, 9>; -template class rFFTW_interpolator<float, 10>; -template class rFFTW_interpolator<double, 1>; -template class rFFTW_interpolator<double, 2>; -template class rFFTW_interpolator<double, 3>; -template class rFFTW_interpolator<double, 4>; -template class rFFTW_interpolator<double, 5>; -template class rFFTW_interpolator<double, 6>; -template class rFFTW_interpolator<double, 7>; -template class rFFTW_interpolator<double, 8>; -template class rFFTW_interpolator<double, 9>; -template class rFFTW_interpolator<double, 10>; - diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp deleted file mode 100644 index 5088be8b2f3094fd96332af0c923d7cc905e4f3f..0000000000000000000000000000000000000000 --- a/bfps/cpp/rFFTW_interpolator.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include "field_descriptor.hpp" -#include "fftw_tools.hpp" -#include "fluid_solver_base.hpp" -#include "vorticity_equation.hpp" -#include "interpolator_base.hpp" - -#ifndef RFFTW_INTERPOLATOR - -#define RFFTW_INTERPOLATOR - -template <class rnumber, int interp_neighbours> -class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours> -{ - public: - using interpolator_base<rnumber, interp_neighbours>::operator(); - - /* pointer to field that has to be interpolated - * The reason this is a member variable is because I want this class - * to be consistent with the "interpolator" class, where a member - * variable is absolutely required (since that class uses padding). - * */ - rnumber *field; - - /* compute[iz] is an array that says whether or not the current MPI - * process is involved in the interpolation formula for a particle - * located in cell "iz". - * It is mostly used in the formula itself. 
- * This translates as the following condition: - * local_zstart - neighbours <= iz <= local_zend + 1 + neighbours - * I think it's cleaner to keep things in an array, especially since - * "local_zend" is shorthand for another arithmetic operation anyway. - * */ - bool *compute; - - - /* Constructors */ - rFFTW_interpolator( - fluid_solver_base<rnumber> *FSOLVER, - base_polynomial_values BETA_POLYS, - rnumber *FIELD_DATA); - - /* this constructor is empty, I just needed for a quick hack of the - * "vorticity_equation" class. - * It should be removed soon. - * */ - rFFTW_interpolator( - vorticity_equation<rnumber, FFTW> *FSOLVER, - base_polynomial_values BETA_POLYS, - rnumber *FIELD_DATA); - ~rFFTW_interpolator(); - - /* This method is provided for consistency with "interpolator", and it - * does not destroy input */ - inline int read_rFFTW(const void *src) - { - this->field = (rnumber*)src; - return EXIT_SUCCESS; - } - - /* This is used when "compute" is not enough. - * For a given z location, it gives the outermost ranks that are relevant - * for the interpolation formula. - * */ - bool get_rank_info(double z, int &maxz_rank, int &minz_rank); - - /* interpolate field at an array of locations. - * After interpolation is performed, call Allreduce for "y", over - * this->descriptor->comm --- generally MPI_COMM_WORLD. - * This is useful for the simple "particles" class, where particle - * information is synchronized across all processes. - * */ - void sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv = NULL); - /* interpolate 1 point. - * Result is kept local. - * This is used in the "rFFTW_distributed_particles" class, with the - * result being synchronized across the relevant "local particle - * communicator". - * */ - void operator()( - const int *__restrict__ xg, - const double *__restrict__ xx, - double *__restrict__ dest, - const int *deriv = NULL); -}; - -#endif//RFFTW_INTERPOLATOR - diff --git a/bfps/cpp/slab_field_particles.cpp b/bfps/cpp/slab_field_particles.cpp deleted file mode 100644 index 15fa363f6d277d34c6081fd545c4578e1f735929..0000000000000000000000000000000000000000 --- a/bfps/cpp/slab_field_particles.cpp +++ /dev/null @@ -1,799 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - - -#include <cmath> -#include <cassert> -#include <cstring> -#include <string> -#include <sstream> - -#include "base.hpp" -#include "slab_field_particles.hpp" -#include "fftw_tools.hpp" - - -extern int myrank, nprocs; - -template <class rnumber> -slab_field_particles<rnumber>::slab_field_particles( - const char *NAME, - fluid_solver_base<rnumber> *FSOLVER, - const int NPARTICLES, - const int NCOMPONENTS, - base_polynomial_values BETA_POLYS, - const int INTERP_NEIGHBOURS, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS) -{ - assert((NCOMPONENTS % 3) == 0); - assert((INTERP_NEIGHBOURS >= 1) || - (INTERP_NEIGHBOURS <= 8)); - assert((INTEGRATION_STEPS <= 6) && - (INTEGRATION_STEPS >= 1)); - strncpy(this->name, NAME, 256); - this->fs = FSOLVER; - this->nparticles = NPARTICLES; - this->ncomponents = NCOMPONENTS; - this->integration_steps = INTEGRATION_STEPS; - this->interp_neighbours = INTERP_NEIGHBOURS; - this->traj_skip = TRAJ_SKIP; - this->compute_beta = BETA_POLYS; - // in principle only the buffer width at the top needs the +1, - // but things are simpler if buffer_width is the same - this->buffer_width = this->interp_neighbours+1; - this->buffer_size = this->buffer_width*this->fs->rd->slice_size; - this->array_size = this->nparticles * this->ncomponents; - this->state = fftw_alloc_real(this->array_size); - std::fill_n(this->state, this->array_size, 0.0); - for (int i=0; i < this->integration_steps; i++) - { - this->rhs[i] = fftw_alloc_real(this->array_size); - std::fill_n(this->rhs[i], this->array_size, 0.0); - } - this->watching = new bool[this->fs->rd->nprocs*nparticles]; - std::fill_n(this->watching, this->fs->rd->nprocs*this->nparticles, false); - this->computing = new int[nparticles]; - - int tdims[4]; - tdims[0] = this->buffer_width*2*this->fs->rd->nprocs + this->fs->rd->sizes[0]; - tdims[1] = this->fs->rd->sizes[1]; - tdims[2] = this->fs->rd->sizes[2]; - tdims[3] = this->fs->rd->sizes[3]; - this->buffered_field_descriptor = new field_descriptor<rnumber>( - 4, tdims, - this->fs->rd->mpi_dtype, - this->fs->rd->comm); - - // compute dx, dy, dz; - this->dx = 4*acos(0) / (this->fs->dkx*this->fs->rd->sizes[2]); - this->dy = 4*acos(0) / (this->fs->dky*this->fs->rd->sizes[1]); - this->dz = 4*acos(0) / (this->fs->dkz*this->fs->rd->sizes[0]); - - // compute lower and upper bounds - this->lbound = new double[nprocs]; - this->ubound = new double[nprocs]; - double *tbound = new double[nprocs]; - std::fill_n(tbound, nprocs, 0.0); - tbound[this->fs->rd->myrank] = this->fs->rd->starts[0]*this->dz; - MPI_Allreduce( - tbound, - this->lbound, - nprocs, - MPI_DOUBLE, - MPI_SUM, - this->fs->rd->comm); - std::fill_n(tbound, nprocs, 0.0); - tbound[this->fs->rd->myrank] = (this->fs->rd->starts[0] + this->fs->rd->subsizes[0])*this->dz; - MPI_Allreduce( - tbound, - this->ubound, - nprocs, - MPI_DOUBLE, - MPI_SUM, - this->fs->rd->comm); - delete[] tbound; - //for (int r = 0; r<nprocs; r++) - // DEBUG_MSG( - // "lbound[%d] = %lg, ubound[%d] = %lg\n", - // r, this->lbound[r], - // r, this->ubound[r] - // ); -} - -template <class rnumber> -slab_field_particles<rnumber>::~slab_field_particles() -{ - delete[] this->computing; - delete[] this->watching; - fftw_free(this->state); - for (int i=0; i < this->integration_steps; i++) - { - fftw_free(this->rhs[i]); - } - delete[] this->lbound; - delete[] this->ubound; - delete 
this->buffered_field_descriptor; -} - -template <class rnumber> -void slab_field_particles<rnumber>::get_rhs(double *x, double *y) -{ - std::fill_n(y, this->array_size, 0.0); -} - -template <class rnumber> -void slab_field_particles<rnumber>::jump_estimate(double *dest) -{ - std::fill_n(dest, this->nparticles, 0.0); -} - -template <class rnumber> -int slab_field_particles<rnumber>::get_rank(double z) -{ - int tmp = this->fs->rd->rank[MOD(int(floor(z/this->dz)), this->fs->rd->sizes[0])]; - assert(tmp >= 0 && tmp < this->fs->rd->nprocs); - return tmp; -} - -template <class rnumber> -void slab_field_particles<rnumber>::synchronize_single_particle_state(int p, double *x, int source) -{ - if (source == -1) source = this->computing[p]; - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) for (int r=0; r<this->fs->rd->nprocs; r++) - if (r != source && - this->watching[r*this->nparticles+p]) - { - //DEBUG_MSG("synchronizing state %d from %d to %d\n", p, this->computing[p], r); - if (this->fs->rd->myrank == source) - MPI_Send( - x+p*this->ncomponents, - this->ncomponents, - MPI_DOUBLE, - r, - p+this->computing[p]*this->nparticles, - this->fs->rd->comm); - if (this->fs->rd->myrank == r) - MPI_Recv( - x+p*this->ncomponents, - this->ncomponents, - MPI_DOUBLE, - source, - p+this->computing[p]*this->nparticles, - this->fs->rd->comm, - MPI_STATUS_IGNORE); - } -} - -template <class rnumber> -void slab_field_particles<rnumber>::synchronize() -{ - double *tstate = fftw_alloc_real(this->array_size); - // first, synchronize state and jump across CPUs - std::fill_n(tstate, this->array_size, 0.0); - for (int p=0; p<this->nparticles; p++) - { - //if (this->watching[this->fs->rd->myrank*this->nparticles + p]) - //DEBUG_MSG( - // "in synchronize, position for particle %d is %g %g %g\n", - // p, - // this->state[p*this->ncomponents], - // this->state[p*this->ncomponents+1], - // this->state[p*this->ncomponents+2]); - if (this->fs->rd->myrank == this->computing[p]) - std::copy(this->state + p*this->ncomponents, - this->state + (p+1)*this->ncomponents, - tstate + p*this->ncomponents); - } - MPI_Allreduce( - tstate, - this->state, - this->array_size, - MPI_DOUBLE, - MPI_SUM, - this->fs->rd->comm); - if (this->integration_steps >= 1) - { - for (int i=0; i<this->integration_steps; i++) - { - std::fill_n(tstate, this->array_size, 0.0); - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - std::copy(this->rhs[i] + p*this->ncomponents, - this->rhs[i] + (p+1)*this->ncomponents, - tstate + p*this->ncomponents); - std::fill_n(this->rhs[i], this->array_size, 0.0); - MPI_Allreduce( - tstate, - this->rhs[i], - this->array_size, - MPI_DOUBLE, - MPI_SUM, - this->fs->rd->comm); - } - } - fftw_free(tstate); - // assignment of particles - for (int p=0; p<this->nparticles; p++) - { - this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]); - //DEBUG_MSG("synchronizing particles, particle %d computing is %d\n", p, this->computing[p]); - } - double *jump = fftw_alloc_real(this->nparticles); - this->jump_estimate(jump); - // now, see who needs to watch - bool *local_watching = new bool[this->fs->rd->nprocs*this->nparticles]; - std::fill_n(local_watching, this->fs->rd->nprocs*this->nparticles, false); - for (int p=0; p<this->nparticles; p++) - if (this->fs->rd->myrank == this->computing[p]) - { - local_watching[this->get_rank(this->state[this->ncomponents*p+2] )*this->nparticles+p] = true; - 
local_watching[this->get_rank(this->state[this->ncomponents*p+2]-jump[p])*this->nparticles+p] = true; - local_watching[this->get_rank(this->state[this->ncomponents*p+2]+jump[p])*this->nparticles+p] = true; - } - fftw_free(jump); - MPI_Allreduce( - local_watching, - this->watching, - this->nparticles*this->fs->rd->nprocs, - MPI_C_BOOL, - MPI_LOR, - this->fs->rd->comm); - delete[] local_watching; - for (int p=0; p<this->nparticles; p++) - DEBUG_MSG("watching = %d for particle %d\n", this->watching[this->fs->rd->myrank*nparticles+p], p); -} - - - -template <class rnumber> -void slab_field_particles<rnumber>::roll_rhs() -{ - for (int i=this->integration_steps-2; i>=0; i--) - std::copy(this->rhs[i], - this->rhs[i] + this->array_size, - this->rhs[i+1]); -} - - - -template <class rnumber> -void slab_field_particles<rnumber>::AdamsBashforth(int nsteps) -{ - ptrdiff_t ii; - this->get_rhs(this->state, this->rhs[0]); - //if (myrank == 0) - //{ - // DEBUG_MSG( - // "in AdamsBashforth for particles %s, integration_steps = %d, nsteps = %d, iteration = %d\n", - // this->name, - // this->integration_steps, - // nsteps, - // this->iteration); - // std::stringstream tstring; - // for (int p=0; p<this->nparticles; p++) - // tstring << " " << this->computing[p]; - // DEBUG_MSG("%s\n", tstring.str().c_str()); - // for (int i=0; i<this->integration_steps; i++) - // { - // std::stringstream tstring; - // for (int p=0; p<this->nparticles; p++) - // tstring << " " << this->rhs[i][p*3]; - // DEBUG_MSG("%s\n", tstring.str().c_str()); - // } - //} - switch(nsteps) - { - case 1: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*this->rhs[0][ii]; - } - break; - case 2: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(3*this->rhs[0][ii] - - this->rhs[1][ii])/2; - } - break; - case 3: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(23*this->rhs[0][ii] - - 16*this->rhs[1][ii] - + 5*this->rhs[2][ii])/12; - } - break; - case 4: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(55*this->rhs[0][ii] - - 59*this->rhs[1][ii] - + 37*this->rhs[2][ii] - - 9*this->rhs[3][ii])/24; - } - break; - case 5: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(1901*this->rhs[0][ii] - - 2774*this->rhs[1][ii] - + 2616*this->rhs[2][ii] - - 1274*this->rhs[3][ii] - + 251*this->rhs[4][ii])/720; - } - break; - case 6: - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - for (int i=0; i<this->ncomponents; i++) - { - ii = p*this->ncomponents+i; - this->state[ii] += this->dt*(4277*this->rhs[0][ii] - - 7923*this->rhs[1][ii] - + 9982*this->rhs[2][ii] - - 7298*this->rhs[3][ii] - + 2877*this->rhs[4][ii] - - 475*this->rhs[5][ii])/1440; - } - break; - } - this->roll_rhs(); -} - - -template <class rnumber> -void slab_field_particles<rnumber>::step() -{ - this->AdamsBashforth((this->iteration < 
this->integration_steps) ? this->iteration+1 : this->integration_steps); - //this->cRK4(); - this->iteration++; - this->synchronize(); -} - - -template <class rnumber> -void slab_field_particles<rnumber>::Euler() -{ - double *y = fftw_alloc_real(this->array_size); - this->get_rhs(this->state, y); - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - { - for (int i=0; i<this->ncomponents; i++) - this->state[p*this->ncomponents+i] += this->dt*y[p*this->ncomponents+i]; - //DEBUG_MSG( - // "particle %d state is %lg %lg %lg\n", - // p, this->state[p*this->ncomponents], this->state[p*this->ncomponents+1], this->state[p*this->ncomponents+2]); - } - fftw_free(y); -} - - -template <class rnumber> -void slab_field_particles<rnumber>::Heun() -{ - double *y = new double[this->array_size]; - double dtfactor[] = {0.0, this->dt}; - this->get_rhs(this->state, this->rhs[0]); - for (int p=0; p<this->nparticles; p++) - { - this->synchronize_single_particle_state(p, this->rhs[0]); - //int crank = this->get_rank(this->state[p*3 + 2]); - //DEBUG_MSG( - // "k 0 iteration %d particle is %d, crank is %d, computing rank is %d, position is %g %g %g, rhs is %g %g %g\n", - // this->iteration, p, - // crank, this->computing[p], - // this->state[p*3], this->state[p*3+1], this->state[p*3+2], - // this->rhs[0][p*3], this->rhs[0][p*3+1], this->rhs[0][p*3+2]); - } - for (int kindex = 1; kindex < 2; kindex++) - { - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - for (int i=0; i<this->ncomponents; i++) - { - ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i; - y[tindex] = this->state[tindex] + dtfactor[kindex]*this->rhs[kindex-1][tindex]; - } - } - for (int p=0; p<this->nparticles; p++) - this->synchronize_single_particle_state(p, y); - this->get_rhs(y, this->rhs[kindex]); - for (int p=0; p<this->nparticles; p++) - { - this->synchronize_single_particle_state(p, this->rhs[kindex]); - DEBUG_MSG( - "k %d iteration %d particle is %d, position is %g %g %g, rhs is %g %g %g\n", - kindex, this->iteration, p, - y[p*3], y[p*3+1], y[p*3+2], - this->rhs[kindex][p*3], this->rhs[kindex][p*3+1], this->rhs[kindex][p*3+2]); - } - } - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - { - for (int i=0; i<this->ncomponents; i++) - { - ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i; - this->state[tindex] += this->dt*(this->rhs[0][tindex] + this->rhs[1][tindex])/2; - } - //int crank = this->get_rank(this->state[p*3 + 2]); - //if (crank != this->computing[p]) - // DEBUG_MSG( - // "k _ iteration %d particle is %d, crank is %d, computing rank is %d, position is %g %g %g\n", - // this->iteration, p, - // crank, this->computing[p], - // this->state[p*3], this->state[p*3+1], this->state[p*3+2]); - } - } - delete[] y; - DEBUG_MSG("exiting Heun\n"); -} - - -template <class rnumber> -void slab_field_particles<rnumber>::cRK4() -{ - double *y = new double[this->array_size]; - double dtfactor[] = {0.0, this->dt/2, this->dt/2, this->dt}; - this->get_rhs(this->state, this->rhs[0]); - for (int p=0; p<this->nparticles; p++) - this->synchronize_single_particle_state(p, this->rhs[0]); - for (int kindex = 1; kindex < 4; kindex++) - { - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - for (int i=0; i<this->ncomponents; i++) - { - ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i; - y[tindex] = this->state[tindex] + 
dtfactor[kindex]*this->rhs[kindex-1][tindex]; - } - } - for (int p=0; p<this->nparticles; p++) - this->synchronize_single_particle_state(p, y); - this->get_rhs(y, this->rhs[kindex]); - for (int p=0; p<this->nparticles; p++) - this->synchronize_single_particle_state(p, this->rhs[kindex]); - } - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - for (int i=0; i<this->ncomponents; i++) - { - ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i; - this->state[tindex] += this->dt*(this->rhs[0][tindex] + - 2*(this->rhs[1][tindex] + this->rhs[2][tindex]) + - this->rhs[3][tindex])/6; - } - } - delete[] y; -} - -template <class rnumber> -void slab_field_particles<rnumber>::get_grid_coordinates(double *x, int *xg, double *xx) -{ - static double grid_size[] = {this->dx, this->dy, this->dz}; - double tval; - std::fill_n(xg, this->nparticles*3, 0); - std::fill_n(xx, this->nparticles*3, 0.0); - for (int p=0; p<this->nparticles; p++) if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - { - for (int c=0; c<3; c++) - { - tval = floor(x[p*this->ncomponents+c]/grid_size[c]); - xg[p*3+c] = MOD(int(tval), this->fs->rd->sizes[2-c]); - xx[p*3+c] = (x[p*this->ncomponents+c] - tval*grid_size[c]) / grid_size[c]; - } - xg[p*3+2] -= this->fs->rd->starts[0]; - if (this->fs->rd->myrank == this->fs->rd->rank[0] && - xg[p*3+2] > this->fs->rd->subsizes[0]) - xg[p*3+2] -= this->fs->rd->sizes[0]; - //DEBUG_MSG( - // "particle %d x is %lg %lg %lg xx is %lg %lg %lg xg is %d %d %d\n", - // p, - // x[p*3], x[p*3+1], x[p*3+2], - // xx[p*3], xx[p*3+1], xx[p*3+2], - // xg[p*3], xg[p*3+1], xg[p*3+2]); - } -} - -template <class rnumber> -void slab_field_particles<rnumber>::interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv) -{ - double bx[this->interp_neighbours*2+2], by[this->interp_neighbours*2+2], bz[this->interp_neighbours*2+2]; - this->compute_beta(deriv[0], xx[0], bx); - this->compute_beta(deriv[1], xx[1], by); - this->compute_beta(deriv[2], xx[2], bz); - //DEBUG_MSG("computed beta polynomials\n"); - std::fill_n(dest, 3, 0); - for (int iz = -this->interp_neighbours; iz <= this->interp_neighbours+1; iz++) - for (int iy = -this->interp_neighbours; iy <= this->interp_neighbours+1; iy++) - for (int ix = -this->interp_neighbours; ix <= this->interp_neighbours+1; ix++) - for (int c=0; c<3; c++) - { - //DEBUG_MSG( - // "%d %d %d %d %d %d %d %ld %ld\n", - // xg[2], xg[1], xg[0], iz, iy, ix, c, - // ((ptrdiff_t(xg[2]+iz) *this->fs->rd->subsizes[1] + - // ptrdiff_t(xg[1]+iy))*this->fs->rd->subsizes[2] + - // ptrdiff_t(xg[0]+ix))*3+c, - // this->buffered_field_descriptor->local_size - // ); - dest[c] += field[((ptrdiff_t( xg[2]+iz ) *this->fs->rd->subsizes[1] + - ptrdiff_t(MOD(xg[1]+iy, this->fs->rd->sizes[1])))*this->fs->rd->subsizes[2] + - ptrdiff_t(MOD(xg[0]+ix, this->fs->rd->sizes[2])))*3+c]*(bz[iz+this->interp_neighbours]* - by[iy+this->interp_neighbours]* - bx[ix+this->interp_neighbours]); - } -} - -template <class rnumber> -void slab_field_particles<rnumber>::linear_interpolation(rnumber *field, int *xg, double *xx, double *dest, int *deriv) -{ - //ptrdiff_t tindex, tmp; - //tindex = ((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0] )*3; - //tmp = ptrdiff_t(xg[2]); - //DEBUG_MSG( - // "linear interpolation xx is %lg %lg %lg xg is %d %d %d," - // " corner index is ((%ld*%d+%d)*%d+%d)*3 = %ld\n", - // xx[0], xx[1], xx[2], - // xg[0], xg[1], xg[2], - // tmp, this->fs->rd->subsizes[1], xg[1], 
this->fs->rd->subsizes[2], xg[0], - // tindex); - for (int c=0; c<3; c++) - dest[c] = (field[((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0] )*3+c]*((1-xx[0])*(1-xx[1])*(1-xx[2])) + - field[((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*(( xx[0])*(1-xx[1])*(1-xx[2])) + - field[((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0] )*3+c]*((1-xx[0])*( xx[1])*(1-xx[2])) + - field[((ptrdiff_t(xg[2] )*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*(( xx[0])*( xx[1])*(1-xx[2])) + - field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0] )*3+c]*((1-xx[0])*(1-xx[1])*( xx[2])) + - field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1] )*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*(( xx[0])*(1-xx[1])*( xx[2])) + - field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0] )*3+c]*((1-xx[0])*( xx[1])*( xx[2])) + - field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*(( xx[0])*( xx[1])*( xx[2]))); -} - -template <class rnumber> -void slab_field_particles<rnumber>::read(hid_t data_file_id) -{ - //DEBUG_MSG("aloha\n"); - if (this->fs->rd->myrank == 0) - { - std::string temp_string = (std::string("/particles/") + - std::string(this->name) + - std::string("/state")); - hid_t Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); - hid_t mspace, rspace; - hsize_t count[4], offset[4]; - rspace = H5Dget_space(Cdset); - H5Sget_simple_extent_dims(rspace, count, NULL); - count[0] = 1; - offset[0] = this->iteration / this->traj_skip; - offset[1] = 0; - offset[2] = 0; - mspace = H5Screate_simple(3, count, NULL); - H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->state); - H5Sclose(mspace); - H5Sclose(rspace); - H5Dclose(Cdset); - if (this->iteration > 0) - { - temp_string = (std::string("/particles/") + - std::string(this->name) + - std::string("/rhs")); - Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); - rspace = H5Dget_space(Cdset); - H5Sget_simple_extent_dims(rspace, count, NULL); - //reading from last available position - offset[0] = count[0] - 1; - offset[3] = 0; - count[0] = 1; - count[1] = 1; - mspace = H5Screate_simple(4, count, NULL); - for (int i=0; i<this->integration_steps; i++) - { - offset[1] = i; - H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->rhs[i]); - } - H5Sclose(mspace); - H5Sclose(rspace); - H5Dclose(Cdset); - } - } - MPI_Bcast( - this->state, - this->array_size, - MPI_DOUBLE, - 0, - this->fs->rd->comm); - for (int i = 0; i<this->integration_steps; i++) - { - MPI_Bcast( - this->rhs[i], - this->array_size, - MPI_DOUBLE, - 0, - this->fs->rd->comm); - } - // initial assignment of particles - for (int p=0; p<this->nparticles; p++) - { - this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]); - //DEBUG_MSG("reading particles, particle %d computing is %d\n", p, this->computing[p]); - } - // now actual synchronization - this->synchronize(); -} - -template <class rnumber> -void slab_field_particles<rnumber>::write(hid_t data_file_id, bool write_rhs) -{ - if (this->fs->rd->myrank == 0) - { - std::string temp_string = (std::string("/particles/") + - std::string(this->name) + - std::string("/state")); - hid_t 
Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); - hid_t mspace, wspace; - hsize_t count[4], offset[4]; - wspace = H5Dget_space(Cdset); - H5Sget_simple_extent_dims(wspace, count, NULL); - count[0] = 1; - offset[0] = this->iteration / this->traj_skip; - offset[1] = 0; - offset[2] = 0; - mspace = H5Screate_simple(3, count, NULL); - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->state); - H5Sclose(mspace); - H5Sclose(wspace); - H5Dclose(Cdset); - if (write_rhs) - { - temp_string = (std::string("/particles/") + - std::string(this->name) + - std::string("/rhs")); - Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); - wspace = H5Dget_space(Cdset); - H5Sget_simple_extent_dims(wspace, count, NULL); - //writing to last available position - offset[0] = count[0] - 1; - count[0] = 1; - count[1] = 1; - offset[3] = 0; - mspace = H5Screate_simple(4, count, NULL); - for (int i=0; i<this->integration_steps; i++) - { - offset[1] = i; - H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); - H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->rhs[i]); - } - H5Sclose(mspace); - H5Sclose(wspace); - H5Dclose(Cdset); - } - } -} - - - -/*****************************************************************************/ -/* macro for specializations to numeric types compatible with FFTW */ -#define SLAB_FIELD_PARTICLES_DEFINITIONS(FFTW, R, MPI_RNUM) \ - \ -template <> \ -void slab_field_particles<R>::rFFTW_to_buffered(R *src, R *dst) \ -{ \ - /* do big copy of middle stuff */ \ - std::copy(src, \ - src + this->fs->rd->local_size, \ - dst + this->buffer_size); \ - int rsrc; \ - /* get upper slices */ \ - for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \ - { \ - rsrc = this->fs->rd->rank[(this->fs->rd->all_start0[rdst] + \ - this->fs->rd->all_size0[rdst]) % \ - this->fs->rd->sizes[0]]; \ - if (this->fs->rd->myrank == rsrc) \ - MPI_Send( \ - (void*)(src), \ - this->buffer_size, \ - MPI_RNUM, \ - rdst, \ - 2*(rsrc*this->fs->rd->nprocs + rdst), \ - this->fs->rd->comm); \ - if (this->fs->rd->myrank == rdst) \ - MPI_Recv( \ - (void*)(dst + this->buffer_size + this->fs->rd->local_size), \ - this->buffer_size, \ - MPI_RNUM, \ - rsrc, \ - 2*(rsrc*this->fs->rd->nprocs + rdst), \ - this->fs->rd->comm, \ - MPI_STATUS_IGNORE); \ - } \ - /* get lower slices */ \ - for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \ - { \ - rsrc = this->fs->rd->rank[MOD(this->fs->rd->all_start0[rdst] - 1, \ - this->fs->rd->sizes[0])]; \ - if (this->fs->rd->myrank == rsrc) \ - MPI_Send( \ - (void*)(src + this->fs->rd->local_size - this->buffer_size), \ - this->buffer_size, \ - MPI_RNUM, \ - rdst, \ - 2*(rsrc*this->fs->rd->nprocs + rdst)+1, \ - this->fs->rd->comm); \ - if (this->fs->rd->myrank == rdst) \ - MPI_Recv( \ - (void*)(dst), \ - this->buffer_size, \ - MPI_RNUM, \ - rsrc, \ - 2*(rsrc*this->fs->rd->nprocs + rdst)+1, \ - this->fs->rd->comm, \ - MPI_STATUS_IGNORE); \ - } \ -} \ -/*****************************************************************************/ - - - -/*****************************************************************************/ -/* now actually use the macro defined above */ -SLAB_FIELD_PARTICLES_DEFINITIONS( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT) -SLAB_FIELD_PARTICLES_DEFINITIONS( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE) -/*****************************************************************************/ - - - 
-/*****************************************************************************/ -/* finally, force generation of code for single precision */ -template class slab_field_particles<float>; -template class slab_field_particles<double>; -/*****************************************************************************/ diff --git a/bfps/cpp/slab_field_particles.hpp b/bfps/cpp/slab_field_particles.hpp deleted file mode 100644 index 15f9477bbfb680be17390447ce88bc40cd7471e2..0000000000000000000000000000000000000000 --- a/bfps/cpp/slab_field_particles.hpp +++ /dev/null @@ -1,149 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include <hdf5.h> -#include "base.hpp" -#include "fluid_solver_base.hpp" -#include "interpolator.hpp" - -#ifndef SLAB_FIELD_PARTICLES - -#define SLAB_FIELD_PARTICLES - -extern int myrank, nprocs; - -template <class rnumber> -class slab_field_particles -{ - protected: - //typedef void (slab_field_particles<rnumber>::*tensor_product_interpolation_formula)( - // rnumber *field, - // int *xg, - // double *xx, - // double *dest, - // int *deriv); - public: - fluid_solver_base<rnumber> *fs; - field_descriptor<rnumber> *buffered_field_descriptor; - - /* watching is an array of shape [nparticles], with - * watching[p] being true if particle p is in the domain of myrank - * or in the buffer regions. - * watching is not really being used right now, since I don't do partial - * synchronizations of particles. - * we may do this at some point in the future, if it seems needed... - * */ - bool *watching; - /* computing is an array of shape [nparticles], with - * computing[p] being the rank that is currently working on particle p - * */ - int *computing; - - /* state will generally hold all the information about the particles. - * in the beginning, we will only need to solve 3D ODEs, but I figured - * a general ncomponents is better, since we may change our minds. - * */ - double *state; - double *rhs[6]; - int nparticles; - int ncomponents; - int array_size; - int interp_neighbours; - int buffer_width; - int integration_steps; - int traj_skip; - ptrdiff_t buffer_size; - double *lbound; - double *ubound; - //tensor_product_interpolation_formula spline_formula; - base_polynomial_values compute_beta; - - /* simulation parameters */ - char name[256]; - int iteration; - double dt; - - /* physical parameters of field */ - rnumber dx, dy, dz; - - /* methods */ - - /* constructor and destructor. 
- * allocate and deallocate: - * this->state - * this->lbound - * this->ubound - * this->watching - * */ - slab_field_particles( - const char *NAME, - fluid_solver_base<rnumber> *FSOLVER, - const int NPARTICLES, - const int NCOMPONENTS, - base_polynomial_values BETA_POLYS, - const int INTERP_NEIGHBOURS, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS = 2); - ~slab_field_particles(); - - /* an Euler step is needed to compute an estimate of future positions, - * which is needed for synchronization. - * */ - virtual void jump_estimate(double *jump_length); - /* function get_rhs is virtual since we want children to do different things, - * depending on the type of particle. - * */ - virtual void get_rhs(double *x, double *rhs); - - /* generic methods, should work for all children of this class */ - int get_rank(double z); // get rank for given value of z - void synchronize(); - void synchronize_single_particle_state(int p, double *x, int source_id = -1); - void get_grid_coordinates(double *x, int *xg, double *xx); - void linear_interpolation(rnumber *field, int *xg, double *xx, double *dest, int *deriv); - void interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv); - - void rFFTW_to_buffered(rnumber *src, rnumber *dst); - - /* generic methods, should work for all children of this class */ - void read(hid_t data_file_id); - void write(hid_t data_file_id, bool write_rhs = true); - - /* solver stuff */ - void step(); - void roll_rhs(); - void AdamsBashforth(int nsteps); - void Euler(); - void Heun(); - void cRK4(); -}; - - -#endif//SLAB_FIELD_PARTICLES - diff --git a/bfps/cpp/spline.hpp b/bfps/cpp/spline.hpp deleted file mode 100644 index d66d2b1eb42278b987072ffff24d0123c86a1e2f..0000000000000000000000000000000000000000 --- a/bfps/cpp/spline.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef SPLINE_HPP -#define SPLINE_HPP - -#include "spline_n1.hpp" -#include "spline_n2.hpp" -#include "spline_n3.hpp" -#include "spline_n4.hpp" -#include "spline_n5.hpp" -#include "spline_n6.hpp" -#include "spline_n7.hpp" -#include "spline_n8.hpp" -#include "spline_n9.hpp" -#include "spline_n10.hpp" - -#endif diff --git a/bfps/cpp/tracers.cpp b/bfps/cpp/tracers.cpp deleted file mode 100644 index 3d9fbfb6a1e357d70452466b6cc901659444539d..0000000000000000000000000000000000000000 --- a/bfps/cpp/tracers.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/********************************************************************** -* * -* Copyright 2015 Max Planck Institute * -* for Dynamics and Self-Organization * -* * -* This file is part of bfps. * -* * -* bfps is free software: you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published * -* by the Free Software Foundation, either version 3 of the License, * -* or (at your option) any later version. * -* * -* bfps is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * -* * -* Contact: Cristian.Lalescu@ds.mpg.de * -* * -**********************************************************************/ - - - -#define NDEBUG - - -#include <cmath> -#include "base.hpp" -#include "fftw_tools.hpp" -#include "tracers.hpp" - -template <class rnumber> -void tracers<rnumber>::jump_estimate(double *jump) -{ - int deriv[] = {0, 0, 0}; - int *xg = new int[this->array_size]; - double *xx = new double[this->array_size]; - rnumber *vel = this->data + this->buffer_size; - double tmp[3]; - /* get grid coordinates */ - this->get_grid_coordinates(this->state, xg, xx); - - /* perform interpolation */ - for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) - { - this->interpolation_formula(vel, xg + p*3, xx + p*3, tmp, deriv); - jump[p] = fabs(3*this->dt * tmp[2]); - if (jump[p] < this->dz*1.01) - jump[p] = this->dz*1.01; - } - delete[] xg; - delete[] xx; -} - -template <class rnumber> -void tracers<rnumber>::get_rhs(double *x, double *y) -{ - std::fill_n(y, this->array_size, 0.0); - int deriv[] = {0, 0, 0}; - /* get grid coordinates */ - int *xg = new int[this->array_size]; - double *xx = new double[this->array_size]; - rnumber *vel = this->data + this->buffer_size; - this->get_grid_coordinates(x, xg, xx); - //DEBUG_MSG( - // "position is %g %g %g, grid_coords are %d %d %d %g %g %g\n", - // x[0], x[1], x[2], - // xg[0], xg[1], xg[2], - // xx[0], xx[1], xx[2]); - /* perform interpolation */ - for (int p=0; p<this->nparticles; p++) - { - if (this->watching[this->fs->rd->myrank*this->nparticles+p]) - { - int crank = this->get_rank(x[p*3 + 2]); - if (this->fs->rd->myrank == crank) - { - this->interpolation_formula(vel, xg + p*3, xx + p*3, y + p*3, deriv); - DEBUG_MSG( - "position is %g %g %g %d %d %d %g %g %g, result is %g %g %g\n", - x[p*3], x[p*3+1], x[p*3+2], - xg[p*3], xg[p*3+1], xg[p*3+2], - xx[p*3], xx[p*3+1], xx[p*3+2], - y[p*3], y[p*3+1], y[p*3+2]); - } - if (crank != this->computing[p]) - { - this->synchronize_single_particle_state(p, y, crank); - } - //DEBUG_MSG( - // "after synch crank is %d, computing rank is %d, position is %g %g %g, result is %g %g %g\n", - // this->iteration, p, - // crank, this->computing[p], - // x[p*3], x[p*3+1], x[p*3+2], - // y[p*3], y[p*3+1], y[p*3+2]); - } - } - delete[] xg; - delete[] xx; -} - -template<class rnumber> -void tracers<rnumber>::update_field(bool clip_on) -{ - if (clip_on) - clip_zero_padding<rnumber>(this->fs->rd, this->source_data, 3); - this->rFFTW_to_buffered(this->source_data, this->data); -} - -/*****************************************************************************/ -/* macro for specializations to numeric types compatible with FFTW */ - -#define TRACERS_DEFINITIONS(FFTW, R, MPI_RNUM, MPI_CNUM) \ - \ -template <> \ -tracers<R>::tracers( \ - const char *NAME, \ - fluid_solver_base<R> *FSOLVER, \ - const int NPARTICLES, \ - base_polynomial_values BETA_POLYS, \ - const int NEIGHBOURS, \ - const int TRAJ_SKIP, \ - const int INTEGRATION_STEPS, \ - R *SOURCE_DATA) : slab_field_particles<R>( \ - NAME, \ - FSOLVER, \ - NPARTICLES, \ - 3, \ - BETA_POLYS, \ - NEIGHBOURS, \ - TRAJ_SKIP, \ - INTEGRATION_STEPS) \ -{ \ - this->source_data = SOURCE_DATA; \ - this->data = FFTW(alloc_real)(this->buffered_field_descriptor->local_size); \ -} \ - \ -template<> \ -tracers<R>::~tracers() \ -{ \ - FFTW(free)(this->data); \ -} \ - \ -template <> \ -void tracers<R>::sample_vec_field(R *vec_field, double *vec_values) \ -{ \ - vec_field += this->buffer_size; \ - double 
*vec_local = new double[this->array_size]; \ - std::fill_n(vec_local, this->array_size, 0.0); \ - int deriv[] = {0, 0, 0}; \ - /* get grid coordinates */ \ - int *xg = new int[this->array_size]; \ - double *xx = new double[this->array_size]; \ - this->get_grid_coordinates(this->state, xg, xx); \ - /* perform interpolation */ \ - for (int p=0; p<this->nparticles; p++) \ - if (this->fs->rd->myrank == this->computing[p]) \ - this->interpolation_formula( \ - vec_field, \ - xg + p*3, \ - xx + p*3, \ - vec_local + p*3, \ - deriv); \ - MPI_Allreduce( \ - vec_local, \ - vec_values, \ - this->array_size, \ - MPI_DOUBLE, \ - MPI_SUM, \ - this->fs->rd->comm); \ - delete[] xg; \ - delete[] xx; \ - delete[] vec_local; \ -} \ - -/*****************************************************************************/ - - - -/*****************************************************************************/ -/* now actually use the macro defined above */ -TRACERS_DEFINITIONS( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -TRACERS_DEFINITIONS( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) -/*****************************************************************************/ - - - -/*****************************************************************************/ -/* finally, force generation of code */ -template class tracers<float>; -template class tracers<double>; -/*****************************************************************************/ - diff --git a/bfps/test/test_Parseval.py b/bfps/test/test_Parseval.py new file mode 100644 index 0000000000000000000000000000000000000000..00a88d24ca615375dcfc24b82db15b8f3496fcc1 --- /dev/null +++ b/bfps/test/test_Parseval.py @@ -0,0 +1,38 @@ +#! /usr/bin/env python + +import numpy as np +import sys + +import bfps +from bfps import DNS + +def main(): + niterations = 10 + nlist = [16, 32, 48, 24, 64, 12] + for ii in range(len(nlist)): + c = DNS() + c.launch( + ['NSVE', + '--nx', str(nlist[ii]), + '--ny', str(nlist[(ii+1)%(len(nlist))]), + '--nz', str(nlist[(ii+2)%(len(nlist))]), + '--Lx', str(2+np.random.random()), + '--Ly', str(2+np.random.random()), + '--Lz', str(2+np.random.random()), + '--simname', 'test_Parseval_{0}'.format(ii), + '--np', '4', + '--ntpp', '1', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--wd', './'] + + sys.argv[1:]) + c.compute_statistics() + Parseval_error = np.abs((c.statistics['energy(t)'] - c.statistics['renergy(t)']) / c.statistics['renergy(t)']) + assert(np.max(Parseval_error) < 1e-6) + print('SUCCESS!!! Parseval test passed for unequal nx, ny, nz and random Lx, Ly, Lz') + return None + +if __name__ == '__main__': + main() + diff --git a/bfps/test/test_bfps_NSVEparticles.py b/bfps/test/test_bfps_NSVEparticles.py index ab77e2103ccda7685cebe759f8e11cfe2a5b5ec9..fe1e7875a651b17dd9180f3cbe6d6bfe1f1b5c27 100644 --- a/bfps/test/test_bfps_NSVEparticles.py +++ b/bfps/test/test_bfps_NSVEparticles.py @@ -1,4 +1,29 @@ #! /usr/bin/env python +####################################################################### +# # +# Copyright 2019 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. 
# +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + import os import numpy as np @@ -18,11 +43,13 @@ def main(): ['NSVEparticles', '-n', '32', '--src-simname', 'B32p1e4', + '--forcing_type', 'linear', '--src-wd', bfps.lib_dir + '/test', '--src-iteration', '0', '--simname', 'dns_nsveparticles', '--np', '4', '--ntpp', '1', + '--fftw_plan_rigor', 'FFTW_PATIENT', '--niter_todo', '{0}'.format(niterations), '--niter_out', '{0}'.format(niterations), '--niter_stat', '1', @@ -41,13 +68,16 @@ def main(): for iteration in [0, 32, 64]: field0 = f0['vorticity/complex/{0}'.format(iteration)].value field1 = f1['vorticity/complex/{0}'.format(iteration)].value - assert(np.max(np.abs(field0 - field1)) < 1e-5) + field_error = np.max(np.abs(field0 - field1)) x0 = f0['tracers0/state/{0}'.format(iteration)].value x1 = f1['tracers0/state/{0}'.format(iteration)].value - assert(np.max(np.abs(x0 - x1)) < 1e-5) + traj_error = np.max(np.abs(x0 - x1)) y0 = f0['tracers0/rhs/{0}'.format(iteration)].value y1 = f1['tracers0/rhs/{0}'.format(iteration)].value - assert(np.max(np.abs(y0 - y1)) < 1e-5) + rhs_error = np.max(np.abs(y0 - y1)) + assert(field_error < 1e-5) + assert(traj_error < 1e-5) + assert(rhs_error < 1e-5) print('SUCCESS! Basic test passed.') return None diff --git a/bfps/test/test_bfps_resize.py b/bfps/test/test_bfps_resize.py new file mode 100644 index 0000000000000000000000000000000000000000..ce0a051da909a34b3be9d7ebd15e0d923c0a09f6 --- /dev/null +++ b/bfps/test/test_bfps_resize.py @@ -0,0 +1,113 @@ +#! 
/usr/bin/env python + +import os +import numpy as np +import h5py +import sys + +import bfps +from bfps import DNS +from bfps import PP + +import matplotlib.pyplot as plt +import pyfftw + + +def main(): + niterations = 2 + c = DNS() + c.launch( + ['NSVE', + '-n', '32', + '--src-simname', 'B32p1e4', + '--src-wd', bfps.lib_dir + '/test', + '--src-iteration', '0', + '--simname', 'dns_test', + '--np', '4', + '--ntpp', '1', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--wd', './'] + + sys.argv[1:]) + rr = PP() + rr.launch( + ['resize', + '--simname', 'dns_test', + '--new_nx', '64', + '--new_ny', '64', + '--new_nz', '64', + '--new_simname', 'pp_resize_test', + '--np', '4', + '--ntpp', '1', + '--iter0', '0', + '--iter1', '{0}'.format(niterations), + '--wd', './'] + + sys.argv[1:]) + f0 = h5py.File(c.get_checkpoint_0_fname(), 'r') + f1 = h5py.File('pp_resize_test_fields.h5', 'r') + d0 = f0['vorticity/complex/0'].value + d1 = f1['vorticity/complex/0'].value + small_kdata = pyfftw.n_byte_align_empty( + (32, 32, 17, 3), + pyfftw.simd_alignment, + dtype = c.ctype) + small_rdata = pyfftw.n_byte_align_empty( + (32, 32, 32, 3), + pyfftw.simd_alignment, + dtype = c.rtype) + small_plan = pyfftw.FFTW( + small_kdata.transpose((1, 0, 2, 3)), + small_rdata, + axes = (0, 1, 2), + direction = 'FFTW_BACKWARD', + threads = 4) + big_kdata = pyfftw.n_byte_align_empty( + (64, 64, 33, 3), + pyfftw.simd_alignment, + dtype = c.ctype) + big_rdata = pyfftw.n_byte_align_empty( + (64, 64, 64, 3), + pyfftw.simd_alignment, + dtype = c.rtype) + big_plan = pyfftw.FFTW( + big_kdata.transpose((1, 0, 2, 3)), + big_rdata, + axes = (0, 1, 2), + direction = 'FFTW_BACKWARD', + threads = 4) + small_kdata[:] = d0 + big_kdata[:] = d1 + small_plan.execute() + big_plan.execute() + + se = np.mean(small_rdata**2, axis = 3)**.5 + be = np.mean(big_rdata**2, axis = 3)**.5 + + f = plt.figure(figsize = (6, 4)) + a = f.add_subplot(231) + a.set_axis_off() + a.imshow(se[0]) + a = f.add_subplot(234) + a.set_axis_off() + a.imshow(be[0]) + a = f.add_subplot(232) + a.set_axis_off() + a.imshow(se[:, 0]) + a = f.add_subplot(235) + a.set_axis_off() + a.imshow(be[:, 0]) + a = f.add_subplot(233) + a.set_axis_off() + a.imshow(se[:, :, 0]) + a = f.add_subplot(236) + a.set_axis_off() + a.imshow(be[:, :, 0]) + f.tight_layout() + f.savefig('resize_test.pdf') + plt.close(f) + return None + +if __name__ == '__main__': + main() + diff --git a/bfps/test/test_fftw.py b/bfps/test/test_fftw.py new file mode 100644 index 0000000000000000000000000000000000000000..3de2d97df167567899fbf8b19c1123e5bf35cbe7 --- /dev/null +++ b/bfps/test/test_fftw.py @@ -0,0 +1,66 @@ +#! 
/usr/bin/env python + +import numpy as np +import h5py +import sys + +import bfps +from bfps import TEST + +try: + import matplotlib.pyplot as plt +except: + plt = None + +def main(): + niterations = 10 + nlist = [16, 32, 48, 24, 64, 12] + for ii in range(len(nlist)): + c = TEST() + c.launch( + ['symmetrize_test', + '--nx', str(nlist[ii]), + '--ny', str(nlist[(ii+1)%(len(nlist))]), + '--nz', str(nlist[(ii+2)%(len(nlist))]), + '--Lx', str(2+np.random.random()), + '--Ly', str(2+np.random.random()), + '--Lz', str(2+np.random.random()), + '--simname', 'fftw_vs_numpy_{0}'.format(ii), + '--np', '4', + '--ntpp', '1', + '--wd', './'] + + sys.argv[1:]) + df = h5py.File(c.simname + '.h5', 'r') + df = h5py.File(c.simname + '_fields.h5', 'r') + field1_complex = df['field1/complex/0'].value + field1_real = df['field1/real/0'].value + npoints = field1_real.size//3 + + np_field1_real = np.fft.irfftn(field1_complex, axes = (0, 1, 2)).transpose(1, 0, 2, 3) + L2normr = np.sqrt(np.mean(np.sum(field1_real**2, axis = 3))) + np_L2normr = np.sqrt(np.mean(np.sum(np_field1_real**2, axis = 3))) + err = np.max(np.abs(field1_real - np_field1_real*npoints)) / L2normr + assert(err < 1e-5) + + np_field1_complex = np.fft.rfftn(field1_real.transpose(1, 0, 2, 3), axes = (0, 1, 2)) / npoints + + L2norm0 = np.sqrt(np.sum(np.abs(field1_complex[:, :, 0])**2) + 2*np.sum(np.abs(field1_complex[:, :, 1:])**2)) + L2norm1 = np.sqrt(np.sum(np.abs(np_field1_complex[:, :, 0])**2) + 2*np.sum(np.abs(np_field1_complex[:, :, 1:])**2)) + err = np.max(np.abs(np_field1_complex - field1_complex)) / L2norm0 + assert(err < 1e-5) + + err = abs(L2normr - L2norm0) / L2norm0 + assert(err < 1e-5) + + if not type(plt) == type(None): + f = plt.figure() + a = f.add_subplot(121) + a.imshow(np.log(np.abs(np_field1_complex[:, :, 0, 0])), interpolation = 'nearest') + a = f.add_subplot(122) + a.imshow(np.log(np.abs(field1_complex[:, :, 0, 0])), interpolation = 'nearest') + f.savefig(c.simname + '_complex_slice_kx0.pdf') + return None + +if __name__ == '__main__': + main() + diff --git a/bfps/test/test_interpolation.py b/bfps/test/test_interpolation.py new file mode 100644 index 0000000000000000000000000000000000000000..eeb40248388d8a67d341f000b264bc9e7ac1dec0 --- /dev/null +++ b/bfps/test/test_interpolation.py @@ -0,0 +1,54 @@ +#! 
/usr/bin/env python + +import os +import numpy as np +import h5py +import sys + +import bfps +from bfps import TEST + +try: + import matplotlib.pyplot as plt + matplotlib_on = True +except ImportError: + matplotlib_on = False + + +def main(): + nparticles = 100 + c = TEST() + c.launch( + ['test_interpolation', + '-n', '32', + '--np', '4', + '--ntpp', '1', + #'--nparticles', '{0}'.format(nparticles), + '--wd', './'] + + sys.argv[3:]) + ifile = h5py.File( + 'test_input.h5', + 'r') + ofile = h5py.File( + 'test_output.h5', + 'r') + pos0 = ifile['tracers0/state/0'].value + pos1 = ofile['tracers0/position/0'].value + assert(np.max(np.abs(pos0-pos1) / np.abs(pos0)) <= 1e-5) + vort0 = ofile['tracers0/vorticity/0'].value + vel_gradient = ofile['tracers0/velocity_gradient/0'].value + vort1 = vort0.copy() + vort1[:, 0] = vel_gradient[:, 5] - vel_gradient[:, 7] + vort1[:, 1] = vel_gradient[:, 6] - vel_gradient[:, 2] + vort1[:, 2] = vel_gradient[:, 1] - vel_gradient[:, 3] + assert(np.max(np.abs(vort0-vort1) / np.abs(vort0)) <= 1e-5) + divergence = vel_gradient[:, 0] + vel_gradient[:, 4] + vel_gradient[:, 8] + divergence_error = np.abs(divergence) / (vel_gradient[:, 0]**2 + vel_gradient[:, 1]**2 + vel_gradient[:, 2]**2)**.5 + print('mean divergence error is ', np.mean(divergence_error)) + print('maximum divergence error is ', np.max(divergence_error)) + print('SUCCESS! Interpolated vorticity agrees with vorticity from interpolated velocity gradient.') + return None + +if __name__ == '__main__': + main() + diff --git a/bfps/test/test_particle_clouds.py b/bfps/test/test_particle_clouds.py new file mode 100644 index 0000000000000000000000000000000000000000..5d2045390f51e7f529f78a3eb7037acb3fcae3b9 --- /dev/null +++ b/bfps/test/test_particle_clouds.py @@ -0,0 +1,93 @@ +#! /usr/bin/env python +####################################################################### +# # +# Copyright 2019 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import os +import numpy as np +import h5py +import sys + +import bfps +from bfps import DNS + + +def main(): + nclouds = 10 + nparticles_per_cloud = 1000 + nparticles = nclouds*nparticles_per_cloud + niterations = 32 + c = DNS() + c.dns_type = 'NSVEparticles' + c.parameters['nparticles'] = nparticles + c.parameters['tracers1_integration_steps'] = 4 + c.generate_tracer_state(rseed = 2, species = 1) + del c.parameters['nparticles'] + del c.parameters['tracers1_integration_steps'] + ic_file = h5py.File(c.get_checkpoint_0_fname(), 'a') + ic_file['tracers0/state/0'] = ic_file['tracers1/state/0'].value.reshape(nclouds, nparticles_per_cloud, 3) + ic_file['tracers0/rhs/0'] = ic_file['tracers1/rhs/0'].value.reshape(4, nclouds, nparticles_per_cloud, 3) + ic_file.close() + c.launch( + ['NSVEparticles', + '-n', '32', + '--src-simname', 'B32p1e4', + '--forcing_type', 'linear', + '--src-wd', bfps.lib_dir + '/test', + '--src-iteration', '0', + '--np', '4', + '--ntpp', '1', + '--fftw_plan_rigor', 'FFTW_PATIENT', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--nparticles', '{0}'.format(nparticles), + '--njobs', '2', + '--wd', './']) + f0 = h5py.File( + os.path.join( + os.path.join(bfps.lib_dir, 'test'), + 'B32p1e4_checkpoint_0.h5'), + 'r') + f1 = h5py.File(c.get_checkpoint_0_fname(), 'r') + for iteration in [0, 32, 64]: + field0 = f0['vorticity/complex/{0}'.format(iteration)].value + field1 = f1['vorticity/complex/{0}'.format(iteration)].value + field_error = np.max(np.abs(field0 - field1)) + x0 = f0['tracers0/state/{0}'.format(iteration)].value + x1 = f1['tracers0/state/{0}'.format(iteration)].value.reshape(x0.shape) + traj_error = np.max(np.abs(x0 - x1)) + y0 = f0['tracers0/rhs/{0}'.format(iteration)].value + y1 = f1['tracers0/rhs/{0}'.format(iteration)].value.reshape(y0.shape) + rhs_error = np.max(np.abs(y0 - y1)) + assert(field_error < 1e-5) + assert(traj_error < 1e-5) + assert(rhs_error < 1e-5) + print('SUCCESS! Basic test passed.') + return None + +if __name__ == '__main__': + main() + diff --git a/bfps/test/test_particles.py b/bfps/test/test_particles.py new file mode 100644 index 0000000000000000000000000000000000000000..6d3abec14e1c822224290e247593eda9b02a8f6b --- /dev/null +++ b/bfps/test/test_particles.py @@ -0,0 +1,133 @@ +#! 
/usr/bin/env python + +import os +import numpy as np +import h5py +import sys + +import bfps +from bfps import DNS + +try: + import matplotlib.pyplot as plt + matplotlib_on = True +except ImportError: + matplotlib_on = False + + +def main(): + assert(sys.argv[1] in ['p2p_sampling']) + assert(sys.argv[2] in ['on', 'off']) + niterations = 32 + nparticles = 1000 + njobs = 1 + if sys.argv[2] == 'on': + c = DNS() + c.launch( + ['NSVEcomplex_particles', + '-n', '32', + '--src-simname', 'B32p1e4', + '--src-wd', bfps.lib_dir + '/test', + '--src-iteration', '0', + '--np', '4', + '--ntpp', '1', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--checkpoints_per_file', '{0}'.format(3), + '--nparticles', '{0}'.format(nparticles), + '--particle-rand-seed', '2', + '--njobs', '{0}'.format(njobs), + '--wd', './'] + + sys.argv[3:]) + if sys.argv[1] == 'p2p_sampling': + cf = h5py.File( + 'test_checkpoint_0.h5', + 'r') + pf = h5py.File( + 'test_particles.h5', + 'r') + if matplotlib_on: + # initial condition: + # show a histogram of the orientations + f = plt.figure() + a = f.add_subplot(111) + for iteration in range(1): + x = cf['tracers0/state/{0}'.format(iteration)][:, 3:] + hist, bins = np.histogram( + np.sum(x**2, axis = -1).flatten()**.5, + bins = np.linspace(0, 2, 40)) + bb = (bins[:-1] + bins[1:])/2 + pp = hist.astype(np.float) / (np.sum(hist) * (bb[1] - bb[0])) + a.plot(bb, pp, label = '{0}'.format(iteration)) + a.legend(loc = 'best') + f.tight_layout() + f.savefig('orientation_histogram.pdf') + plt.close(f) + # show a histogram of the positions + f = plt.figure() + a = f.add_subplot(111) + for iteration in range(0, niterations*njobs+1, niterations//2): + x = pf['tracers0/position/{0}'.format(iteration)].value + hist, bins = np.histogram( + np.sum(x**2, axis = -1).flatten()**.5, + bins = 40) + bb = (bins[:-1] + bins[1:])/2 + pp = hist.astype(np.float) / (np.sum(hist) * (bb[1] - bb[0])) + a.plot(bb, pp, label = '{0}'.format(iteration)) + a.legend(loc = 'best') + f.tight_layout() + f.savefig('position_histogram.pdf') + plt.close(f) + # show a histogram of the orientations + f = plt.figure() + a = f.add_subplot(111) + for iteration in range(0, niterations*njobs+1, niterations//2): + x = pf['tracers0/orientation/{0}'.format(iteration)].value + hist, bins = np.histogram( + np.sum(x**2, axis = -1).flatten()**.5, + bins = np.linspace(0, 2, 40)) + bb = (bins[:-1] + bins[1:])/2 + pp = hist.astype(np.float) / (np.sum(hist) * (bb[1] - bb[0])) + a.plot(bb, pp, label = '{0}'.format(iteration)) + a.legend(loc = 'best') + f.tight_layout() + f.savefig('orientation_histogram.pdf') + plt.close(f) + # compared sampled positions with checkpoint positions + for iteration in range(0, niterations*njobs+1, niterations): + x = pf['tracers0/position/{0}'.format(iteration)].value + s = cf['tracers0/state/{0}'.format(iteration)].value + distance = (np.max(np.abs(x - s[..., :3]) / + np.maximum(np.ones(x.shape), + np.maximum(np.abs(x), + np.abs(s[..., :3]))))) + assert(distance < 1e-14) + x = pf['tracers0/orientation/{0}'.format(iteration)].value + distance = (np.max(np.abs(x - s[..., 3:]) / + np.maximum(np.ones(x.shape), + np.maximum(np.abs(x), + np.abs(s[..., 3:]))))) + assert(distance < 1e-14) + # code relevant when velocity field is 0 everywhere. 
+ # we check to see what happens to the orientation of the particles + # show a histogram of the orientations + f = plt.figure() + a = f.add_subplot(111) + for iteration in range(0, niterations*njobs+1, niterations//4): + x = pf['tracers0/orientation/{0}'.format(iteration)].value + hist, bins = np.histogram( + x.flatten(), + bins = 100) + bb = (bins[:-1] + bins[1:])/2 + pp = hist.astype(np.float) / (np.sum(hist) * (bb[1] - bb[0])) + a.plot(bb, pp, label = '{0}'.format(iteration)) + a.legend(loc = 'best') + f.tight_layout() + f.savefig('full_orientation_histogram.pdf') + plt.close(f) + return None + +if __name__ == '__main__': + main() + diff --git a/bfps/tools.py b/bfps/tools.py index 69756ec648409ab52d57930d26b1ab1ca8b942c1..0acf51b539826a4ff18a6b6458d6ac5f777344a1 100644 --- a/bfps/tools.py +++ b/bfps/tools.py @@ -1,26 +1,25 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### +################################################################################ +# # +# Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ @@ -143,6 +142,19 @@ def generate_data_3D( a[ii] = 0 return a + +def generate_random_discontinuous_data_3D( + n0, n1, n2, + dtype = np.complex128, + p = 1.5, + amplitude = 0.5): + """returns the Fourier representation of a random field. + """ + assert(n0 % 2 == 0 and n1 % 2 == 0 and n2 % 2 == 0) + a = np.random.randn(n1, n0, n2) + b = np.fft.rfftn(a).astype(dtype) + return b + def randomize_phases(v): """randomize the phases of an FFTW complex field. 
@@ -190,10 +202,10 @@ def padd_with_zeros( """ if (type(odtype) == type(None)): odtype = a.dtype - assert(a.shape[0] <= n0 and - a.shape[1] <= n1 and + assert(a.shape[0] <= n1 and + a.shape[1] <= n0 and a.shape[2] <= n2//2+1) - b = np.zeros((n0, n1, n2//2 + 1) + a.shape[3:], dtype = odtype) + b = np.zeros((n1, n0, n2//2 + 1) + a.shape[3:], dtype = odtype) m0 = a.shape[1] m1 = a.shape[0] m2 = a.shape[2] diff --git a/machine_settings_py.py b/cmake/BFPSConfig.cmake.in similarity index 53% rename from machine_settings_py.py rename to cmake/BFPSConfig.cmake.in index 787f1d5a10b9b0b260b42a1da18d35e67c56dacc..bd2af7160bbd8583b4d6ebd8cd6d710fc6fdfb9f 100644 --- a/machine_settings_py.py +++ b/cmake/BFPSConfig.cmake.in @@ -1,6 +1,6 @@ ####################################################################### # # -# Copyright 2015 Max Planck Institute # +# Copyright 2019 Max Planck Institute # # for Dynamics and Self-Organization # # # # This file is part of bfps. # @@ -23,41 +23,45 @@ ####################################################################### +#----------------------------------------------------------------------------- +# +# BFPSConfig.cmake - BFPS CMake configuration file for external projects. +# +# This file is configured by BFPS and used by the BFPS.cmake module +# to load BFPS's settings for an external project. +# +@BFPS_CONFIG_INSTALL_ONLY@ -import os +# +SET(BFPS_VERSION "@BFPS_VERSION@") -######################################################################## -# these lists should be adapted for your different environment(s) -# personally, I have access to setups where my home folder is shared -# between different machines, including cluster and desktop, therefore -# I check the host name when choosing libraries etc. -# feel free to do your own thing to the copy of this file placed in -# ./config/bfps -######################################################################## +# +SET(HAVE_BFPS TRUE) +SET(BFPS_PREFIX "@CMAKE_INSTALL_PREFIX@") +SET(BFPS_INCLUDE_DIR "@CMAKE_INSTALL_PREFIX@/include") +SET(BFPS_LIBRARIES_DIR "@CMAKE_INSTALL_PREFIX@/lib") -hostname = os.getenv('HOSTNAME') +SET(BFPS_LINK_DIRECTORIES "@ALL_LINK_DIRS@") +SET(BFPS_INCLUDE_DIRECTORIES "@ALL_INCLUDE_DIRS@") -compiler = 'g++' -extra_compile_args = ['-Wall', '-O2', '-g', '-mtune=native', '-ffast-math', '-std=c++11'] -extra_libraries = ['hdf5'] -include_dirs = [] -library_dirs = [] +SET(BFPS_CXX_COMPILE_FLAGS "@CMAKE_CXX_COMPILE_FLAGS@") +SET(BFPS_CXX_COMPILER "@CMAKE_CXX_COMPILER@") +SET(BFPS_C_COMPILER "@CMAKE_C_COMPILER@") +SET(BFPS_EXE_LINKER_FLAGS "@CMAKE_EXE_LINKER_FLAGS@") +SET(BFPS_LIBS "@BFPS_LIBS@") +set(BFPS_DEFINITIONS @COMPILE_DEFINITIONS@) -if hostname == 'chichi-G': - include_dirs = ['/usr/local/include', - '/usr/include/mpich'] - library_dirs = ['/usr/local/lib', - '/usr/lib/mpich'] - extra_libraries += ['mpich'] +# +SET(BFPS_SOURCE_DIR "@BFPS_SOURCE_DIR@") -if hostname in ['tolima', 'misti']: - local_install_dir = '/scratch.local/chichi/installs' +# +SET(BFPS_BUILD_TYPE "@CMAKE_BUILD_TYPE@") - include_dirs = ['/usr/lib64/mpi/gcc/openmpi/include', - os.path.join(local_install_dir, 'include')] +# +SET(BFPS_HDF5_USE_SZIP "@BFPS_HDF5_USE_SZIP@") +SET(BFPS_HDF5_SZIP_LIB_PATH "@BFPS_HDF5_SZIP_LIB_PATH@") - library_dirs = ['/usr/lib64/mpi/gcc/openmpi/lib64', - os.path.join(local_install_dir, 'lib'), - os.path.join(local_install_dir, 'lib64')] - extra_libraries += ['mpi_cxx', 'mpi'] +# +set(BFPS_SRC_INCLUDE_DIRS "@BFPS_INCLUDE_DIRS@") +set(BFPS_BUILD_LIBRARY_DIRS "@BFPS_LIB_DIR@") diff --git 
a/cmake/morse/FindCommon.cmake b/cmake/morse/FindCommon.cmake new file mode 100644 index 0000000000000000000000000000000000000000..95d8c1f5404c0d7ea2384d84bd12c2e4a3cc3418 --- /dev/null +++ b/cmake/morse/FindCommon.cmake @@ -0,0 +1,47 @@ +### +# +# @copyright (c) 2018 Inria. All rights reserved. +# +### +# +# @file FindCommon.cmake +# +# @project MORSE +# MORSE is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 1.0.0 +# @author Florent Pruvost +# @date 13-04-2018 +# +### + +# clean these variables before using them in CMAKE_REQUIRED_* variables in +# check_function_exists +macro(finds_remove_duplicates) + if (REQUIRED_DEFINITIONS) + list(REMOVE_DUPLICATES REQUIRED_DEFINITIONS) + endif() + if (REQUIRED_INCDIRS) + list(REMOVE_DUPLICATES REQUIRED_INCDIRS) + endif() + if (REQUIRED_FLAGS) + list(REMOVE_DUPLICATES REQUIRED_FLAGS) + endif() + if (REQUIRED_LDFLAGS) + list(REMOVE_DUPLICATES REQUIRED_LDFLAGS) + endif() + if (REQUIRED_LIBS) + list(REVERSE REQUIRED_LIBS) + list(REMOVE_DUPLICATES REQUIRED_LIBS) + list(REVERSE REQUIRED_LIBS) + endif() +endmacro() + +## +## @end file FindCommon +## diff --git a/cmake/morse/FindFFTW.cmake b/cmake/morse/FindFFTW.cmake new file mode 100644 index 0000000000000000000000000000000000000000..37450baea9f52a9a4e8a1236d6234d3c3840ba79 --- /dev/null +++ b/cmake/morse/FindFFTW.cmake @@ -0,0 +1,832 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2018 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find FFTW Version 3 include dirs and libraries +# Default configuration will find the real double precision fftw library version +# without THREADS|OMP. +# Use this module by invoking find_package with the form: +# find_package(FFTW +# [REQUIRED] # Fail with error if fftw is not found +# [COMPONENTS MKL] +# +# COMPONENTS can be some of the following: +# - MKL: to detect the FFTW from Intel MKL +# - ESSL: to detect the FFTW from IBM ESSL +# - THREADS: to detect the Threads version of FFTW +# - OMP: to detect the OpenMP version of FFTW +# - SIMPLE: to detect the FFTW simple precision fftw3f +# - LONG: to detect the FFTW long double precision fftw3l +# - QUAD: to detect the FFTW quadruple precision fftw3q +# +# This module finds headers and fftw library. 
+# Results are reported in variables: +# FFTW_FOUND - True if headers and requested libraries were found +# FFTW_CFLAGS_OTHER - fftw compiler flags without headers paths +# FFTW_LDFLAGS_OTHER - fftw linker flags without libraries +# FFTW_INCLUDE_DIRS - fftw include directories +# FFTW_LIBRARY_DIRS - fftw link directories +# FFTW_LIBRARIES - fftw libraries to be linked (absolute path) +# FFTW_CFLAGS_OTHER_DEP - fftw + dependencies compiler flags without headers paths +# FFTW_LDFLAGS_OTHER_DEP - fftw + dependencies linker flags without libraries +# FFTW_INCLUDE_DIRS_DEP - fftw + dependencies include directories +# FFTW_LIBRARY_DIRS_DEP - fftw + dependencies link directories +# FFTW_LIBRARIES_DEP - fftw + dependencies libraries +# +# FFTW_FOUND_WITH_PKGCONFIG - True if found with pkg-config +# if found with pkg-config the following variables are set +# <PREFIX> = FFTW3F or FFTW3 or FFTW3L or FFTW3Q +# <XPREFIX> = <PREFIX> for common case +# <XPREFIX> = <PREFIX>_STATIC for static linking +# <XPREFIX>_FOUND ... set to 1 if module(s) exist +# <XPREFIX>_LIBRARIES ... only the libraries (w/o the '-l') +# <XPREFIX>_LIBRARY_DIRS ... the paths of the libraries (w/o the '-L') +# <XPREFIX>_LDFLAGS ... all required linker flags +# <XPREFIX>_LDFLAGS_OTHER ... all other linker flags +# <XPREFIX>_INCLUDE_DIRS ... the '-I' preprocessor flags (w/o the '-I') +# <XPREFIX>_CFLAGS ... all required cflags +# <XPREFIX>_CFLAGS_OTHER ... the other compiler flags +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DFFTW_DIR=path/to/fftw): +# FFTW_DIR - Where to find the base directory of fftw +# FFTW_INCDIR - Where to find the header files +# FFTW_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: FFTW_DIR, FFTW_INCDIR, FFTW_LIBDIR +# For MKL case and if no paths are given as hints, we will try to use the MKLROOT +# environment variable + +#============================================================================= +# Copyright 2012-2018 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2018 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) 
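For orientation, here is a minimal sketch of how an external project could drive the find module documented above. It assumes the whole cmake/morse directory (this file plus the helper modules it includes) has been copied into the consumer's source tree; the project name fftw_consumer and the demo.cpp source are placeholders, and the component list simply follows the documentation above.

    cmake_minimum_required(VERSION 3.10)
    project(fftw_consumer C CXX)

    # make the bundled morse find modules visible to find_package()
    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/morse")

    # ask for the OpenMP-enabled single-precision libraries
    # (fftw3f_omp, fftw3f, plus the base fftw3)
    find_package(FFTW REQUIRED COMPONENTS OMP SIMPLE)

    # propagate the documented result variables to the build
    include_directories(${FFTW_INCLUDE_DIRS})
    link_directories(${FFTW_LIBRARY_DIRS})

    add_executable(demo demo.cpp)
    target_link_libraries(demo ${FFTW_LIBRARIES})

With this in place an FFTW installation is located either through pkg-config or through the FFTW_DIR / FFTW_INCDIR / FFTW_LIBDIR hints listed above, and the documented result variables carry the include directories, link directories and libraries into the target.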
+ +# Common macros to use in finds +include(FindInit) + +if (NOT FFTW_FOUND) + set(FFTW_DIR "" CACHE PATH "Installation directory of FFTW library given by user") + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "A cache variable, namely FFTW_DIR, has been set to specify the install directory of FFTW") + endif() +endif() + +# Set the version to find +set(FFTW_LOOK_FOR_MKL OFF) +set(FFTW_LOOK_FOR_ESSL OFF) +set(FFTW_LOOK_FOR_THREADS OFF) +set(FFTW_LOOK_FOR_OMP OFF) +set(FFTW_LOOK_FOR_FFTW_SIMPLE OFF) +set(FFTW_LOOK_FOR_FFTW_LONG OFF) +set(FFTW_LOOK_FOR_FFTW_QUAD OFF) + +if( FFTW_FIND_COMPONENTS ) + foreach( component ${FFTW_FIND_COMPONENTS} ) + if (${component} STREQUAL "THREADS") + # means we look for the Threads version of FFTW + set(FFTW_LOOK_FOR_THREADS ON) + endif() + if (${component} STREQUAL "OMP") + # means we look for the OpenMP version of FFTW + set(FFTW_LOOK_FOR_OMP ON) + endif() + if (${component} STREQUAL "SIMPLE") + # means we look for FFTW simple precision (fftw3f) + set(FFTW_LOOK_FOR_FFTW_SIMPLE ON) + set(FFTW_LOOK_FOR_FFTW_LONG OFF) + set(FFTW_LOOK_FOR_FFTW_QUAD OFF) + endif() + if (${component} STREQUAL "LONG") + # means we look for FFTW long double precision (fftw3l) + set(FFTW_LOOK_FOR_FFTW_SIMPLE OFF) + set(FFTW_LOOK_FOR_FFTW_LONG ON) + set(FFTW_LOOK_FOR_FFTW_QUAD OFF) + endif() + if (${component} STREQUAL "QUAD") + # means we look for FFTW quad precision (fftw3q) + set(FFTW_LOOK_FOR_FFTW_SIMPLE OFF) + set(FFTW_LOOK_FOR_FFTW_LONG OFF) + set(FFTW_LOOK_FOR_FFTW_QUAD ON) + endif() + if (${component} STREQUAL "MKL") + # means we look for the Intel MKL version of FFTW + set(FFTW_LOOK_FOR_MKL ON) + if (FFTW_LOOK_FOR_FFTW_LONG) + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "Looking for FFTW -- long precision functions do not exist in MKL FFTW") + endif() + set(FFTW_LOOK_FOR_FFTW_LONG OFF) + endif() + if (FFTW_LOOK_FOR_FFTW_QUAD) + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "Looking for FFTW -- quadruple functions do not exist in MKL FFTW") + endif() + set(FFTW_LOOK_FOR_FFTW_QUAD OFF) + endif() + endif() + if (${component} STREQUAL "ESSL") + # means we look for the Intel MKL version of FFTW + set(FFTW_LOOK_FOR_ESSL ON) + if (FFTW_LOOK_FOR_FFTW_LONG) + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "Looking for FFTW -- long precision functions do not exist in FFTW_ESSL") + endif() + set(FFTW_LOOK_FOR_FFTW_LONG OFF) + endif() + if (FFTW_LOOK_FOR_FFTW_QUAD) + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "Looking for FFTW -- quadruple functions do not exist in FFTW_ESSL") + endif() + set(FFTW_LOOK_FOR_FFTW_QUAD OFF) + endif() + if (FFTW_LOOK_FOR_OMP) + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "Looking for FFTW -- FFTW_ESSL does not use OpenMP") + endif() + set(FFTW_LOOK_FOR_OMP OFF) + endif() + endif() + endforeach() +endif() + +if (FFTW_LOOK_FOR_THREADS) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "FFTW looks for threads") + endif() + if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_THREADS) + find_package(Threads REQUIRED) + else() + find_package(Threads) + endif() +endif() + +if (FFTW_LOOK_FOR_OMP) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "FFTW looks for openmp") + endif() + if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_OMP) + find_package(OpenMP REQUIRED) + else() + find_package(OpenMP) + endif() +endif() + +if (FFTW_LOOK_FOR_MKL) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "FFTW looks for threads and Intel MKL") + endif() + if (FFTW_LOOK_FOR_THREADS) + set(BLA_VENDOR "Intel10_64lp") + else() + set(BLA_VENDOR "Intel10_64lp_seq") + endif() + if (FFTW_FIND_REQUIRED AND 
FFTW_FIND_REQUIRED_MKL) + find_package(Threads REQUIRED) + find_package(BLAS REQUIRED) + else() + find_package(Threads) + find_package(BLAS) + endif() +endif() + +if (FFTW_LOOK_FOR_ESSL) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "FFTW looks for IBM ESSL") + endif() + if (FFTW_LOOK_FOR_THREADS) + set(BLA_VENDOR "IBMESSLMT") + else() + set(BLA_VENDOR "IBMESSL") + endif() + if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_ESSL) + find_package(BLAS REQUIRED) + else() + find_package(BLAS) + endif() +endif() + + +if( THREADS_FOUND ) + libraries_absolute_path(CMAKE_THREAD_LIBS_INIT "") +endif () + +set(ENV_FFTW_DIR "$ENV{FFTW_DIR}") +set(ENV_FFTW_INCDIR "$ENV{FFTW_INCDIR}") +set(ENV_FFTW_LIBDIR "$ENV{FFTW_LIBDIR}") +set(FFTW_GIVEN_BY_USER "FALSE") +if ( FFTW_DIR OR ( FFTW_INCDIR AND FFTW_LIBDIR) OR ENV_FFTW_DIR OR (ENV_FFTW_INCDIR AND ENV_FFTW_LIBDIR) ) + set(FFTW_GIVEN_BY_USER "TRUE") +endif() + + +# Optionally use pkg-config to detect include/library dirs (if pkg-config is available) +# ------------------------------------------------------------------------------------- +if (NOT FFTW_LOOK_FOR_MKL AND NOT FFTW_LOOK_FOR_ESSL) + include(FindPkgConfig) + find_package(PkgConfig QUIET) + if( PKG_CONFIG_EXECUTABLE AND NOT FFTW_GIVEN_BY_USER ) + + set(FFTW_INCLUDE_DIRS) + set(FFTW_LIBRARY_DIRS) + set(FFTW_LIBRARIES) + + if(FFTW_LOOK_FOR_FFTW_SIMPLE) + pkg_search_module(FFTW3F fftw3f) + pkg_search_module(FFTW3 fftw3) + if (FFTW3F_FOUND) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW3F - found using PkgConfig") + endif() + if (FFTW3F_LIBRARIES) + find_pkgconfig_libraries_absolute_path(FFTW3F) + list(APPEND FFTW_LIBRARIES "${FFTW3F_LIBRARIES}") + endif() + if(FFTW3F_INCLUDE_DIRS) + list(APPEND FFTW_INCLUDE_DIRS "${FFTW3F_INCLUDE_DIRS}") + else() + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "FFTW3F_INCLUDE_DIRS is empty using PkgConfig." + "Perhaps the path to fftw3f headers is already present in your" + "CPATH/C(PLUS)_INCLUDE_PATH environment variables.") + endif() + endif() + if(FFTW3F_LIBRARY_DIRS) + list(APPEND FFTW_LIBRARY_DIRS "${FFTW3F_LIBRARY_DIRS}") + endif() + else(FFTW3F_FOUND) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW3F - not found using PkgConfig." + "\n Perhaps you should add the directory containing fftw3f.pc to" + "\n the PKG_CONFIG_PATH environment variable.") + endif() + endif(FFTW3F_FOUND) + elseif(FFTW_LOOK_FOR_FFTW_LONG) + pkg_search_module(FFTW3L fftw3l) + pkg_search_module(FFTW3 fftw3) + if (FFTW3L_FOUND) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW3L - found using PkgConfig") + endif() + if (FFTW3L_LIBRARIES) + find_pkgconfig_libraries_absolute_path(FFTW3L) + list(APPEND FFTW_LIBRARIES "${FFTW3L_LIBRARIES}") + endif() + if(FFTW3L_INCLUDE_DIRS) + list(APPEND FFTW_INCLUDE_DIRS "${FFTW3L_INCLUDE_DIRS}") + else() + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "FFTW3L_INCLUDE_DIRS is empty using PkgConfig." + "Perhaps the path to fftw3l headers is already present in your" + "CPATH/C(PLUS)_INCLUDE_PATH environment variables.") + endif() + endif() + if(FFTW3L_LIBRARY_DIRS) + list(APPEND FFTW_LIBRARY_DIRS "${FFTW3L_LIBRARY_DIRS}") + endif() + else(FFTW3L_FOUND) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW3L - not found using PkgConfig." 
+ "\n Perhaps you should add the directory containing fftw3l.pc to" + "\n the PKG_CONFIG_PATH environment variable.") + endif() + endif(FFTW3L_FOUND) + elseif(FFTW_LOOK_FOR_FFTW_QUAD) + pkg_search_module(FFTW3Q fftw3q) + pkg_search_module(FFTW3 fftw3) + if (FFTW3Q_FOUND) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW3Q - found using PkgConfig") + endif() + if (FFTW3Q_LIBRARIES) + find_pkgconfig_libraries_absolute_path(FFTW3Q) + list(APPEND FFTW_LIBRARIES "${FFTW3Q_LIBRARIES}") + endif() + if(FFTW3Q_INCLUDE_DIRS) + list(APPEND FFTW_INCLUDE_DIRS "${FFTW3Q_INCLUDE_DIRS}") + else() + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "FFTW3Q_INCLUDE_DIRS is empty using PkgConfig." + "Perhaps the path to fftw3q headers is already present in your" + "CPATH/C(PLUS)_INCLUDE_PATH environment variables.") + endif() + endif() + if(FFTW3Q_LIBRARY_DIRS) + list(APPEND FFTW_LIBRARY_DIRS "${FFTW3Q_LIBRARY_DIRS}") + endif() + else(FFTW3Q_FOUND) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW3Q - not found using PkgConfig." + "\n Perhaps you should add the directory containing fftw3q.pc to" + "\n the PKG_CONFIG_PATH environment variable.") + endif() + endif(FFTW3Q_FOUND) + else() + pkg_search_module(FFTW3 fftw3) + if (FFTW3_FOUND AND FFTW3_LIBRARIES) + find_pkgconfig_libraries_absolute_path(FFTW3) + endif() + endif() + if (FFTW3_FOUND) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW3 - found using PkgConfig") + endif() + if (FFTW3_LIBRARIES) + find_pkgconfig_libraries_absolute_path(FFTW3) + list(APPEND FFTW_LIBRARIES "${FFTW3_LIBRARIES}") + endif() + if(FFTW3_INCLUDE_DIRS) + list(APPEND FFTW_INCLUDE_DIRS "${FFTW3_INCLUDE_DIRS}") + else() + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "FFTW3_INCLUDE_DIRS is empty using PkgConfig." + "Perhaps the path to fftw3 headers is already present in your" + "CPATH/C(PLUS)_INCLUDE_PATH environment variables.") + endif() + endif() + if(FFTW3_LIBRARY_DIRS) + list(APPEND FFTW_LIBRARY_DIRS "${FFTW3_LIBRARY_DIRS}") + endif() + else(FFTW3_FOUND) + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW3 - not found using PkgConfig." 
+ "\n Perhaps you should add the directory containing fftw3.pc to" + "\n the PKG_CONFIG_PATH environment variable.") + endif() + endif(FFTW3_FOUND) + + if (FFTW_FOUND AND FFTW_LIBRARIES) + set(FFTW_FOUND_WITH_PKGCONFIG "TRUE") + else() + set(FFTW_FOUND_WITH_PKGCONFIG "FALSE") + endif() + + endif( PKG_CONFIG_EXECUTABLE AND NOT FFTW_GIVEN_BY_USER ) + +endif(NOT FFTW_LOOK_FOR_MKL AND NOT FFTW_LOOK_FOR_ESSL) + +if( (NOT PKG_CONFIG_EXECUTABLE) OR + (PKG_CONFIG_EXECUTABLE AND NOT FFTW_FOUND) OR + FFTW_GIVEN_BY_USER OR + FFTW_LOOK_FOR_MKL OR + FFTW_LOOK_FOR_ESSL + ) + + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + set(ENV_MKLROOT "$ENV{MKLROOT}") + set(ENV_FFTW_DIR "$ENV{FFTW_DIR}") + set(ENV_FFTW_INCDIR "$ENV{FFTW_INCDIR}") + if(ENV_FFTW_INCDIR) + list(APPEND _inc_env "${ENV_FFTW_INCDIR}") + elseif(ENV_FFTW_DIR) + list(APPEND _inc_env "${ENV_FFTW_DIR}") + list(APPEND _inc_env "${ENV_FFTW_DIR}/include") + list(APPEND _inc_env "${ENV_FFTW_DIR}/include/fftw") + else() + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include/fftw") + endif() + # system variables + if(WIN32) + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_inc_env}") + + if (FFTW_LOOK_FOR_ESSL) + set(FFTW3_HEADER_TO_FIND "fftw3_essl.h") + else() + set(FFTW3_HEADER_TO_FIND "fftw3.h") + endif() + + # Try to find the fftw header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(FFTW_INCDIR) + set(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS "FFTW_${FFTW3_HEADER_TO_FIND}_DIRS-NOTFOUND") + find_path(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS + NAMES ${FFTW3_HEADER_TO_FIND} + HINTS ${FFTW_INCDIR}) + else() + if(FFTW_DIR) + set(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS "FFTW_${FFTW3_HEADER_TO_FIND}_DIRS-NOTFOUND") + find_path(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS + NAMES ${FFTW3_HEADER_TO_FIND} + HINTS ${FFTW_DIR} + PATH_SUFFIXES "include" "include/fftw") + else() + set(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS "FFTW_${FFTW3_HEADER_TO_FIND}_DIRS-NOTFOUND") + find_path(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS + NAMES ${FFTW3_HEADER_TO_FIND} + HINTS ${PATH_TO_LOOK_FOR} + PATH_SUFFIXES "fftw") + endif() + endif() + mark_as_advanced(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS) + + # Add path to cmake variable + # ------------------------------------ + if (FFTW_${FFTW3_HEADER_TO_FIND}_DIRS) + set(FFTW_INCLUDE_DIRS "${FFTW_${FFTW3_HEADER_TO_FIND}_DIRS}") + else () + set(FFTW_INCLUDE_DIRS "FFTW_INCLUDE_DIRS-NOTFOUND") + if(NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW -- ${FFTW3_HEADER_TO_FIND} not found") + endif() + endif () + + + # Looking for lib + # --------------- + + # Add system library paths to search lib + # -------------------------------------- + unset(_lib_env) + set(ENV_FFTW_LIBDIR "$ENV{FFTW_LIBDIR}") + if(ENV_FFTW_LIBDIR) + list(APPEND _lib_env "${ENV_FFTW_LIBDIR}") + 
elseif(ENV_FFTW_DIR) + list(APPEND _lib_env "${ENV_FFTW_DIR}") + list(APPEND _lib_env "${ENV_FFTW_DIR}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _lib_env "${ENV_FFTW_DIR}/lib64") + list(APPEND _lib_env "${ENV_FFTW_DIR}/lib/intel64") + else() + list(APPEND _lib_env "${ENV_FFTW_DIR}/lib32") + list(APPEND _lib_env "${ENV_FFTW_DIR}/lib/ia32") + endif() + else() + if (ENV_MKLROOT) + list(APPEND _lib_env "${ENV_MKLROOT}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _lib_env "${ENV_MKLROOT}/lib64") + list(APPEND _lib_env "${ENV_MKLROOT}/lib/intel64") + else() + list(APPEND _lib_env "${ENV_MKLROOT}/lib32") + list(APPEND _lib_env "${ENV_MKLROOT}/lib/ia32") + endif() + endif() + list(APPEND _lib_env "$ENV{LIBRARY_PATH}") + if(WIN32) + string(REPLACE ":" ";" _lib_env2 "$ENV{LIB}") + elseif(APPLE) + string(REPLACE ":" ";" _lib_env2 "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env2 "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${_lib_env2}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + list(REMOVE_DUPLICATES _lib_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_lib_env}") + + if(FFTW_LOOK_FOR_FFTW_SIMPLE) + set(FFTW_PREC "f") + set(FFTW_PREC_TESTFUNC "s") + elseif(FFTW_LOOK_FOR_FFTW_LONG) + set(FFTW_PREC "l") + set(FFTW_PREC_TESTFUNC "l") + elseif(FFTW_LOOK_FOR_FFTW_QUAD) + set(FFTW_PREC "q") + set(FFTW_PREC_TESTFUNC "q") + else() + set(FFTW_PREC "") + set(FFTW_PREC_TESTFUNC "d") + endif() + + set(FFTW_LIBRARIES "") + set(FFTW_LIBRARY_DIRS "") + + if(NOT FFTW_LOOK_FOR_MKL) + + if (FFTW_LOOK_FOR_THREADS) + set(FFTW_libs_to_find "fftw3${FFTW_PREC}_threads;fftw3${FFTW_PREC};fftw3") + elseif (FFTW_LOOK_FOR_OMP) + set(FFTW_libs_to_find "fftw3${FFTW_PREC}_omp;fftw3${FFTW_PREC};fftw3") + else() + set(FFTW_libs_to_find "fftw3${FFTW_PREC};fftw3") + endif() + if (FFTW_LOOK_FOR_FFTW_QUAD) + if (NOT FFTW_LOOK_FOR_MKL AND NOT FFTW_LOOK_FOR_ESSL) + list(APPEND FFTW_libs_to_find "quadmath") + endif() + endif() + + if (FFTW_LOOK_FOR_ESSL) + set(FFTW_libs_to_find "fftw3_essl") + endif() + + # Try to find the fftw lib in the given paths + # ---------------------------------------------- + + # call cmake macro to find the lib path + if(FFTW_LIBDIR) + foreach(fftw_lib ${FFTW_libs_to_find}) + set(FFTW_${fftw_lib}_LIBRARY "FFTW_${fftw_lib}_LIBRARY-NOTFOUND") + find_library(FFTW_${fftw_lib}_LIBRARY + NAMES ${fftw_lib} + HINTS ${FFTW_LIBDIR}) + endforeach() + else() + if(FFTW_DIR) + foreach(fftw_lib ${FFTW_libs_to_find}) + set(FFTW_${fftw_lib}_LIBRARY "FFTW_${fftw_lib}_LIBRARY-NOTFOUND") + find_library(FFTW_${fftw_lib}_LIBRARY + NAMES ${fftw_lib} + HINTS ${FFTW_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(fftw_lib ${FFTW_libs_to_find}) + set(FFTW_${fftw_lib}_LIBRARY "FFTW_${fftw_lib}_LIBRARY-NOTFOUND") + find_library(FFTW_${fftw_lib}_LIBRARY + NAMES ${fftw_lib} + HINTS ${PATH_TO_LOOK_FOR}) + endforeach() + endif() + endif() + + # If found, add path to cmake variable + # ------------------------------------ + foreach(fftw_lib ${FFTW_libs_to_find}) + + if (FFTW_${fftw_lib}_LIBRARY) + get_filename_component(${fftw_lib}_lib_path "${FFTW_${fftw_lib}_LIBRARY}" PATH) + # set cmake variables + list(APPEND FFTW_LIBRARIES "${FFTW_${fftw_lib}_LIBRARY}") + list(APPEND FFTW_LIBRARY_DIRS "${${fftw_lib}_lib_path}") + else () + list(APPEND FFTW_LIBRARIES "${FFTW_${fftw_lib}_LIBRARY}") + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW -- lib ${fftw_lib} not found") + endif() + endif () + 
mark_as_advanced(FFTW_${fftw_lib}_LIBRARY) + + endforeach() + + # check if one lib is NOTFOUND + foreach(lib ${FFTW_LIBRARIES}) + if (NOT lib) + set(FFTW_LIBRARIES "FFTW_LIBRARIES-NOTFOUND") + endif() + endforeach() + + endif(NOT FFTW_LOOK_FOR_MKL) + + if (FFTW_LOOK_FOR_MKL OR FFTW_LOOK_FOR_ESSL) + + # FFTW relies on blas libs + if (FFTW_LOOK_FOR_THREADS) + if (FFTW_LOOK_FOR_MKL) + if (BLAS_LIBRARIES_PAR) + list(APPEND FFTW_LIBRARIES "${BLAS_LIBRARIES_PAR}") + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Multithreaded FFTW has been found: ${FFTW_LIBRARIES}") + endif() + else() + if (NOT FFTW_FIND_QUIETLY) + if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_MKL) + message(FATAL_ERROR "FFTW is required but not found.") + else() + message(STATUS "Multithreaded FFTW not found.") + endif() + endif() + endif(BLAS_LIBRARIES_PAR) + elseif (FFTW_LOOK_FOR_ESSL) + if (FFTW_LIBRARIES AND BLAS_LIBRARIES_PAR) + list(APPEND FFTW_LIBRARIES "${BLAS_LIBRARIES_PAR}") + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "Multithreaded FFTW has been found: ${FFTW_LIBRARIES}") + endif() + else() + if (NOT FFTW_FIND_QUIETLY) + if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_MKL) + message(FATAL_ERROR "FFTW is required but not found.") + else() + message(STATUS "Multithreaded FFTW not found.") + endif() + endif() + endif(FFTW_LIBRARIES AND BLAS_LIBRARIES_PAR) + endif() + else(FFTW_LOOK_FOR_THREADS) + if (FFTW_LOOK_FOR_MKL) + if (BLAS_LIBRARIES_SEQ) + list(APPEND FFTW_LIBRARIES "${BLAS_LIBRARIES_SEQ}") + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "FFTW has been found: ${FFTW_LIBRARIES}") + endif() + else() + if (NOT FFTW_FIND_QUIETLY) + if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_MKL) + message(FATAL_ERROR "FFTW is required but not found.") + else() + message(STATUS "FFTW not found.") + endif() + endif() + endif(BLAS_LIBRARIES_SEQ) + elseif (FFTW_LOOK_FOR_ESSL) + if (FFTW_LIBRARIES AND BLAS_LIBRARIES_SEQ) + list(APPEND FFTW_LIBRARIES "${BLAS_LIBRARIES_SEQ}") + if (NOT FFTW_FIND_QUIETLY) + message(STATUS "FFTW has been found: ${FFTW_LIBRARIES}") + endif() + else() + if (NOT FFTW_FIND_QUIETLY) + if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_MKL) + message(FATAL_ERROR "FFTW is required but not found.") + else() + message(STATUS "FFTW not found.") + endif() + endif() + endif(FFTW_LIBRARIES AND BLAS_LIBRARIES_SEQ) + endif() + endif(FFTW_LOOK_FOR_THREADS) + + if (BLAS_LIBRARY_DIRS) + list(APPEND FFTW_LIBRARY_DIRS "${BLAS_LIBRARY_DIRS}") + else() + if (NOT FFTW_FIND_QUIETLY) + message(WARNING "FFTW_LIBRARY_DIRS may not be complete because BLAS_LIBRARY_DIRS is empty.") + endif() + endif() + + endif(FFTW_LOOK_FOR_MKL OR FFTW_LOOK_FOR_ESSL) + + list(REMOVE_DUPLICATES FFTW_INCLUDE_DIRS) + list(REMOVE_DUPLICATES FFTW_LIBRARY_DIRS) + + # check if one lib is NOTFOUND + foreach(lib ${FFTW_LIBRARIES}) + if (NOT lib) + set(FFTW_LIBRARIES "FFTW_LIBRARIES-NOTFOUND") + endif() + endforeach() + +endif( (NOT PKG_CONFIG_EXECUTABLE) OR + (PKG_CONFIG_EXECUTABLE AND NOT FFTW_FOUND) OR + FFTW_GIVEN_BY_USER OR + FFTW_LOOK_FOR_MKL OR + FFTW_LOOK_FOR_ESSL + ) + +# check a function to validate the find +if(FFTW_LIBRARIES) + + set(REQUIRED_FLAGS) + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # FFTW + if (FFTW_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${FFTW_INCLUDE_DIRS}") + endif() + if (FFTW_CFLAGS_OTHER) + set(REQUIRED_FLAGS "${FFTW_CFLAGS_OTHER}") + endif() + if (FFTW_LDFLAGS_OTHER) + set(REQUIRED_LDFLAGS "${FFTW_LDFLAGS_OTHER}") + endif() + if (FFTW_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS 
"${FFTW_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${FFTW_LIBRARIES}") + # THREADS + if (FFTW_LOOK_FOR_THREADS) + list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") + endif() + # OMP + if(FFTW_LOOK_FOR_OMP) + list(APPEND REQUIRED_FLAGS "${OPENMP_C_FLAGS}") + endif() + # MKL + if(FFTW_LOOK_FOR_MKL) + list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") + if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + list(APPEND REQUIRED_LDFLAGS "-Wl,--no-as-needed") + endif() + endif() + # m + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + if (REQUIRED_FLAGS) + set(REQUIRED_FLAGS_COPY "${REQUIRED_FLAGS}") + set(REQUIRED_FLAGS) + set(REQUIRED_DEFINITIONS) + foreach(_flag ${REQUIRED_FLAGS_COPY}) + if (_flag MATCHES "^-D") + list(APPEND REQUIRED_DEFINITIONS "${_flag}") + endif() + string(REGEX REPLACE "^-D.*" "" _flag "${_flag}") + list(APPEND REQUIRED_FLAGS "${_flag}") + endforeach() + endif() + finds_remove_duplicates() + set(CMAKE_REQUIRED_DEFINITIONS "${REQUIRED_DEFINITIONS}") + set(CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(FFTW_WORKS CACHE) + include(CheckFunctionExists) + if (FFTW_LOOK_FOR_ESSL) + check_function_exists(${FFTW_PREC_TESTFUNC}fftw_execute FFTW_WORKS) + else() + check_function_exists(${FFTW_PREC_TESTFUNC}fftw_execute_ FFTW_WORKS) + endif() + mark_as_advanced(FFTW_WORKS) + + if(FFTW_WORKS) + # save link with dependencies + set(FFTW_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(FFTW_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(FFTW_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(FFTW_CFLAGS_OTHER_DEP "${REQUIRED_FLAGS}") + set(FFTW_LDFLAGS_OTHER_DEP "${REQUIRED_LDFLAGS}") + else() + if(NOT FFTW_FIND_QUIETLY) + message(STATUS "Looking for FFTW : test of ${FFTW_PREC_TESTFUNC}fftw_execute_ with fftw library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "CMAKE_REQUIRED_FLAGS: ${CMAKE_REQUIRED_FLAGS}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif(FFTW_LIBRARIES) + +if (FFTW_LIBRARIES) + list(GET FFTW_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (NOT FFTW_LIBRARY_DIRS) + set(FFTW_LIBRARY_DIRS "${first_lib_path}") + endif() + if (${first_lib_path} MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") + string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") + set(FFTW_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of FFTW library" FORCE) + else() + set(FFTW_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of FFTW library" FORCE) + endif() +endif() +mark_as_advanced(FFTW_DIR) +mark_as_advanced(FFTW_DIR_FOUND) + +# check that FFTW has been found +# ------------------------------- +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(FFTW DEFAULT_MSG + 
FFTW_LIBRARIES + FFTW_WORKS) diff --git a/cmake/morse/FindHeadersAndLibs.cmake b/cmake/morse/FindHeadersAndLibs.cmake new file mode 100644 index 0000000000000000000000000000000000000000..64144bdbf8a35f966f1ac802e5765e6ad81abf7c --- /dev/null +++ b/cmake/morse/FindHeadersAndLibs.cmake @@ -0,0 +1,94 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# @file FindHeadersAndLibs.cmake +# +# @project MORSE +# MORSE is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 0.9.0 +# @author Cedric Castagnede +# @author Emmanuel Agullo +# @author Mathieu Faverge +# @author Florent Pruvost +# @date 13-07-2012 +# +### + +# Some macros to print status when search for headers and libs +include(PrintFindStatus) + +function(FindHeader _libname _header_to_find) + + # save _libname upper and lower case + string(TOUPPER ${_libname} LIBNAME) + string(TOLOWER ${_libname} libname) + + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) + + + # Try to find the _header_to_find in the given paths + # -------------------------------------------------- + # call cmake macro to find the header path + if(${LIBNAME}_INCDIR) + set(${LIBNAME}_${_header_to_find}_DIRS "${LIBNAME}_${_header_to_find}_DIRS-NOTFOUND") + find_path(${LIBNAME}_${_header_to_find}_DIRS + NAMES ${_header_to_find} + HINTS ${${LIBNAME}_INCDIR}) + elseif(${LIBNAME}_DIR) + set(${LIBNAME}_${_header_to_find}_DIRS "${LIBNAME}_${_header_to_find}_DIRS-NOTFOUND") + find_path(${LIBNAME}_${_header_to_find}_DIRS + NAMES ${_header_to_find} + HINTS ${${LIBNAME}_DIR} + PATH_SUFFIXES include) + else() + set(${LIBNAME}_${_header_to_find}_DIRS "${LIBNAME}_${_header_to_find}_DIRS-NOTFOUND") + find_path(${LIBNAME}_${_header_to_find}_DIRS + NAMES ${_header_to_find} + HINTS ${_inc_env}) + endif() + mark_as_advanced(${LIBNAME}_${_header_to_find}_DIRS) + + # Print status if not found + # ------------------------- + if (NOT ${LIBNAME}_${_header_to_find}_DIRS) + Print_Find_Header_Status(${libname} ${_header_to_find}) + endif () + +endfunction(FindHeader) + + +## +## @end file FindHeadersAndLibs.cmake +## diff --git a/cmake/morse/FindInit.cmake b/cmake/morse/FindInit.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e59d41a077848029e04065d5f46bba57bcf0277d --- /dev/null +++ b/cmake/morse/FindInit.cmake @@ -0,0 +1,45 @@ +### +# +# @copyright (c) 2018 Inria. All rights reserved. 
+# +### +# +# @file FindInit.cmake +# +# @project MORSE +# MORSE is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 1.0.0 +# @author Florent Pruvost +# @date 24-04-2018 +# +### + + +# This include is required to check symbols of libs +include(CheckFunctionExists) + +# This include is required to check defines in headers +include(CheckIncludeFiles) + +# Factorize some piece of code +include(FindCommon) + +# To find headers and libs +include(FindHeadersAndLibs) + +# To transform relative path into absolute for a list of libraries +include(LibrariesAbsolutePath) +include(FindPkgconfigLibrariesAbsolutePath) + +# Some macros to print status when search for headers and libs +include(PrintFindStatus) + +## +## @end file FindInit.cmake +## diff --git a/cmake/morse/FindPkgconfigLibrariesAbsolutePath.cmake b/cmake/morse/FindPkgconfigLibrariesAbsolutePath.cmake new file mode 100644 index 0000000000000000000000000000000000000000..51b08ce59853459f493a0892874f71678467c392 --- /dev/null +++ b/cmake/morse/FindPkgconfigLibrariesAbsolutePath.cmake @@ -0,0 +1,99 @@ +### +# +# @copyright (c) 2018 Inria. All rights reserved. +# +### +# +# @file FindPkgconfigLibrariesAbsolutePath.cmake +# +# @project MORSE +# MORSE is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 1.0.0 +# @author Florent Pruvost +# @date 06-04-2018 +# +### + +# Transform relative path into absolute path for libraries found with the +# pkg_search_module cmake macro +# _prefix: the name of the CMake variable used when pkg_search_module was called +# e.g. 
for pkg_search_module(BLAS blas) _prefix would be BLAS +macro(FIND_PKGCONFIG_LIBRARIES_ABSOLUTE_PATH _prefix) + list(APPEND _lib_env "$ENV{LIBRARY_PATH}") + if(WIN32) + string(REPLACE ":" ";" _lib_env2 "$ENV{LIB}") + elseif(APPLE) + string(REPLACE ":" ";" _lib_env2 "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env2 "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${_lib_env2}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + # non static case + set(${_prefix}_LIBRARIES_COPY "${${_prefix}_LIBRARIES}") + set(${_prefix}_LIBRARIES "") + foreach(_library ${${_prefix}_LIBRARIES_COPY}) + if(EXISTS "${_library}") + list(APPEND ${_prefix}_LIBRARIES ${_library}) + else() + get_filename_component(_ext "${_library}" EXT) + set(_lib_extensions ".so" ".a" ".dyld" ".dll") + list(FIND _lib_extensions "${_ext}" _index) + if (${_index} GREATER -1) + get_filename_component(_library "${_library}" NAME_WE) + endif() + find_library(_library_path NAMES ${_library} + HINTS ${${_prefix}_LIBDIR} ${${_prefix}_LIBRARY_DIRS} ${_lib_env}) + if (_library_path) + list(APPEND ${_prefix}_LIBRARIES ${_library_path}) + else() + message(FATAL_ERROR "Dependency of ${_prefix} '${_library}' NOT FOUND") + endif() + unset(_library_path CACHE) + endif() + endforeach() + set (${_prefix}_LIBRARIES "${${_prefix}_LIBRARIES}" CACHE INTERNAL "" FORCE) + ## static case + #set(${_prefix}_STATIC_LIBRARIES_COPY "${${_prefix}_STATIC_LIBRARIES}") + #set(${_prefix}_STATIC_LIBRARIES "") + #foreach(_library ${${_prefix}_STATIC_LIBRARIES_COPY}) + # if(EXISTS "${_library}") + # list(APPEND ${_prefix}_STATIC_LIBRARIES ${_library}) + # else() + # get_filename_component(_ext "${_library}" EXT) + # set(_lib_extensions ".so" ".a" ".dyld" ".dll") + # list(FIND _lib_extensions "${_ext}" _index) + # if (${_index} GREATER -1) + # get_filename_component(_library "${_library}" NAME_WE) + # endif() + # # try static first + # set (default_find_library_suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES}) + # set (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX}) + # find_library(_library_path NAMES ${_library} + # HINTS ${${_prefix}_STATIC_LIBDIR} ${${_prefix}_STATIC_LIBRARY_DIRS} ${_lib_env}) + # set (CMAKE_FIND_LIBRARY_SUFFIXES ${default_find_library_suffixes}) + # # if not found try dynamic + # if (NOT _library_path) + # find_library(_library_path NAMES ${_library} + # HINTS ${${_prefix}_STATIC_LIBDIR} ${${_prefix}_STATIC_LIBRARY_DIRS} ${_lib_env}) + # endif() + # if (_library_path) + # list(APPEND ${_prefix}_STATIC_LIBRARIES ${_library_path}) + # else() + # message(FATAL_ERROR "Dependency of ${_prefix} '${_library}' NOT FOUND") + # endif() + # unset(_library_path CACHE) + # endif() + #endforeach() + #set (${_prefix}_STATIC_LIBRARIES "${${_prefix}_STATIC_LIBRARIES}" CACHE INTERNAL "" FORCE) +endmacro() + +## +## @end file FindPkgconfigLibrariesAbsolutePath.cmake +## diff --git a/cmake/morse/LICENCE.txt b/cmake/morse/LICENCE.txt new file mode 100644 index 0000000000000000000000000000000000000000..b95821f36afa3579a5f1de4fe840aec43b7a4b96 --- /dev/null +++ b/cmake/morse/LICENCE.txt @@ -0,0 +1,42 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, +# Univ. Bordeaux. All rights reserved. +# @copyright (c) 2016 KAUST. All rights reserved. 
+# +### +# +# This software is a computer program whose purpose is to process +# Matrices Over Runtime Systems @ Exascale (MORSE). More information +# can be found on the following website: http://www.inria.fr/en/teams/morse. +# +# This software is governed by the CeCILL-C license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL-C +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL-C license and that you accept its terms. +# +### diff --git a/cmake/morse/LibrariesAbsolutePath.cmake b/cmake/morse/LibrariesAbsolutePath.cmake new file mode 100644 index 0000000000000000000000000000000000000000..7aaab504d7348090e36c502755020b0b1439f123 --- /dev/null +++ b/cmake/morse/LibrariesAbsolutePath.cmake @@ -0,0 +1,70 @@ +### +# +# @copyright (c) 2018 Inria. All rights reserved. +# +### +# +# @file LibrariesAbsolutePath.cmake +# +# @project MORSE +# MORSE is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 1.0.0 +# @author Florent Pruvost +# @date 13-04-2018 +# +### + +# Transform relative path into absolute path for libraries +# lib_list (input/output): the name of the CMake variable containing libraries, e.g. 
BLAS_LIBRARIES +# hints_paths (input): additional paths to add when looking for libraries +macro(LIBRARIES_ABSOLUTE_PATH lib_list hints_paths) + # collect environment paths to dig + list(APPEND _lib_env "$ENV{LIBRARY_PATH}") + if(WIN32) + string(REPLACE ":" ";" _lib_env2 "$ENV{LIB}") + elseif(APPLE) + string(REPLACE ":" ";" _lib_env2 "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env2 "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${_lib_env2}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + # copy the lib list + set (${lib_list}_COPY "${${lib_list}}") + # reset the lib list to populate + set(${lib_list} "") + foreach(_library ${${lib_list}_COPY}) + if(EXISTS "${_library}") + # if already an absolute path, nothing special to do + list(APPEND ${lib_list} ${_library}) + else() + # replace pattern -lfoo -> foo + string(REGEX REPLACE "^-l" "" _library "${_library}") + # remove extensions if exist + get_filename_component(_ext "${_library}" EXT) + set(_lib_extensions ".so" ".a" ".dyld" ".dll") + list(FIND _lib_extensions "${_ext}" _index) + if (${_index} GREATER -1) + get_filename_component(_library "${_library}" NAME_WE) + endif() + # try to find the lib + find_library(_library_path NAMES ${_library} HINTS ${hints_paths} ${_lib_env}) + if (_library_path) + list(APPEND ${lib_list} ${_library_path}) + else() + message(FATAL_ERROR "Dependency of ${lib_list} '${_library}' NOT FOUND") + endif() + unset(_library_path CACHE) + endif() + endforeach() +endmacro() + +## +## @end file LibrariesAbsolutePath.cmake +## diff --git a/cmake/morse/MorseInit.cmake b/cmake/morse/MorseInit.cmake new file mode 100644 index 0000000000000000000000000000000000000000..fc51170401cc17045854de4ee51f477eff1b66c1 --- /dev/null +++ b/cmake/morse/MorseInit.cmake @@ -0,0 +1,67 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2018 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# @file MorseInit.cmake +# +# @project MORSE +# MORSE is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. 
+#
+# @version 1.0.0
+# @author Cedric Castagnede
+# @author Emmanuel Agullo
+# @author Mathieu Faverge
+# @author Florent Pruvost
+# @date 13-07-2012
+#
+###
+
+# Path to Morse modules
+get_filename_component(MORSE_CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_FILE} DIRECTORY CACHE)
+
+# Global Morse options
+option(MORSE_ENABLE_WARNING "Enable warning messages" OFF)
+option(MORSE_ENABLE_COVERAGE "Enable flags for coverage test" OFF)
+option(MORSE_ENABLE_COLOR_MESSAGE "Enable colors in messages" OFF)
+#option(MORSE_VERBOSE_FIND_PACKAGE "Add additional messages concerning packages not found" OFF)
+#message(STATUS "MORSE_VERBOSE_FIND_PACKAGE is set to OFF, turn it ON to get"
+# " information about packages not found")
+
+
+# This include is required to check symbols of libs in the main CMakeLists.txt
+include(CheckFunctionExists)
+
+# This include is required to check defines in headers
+include(CheckIncludeFiles)
+
+if (MORSE_ENABLE_COLOR_MESSAGE)
+  # colorize messages
+  include(ColorizeMessage)
+endif()
+
+# Define some auxiliary flags
+include(AuxilaryFlags)
+
+# Define some variables to get info about resources
+include(Ressources)
+
+# Add the path where we handle our FindFOO.cmake to seek for libraries
+list(APPEND CMAKE_MODULE_PATH ${MORSE_CMAKE_MODULE_PATH}/find)
+
+# To load some macros used in Finds (could be useful for other projects)
+include(FindInit)
+
+##
+## @end file MorseInit.cmake
+##
diff --git a/cmake/morse/PrintFindStatus.cmake b/cmake/morse/PrintFindStatus.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..1fdd403b7de11a8b946b178c5aada2da5e6fe33e
--- /dev/null
+++ b/cmake/morse/PrintFindStatus.cmake
@@ -0,0 +1,207 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+# of Tennessee Research Foundation.
+# All rights reserved.
+# @copyright (c) 2012-2014 Inria. All rights reserved.
+# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+#
+###
+#
+# - Some macros to print status when searching for headers and libs
+# Main parameters of macros
+# _libname: name of the lib you seek, foo for example
+# _header_to_find: name of the header you seek, foo.h for example
+# _lib_to_find: name of the library you seek, libfoo for example
+# _pc_to_find: name of the pkg-config file you seek, foo.pc for example
+
+
+#=============================================================================
+# Copyright 2012-2013 Inria
+# Copyright 2012-2013 Emmanuel Agullo
+# Copyright 2012-2013 Mathieu Faverge
+# Copyright 2012 Cedric Castagnede
+# Copyright 2013 Florent Pruvost
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file MORSE-Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+
+#=============================================================================
+# (To distribute this file outside of Morse, substitute the full
+# License text for the above reference.)
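+# Usage sketch (names like foo, foo.h, libfoo are placeholders, not actual
+# targets of this project): a find module reports a failed search by calling
+# one of the macros below, e.g.
+#   Print_Find_Header_Status(foo foo.h)
+# as done in FindHeadersAndLibs.cmake when foo.h cannot be located, or
+#   Print_Find_Library_Status(foo libfoo)
+#   Print_Find_Pkgconfig_Status(foo foo.pc ${PATHLIST})
+# for a missing library or pkg-config file.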
+ + +# Set some colors +#if(NOT WIN32) +# string(ASCII 27 Esc) +# set(ColourReset "${Esc}[m") +# set(ColourBold "${Esc}[1m") +# set(Red "${Esc}[31m") +# set(Green "${Esc}[32m") +# set(Yellow "${Esc}[33m") +# set(Blue "${Esc}[34m") +# set(Magenta "${Esc}[35m") +# set(Cyan "${Esc}[36m") +# set(White "${Esc}[37m") +# set(BoldRed "${Esc}[1;31m") +# set(BoldGreen "${Esc}[1;32m") +# set(BoldYellow "${Esc}[1;33m") +# set(BoldBlue "${Esc}[1;34m") +# set(BoldMagenta "${Esc}[1;35m") +# set(BoldCyan "${Esc}[1;36m") +# set(BoldWhite "${Esc}[1;37m") +#endif() + + +# This macro informs why the _header_to_find file has not been found +macro(Print_Find_Header_Status _libname _header_to_find) + + # save _libname upper and lower case + string(TOUPPER ${_libname} LIBNAME) + string(TOLOWER ${_libname} libname) + + # print status + #message(" ") + if(${LIBNAME}_INCDIR) + message("${Blue}${LIBNAME}_INCDIR is defined but ${_header_to_find}" + "has not been found in ${${LIBNAME}_INCDIR}${ColourReset}") + else() + if(${LIBNAME}_DIR) + message("${Blue}${LIBNAME}_DIR is defined but" + "${_header_to_find} has not been found in" + "${${LIBNAME}_DIR}/include${ColourReset}") + else() + message("${Blue}${_header_to_find} not found." + "Nor ${LIBNAME}_DIR neither ${LIBNAME}_INCDIR" + "are defined so that we looked for ${_header_to_find} in" + "system paths (INCLUDE, CPATH, C_INCLUDE_PATH," + "INCLUDE_PATH, CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES)${ColourReset}") + if(_inc_env) + message("${Blue}${_header_to_find} has not been found in" + "${_inc_env}${ColourReset}") + endif() + endif() + endif() + message("${BoldBlue}Please indicate where to find ${_header_to_find}. You have three options:\n" + "- Option 1: Provide the root directory of the library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n" + "- Option 2: Provide the directory where to find the headers with cmake option: -D${LIBNAME}_INCDIR=your/path/to/${libname}/include/\n" + "- Option 3: Update your environment variable (INCLUDE or CPATH)\n" + "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}") + #message(" ") + +endmacro() + +# This macro informs why the _lib_to_find file has not been found +macro(Print_Find_Library_Status _libname _lib_to_find) + + # save _libname upper/lower case + string(TOUPPER ${_libname} LIBNAME) + string(TOLOWER ${_libname} libname) + + # print status + #message(" ") + if(${LIBNAME}_LIBDIR) + message("${Yellow}${LIBNAME}_LIBDIR is defined but ${_lib_to_find}" + "has not been found in ${${LIBNAME}_LIBDIR}${ColourReset}") + else() + if(${LIBNAME}_DIR) + message("${Yellow}${LIBNAME}_DIR is defined but ${_lib_to_find}" + "has not been found in ${${LIBNAME}_DIR}/lib(or /lib32 or" + "/lib64)${ColourReset}") + else() + message("${Yellow}${_lib_to_find} not found." + "Nor ${LIBNAME}_DIR neither ${LIBNAME}_LIBDIR" + "are defined so that we looked for ${_lib_to_find} in" + "system paths (Linux: LD_LIBRARY_PATH, Windows: LIB," + "Mac: DYLD_LIBRARY_PATH," + "CMAKE_C_IMPLICIT_LINK_DIRECTORIES)${ColourReset}") + if(_lib_env) + message("${Yellow}${_lib_to_find} has not been found in" + "${_lib_env}${ColourReset}") + endif() + endif() + endif() + message("${BoldYellow}Please indicate where to find ${_lib_to_find}. 
You have three options:\n" + "- Option 1: Provide the root directory of the library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n" + "- Option 2: Provide the directory where to find the library with cmake option: -D${LIBNAME}_LIBDIR=your/path/to/${libname}/lib/\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}") + +endmacro() + +# This macro informs why the _lib_to_find file has not been found +macro(Print_Find_Library_Blas_Status _libname _lib_to_find) + + # save _libname upper/lower case + string(TOUPPER ${_libname} LIBNAME) + string(TOLOWER ${_libname} libname) + + # print status + #message(" ") + if(${LIBNAME}_LIBDIR) + message("${Yellow}${LIBNAME}_LIBDIR is defined but ${_lib_to_find}" + "has not been found in ${ARGN}${ColourReset}") + else() + if(${LIBNAME}_DIR) + message("${Yellow}${LIBNAME}_DIR is defined but ${_lib_to_find}" + "has not been found in ${ARGN}${ColourReset}") + else() + message("${Yellow}${_lib_to_find} not found." + "Nor ${LIBNAME}_DIR neither ${LIBNAME}_LIBDIR" + "are defined so that we look for ${_lib_to_find} in" + "system paths (Linux: LD_LIBRARY_PATH, Windows: LIB," + "Mac: DYLD_LIBRARY_PATH," + "CMAKE_C_IMPLICIT_LINK_DIRECTORIES)${ColourReset}") + if(_lib_env) + message("${Yellow}${_lib_to_find} has not been found in" + "${_lib_env}${ColourReset}") + endif() + endif() + endif() + message("${BoldYellow}Please indicate where to find ${_lib_to_find}. You have three options:\n" + "- Option 1: Provide the root directory of the library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n" + "- Option 2: Provide the directory where to find the library with cmake option: -D${LIBNAME}_LIBDIR=your/path/to/${libname}/lib/\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}") + +endmacro() + +# This macro informs why the _lib_to_find file has not been found +macro(Print_Find_Library_Blas_CheckFunc_Status _name) + + # save _libname upper/lower case + string(TOUPPER ${_name} FUNCNAME) + string(TOLOWER ${_name} funcname) + + # print status + #message(" ") + message("${Red}Libs have been found but check of symbol ${_name} failed " + "with following libraries ${ARGN}${ColourReset}") + message("${BoldRed}Please open your error file CMakeFiles/CMakeError.log" + "to figure out why it fails${ColourReset}") + #message(" ") + +endmacro() + +# This macro informs that _pc_to_find file has not been found in the list +# path you give as last argument (read in ${ARGN}) +# ex: Print_Find_Pkgconfig_Status(foo foo.pc ${PATHLIST} +macro(Print_Find_Pkgconfig_Status _libname _pc_to_find) + + # save _libname lower case + string(TOLOWER ${_libname} libname) + + # print status + #message(" ") + message("${Magenta}${_pc_to_find} has not been found in" + "${ARGN}${ColourReset}") + message("${BoldMagenta}If you really want to use the pkg-config file of" + "${libname}, please update your PKG_CONFIG_PATH with the path" + "where ${_pc_to_find} states${ColourReset}") + #message(" ") + +endmacro() diff --git a/bfps/cpp/Lagrange_polys.cpp b/cpp/Lagrange_polys.cpp similarity index 100% rename from bfps/cpp/Lagrange_polys.cpp rename to cpp/Lagrange_polys.cpp diff --git a/bfps/cpp/Lagrange_polys.hpp b/cpp/Lagrange_polys.hpp 
similarity index 100% rename from bfps/cpp/Lagrange_polys.hpp rename to cpp/Lagrange_polys.hpp diff --git a/bfps/cpp/base.hpp b/cpp/base.hpp similarity index 100% rename from bfps/cpp/base.hpp rename to cpp/base.hpp diff --git a/bfps/cpp/bfps_timer.hpp b/cpp/bfps_timer.hpp similarity index 100% rename from bfps/cpp/bfps_timer.hpp rename to cpp/bfps_timer.hpp diff --git a/cpp/fftw_interface.hpp b/cpp/fftw_interface.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0a840dd5ba3d864b36271515faa7cb81f3042c01 --- /dev/null +++ b/cpp/fftw_interface.hpp @@ -0,0 +1,779 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + +#ifndef FFTW_INTERFACE_HPP +#define FFTW_INTERFACE_HPP + +#include <fftw3-mpi.h> +#include <map> +#include <string> + +#ifdef USE_FFTWESTIMATE +#define DEFAULT_FFTW_FLAG FFTW_ESTIMATE +#warning You are using FFTW estimate +#else +#define DEFAULT_FFTW_FLAG FFTW_PATIENT +#endif + +// To have multiple calls to c2r/r2c +// you must define SPLIT_FFTW_MANY +// by calling setup.py --split-fftw-many +#ifdef SPLIT_FFTW_MANY +#include <vector> +#include <memory> +#include <algorithm> +#include <cassert> +#include <cstring> +#include <type_traits> + +// To mix unique ptr with allocation from fftw +struct fftw_free_deleter{ + template <typename T> + void operator()(T *p) const { + fftwf_free(const_cast<typename std::remove_const<T>::type*>(p)); + } +}; + +#endif + +template <class realtype> +class fftw_interface; + +template <> +class fftw_interface<float> +{ +public: + using real = float; + using complex = fftwf_complex; + using plan = fftwf_plan; + using iodim = fftwf_iodim; +#ifdef SPLIT_FFTW_MANY + struct many_plan_container{ + int rnk; + std::vector<ptrdiff_t> n; + int howmany; + ptrdiff_t iblock; + ptrdiff_t oblock; + std::unique_ptr<real[], fftw_free_deleter> buffer; + plan plan_to_use; + + ptrdiff_t local_n0, local_0_start; + ptrdiff_t local_n1, local_1_start; + + bool is_r2c; + void* in; + void* out; + + ptrdiff_t nb_sections_real; + ptrdiff_t size_real_section; + ptrdiff_t nb_sections_complex; + ptrdiff_t size_complex_section; + + ptrdiff_t sizeBuffer; + }; + + using many_plan = many_plan_container; +#else + using many_plan = fftwf_plan; +#endif + + static complex* alloc_complex(const size_t in_size){ + return fftwf_alloc_complex(in_size); + } + + static real* alloc_real(const size_t in_size){ + return fftwf_alloc_real(in_size); + } + + static void free(void* ptr){ + fftwf_free(ptr); + } + + static void execute(plan in_plan){ + fftwf_execute(in_plan); + } + + static void destroy_plan(plan in_plan){ + fftwf_destroy_plan(in_plan); + } + + template <class ... 
Params> + static ptrdiff_t mpi_local_size_many(Params ... params){ + return fftwf_mpi_local_size_many(params...); + } + + template <class ... Params> + static plan mpi_plan_transpose(Params ... params){ + return fftwf_mpi_plan_transpose(params...); + } + + template <class ... Params> + static plan mpi_plan_many_transpose(Params ... params){ + return fftwf_mpi_plan_many_transpose(params...); + } + + template <class ... Params> + static plan plan_guru_r2r(Params ... params){ + return fftwf_plan_guru_r2r(params...); + } + + template <class ... Params> + static plan plan_guru_dft(Params ... params){ + return fftwf_plan_guru_dft(params...); + } + + template <class ... Params> + static ptrdiff_t mpi_local_size_transposed(Params ... params){ + return fftwf_mpi_local_size_transposed(params...); + } + +#ifdef SPLIT_FFTW_MANY + static ptrdiff_t mpi_local_size_many_transposed(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start){ + assert(block0 == FFTW_MPI_DEFAULT_BLOCK); + assert(block1 == FFTW_MPI_DEFAULT_BLOCK); + return howmany*mpi_local_size_transposed(rnk, n, comm, + local_n0, local_0_start, + local_n1, local_1_start); + } + + static many_plan mpi_plan_many_dft_c2r(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + complex *in, real *out, + MPI_Comm comm, unsigned flags){ + assert(iblock == FFTW_MPI_DEFAULT_BLOCK); + assert(oblock == FFTW_MPI_DEFAULT_BLOCK); + + many_plan c2r_plan; + c2r_plan.rnk = rnk; + c2r_plan.n.insert(c2r_plan.n.end(), n, n+rnk); + c2r_plan.howmany = howmany; + c2r_plan.iblock = iblock; + c2r_plan.oblock = oblock; + c2r_plan.is_r2c = false; + c2r_plan.in = in; + c2r_plan.out = out; + c2r_plan.sizeBuffer = 0; + + // If 1 then use default without copy + if(howmany == 1){ + c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n, + (complex*)in, + out, + comm, flags); + return c2r_plan; + } + + // We need to find out the size of the buffer to allocate + mpi_local_size_transposed( + rnk, n, comm, + &c2r_plan.local_n0, &c2r_plan.local_0_start, + &c2r_plan.local_n1, &c2r_plan.local_1_start); + + ptrdiff_t sizeBuffer = c2r_plan.local_n0; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + sizeBuffer *= n[idxrnk]; + } + sizeBuffer *= n[rnk-1]+2; + + c2r_plan.buffer.reset(alloc_real(sizeBuffer)); + memset(c2r_plan.buffer.get(), 0, sizeof(real)*sizeBuffer); + c2r_plan.sizeBuffer = sizeBuffer; + // Init the plan + c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n, + (complex*)c2r_plan.buffer.get(), + c2r_plan.buffer.get(), + comm, flags); + + c2r_plan.nb_sections_real = c2r_plan.local_n0; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + c2r_plan.nb_sections_real *= n[idxrnk]; + c2r_plan.nb_sections_complex *= n[idxrnk]; + } + c2r_plan.size_real_section = (n[rnk-1] + 2); + + c2r_plan.nb_sections_complex = c2r_plan.local_n1; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + if(idxrnk == 1){ + c2r_plan.nb_sections_complex *= n[0]; + } + else{ + c2r_plan.nb_sections_complex *= n[idxrnk]; + } + } + c2r_plan.size_complex_section = (n[rnk-1]/2 + 1); + + return c2r_plan; + } + + static many_plan mpi_plan_many_dft_r2c(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + real *in, complex *out, + MPI_Comm comm, unsigned flags){ + assert(iblock == FFTW_MPI_DEFAULT_BLOCK); + assert(oblock == FFTW_MPI_DEFAULT_BLOCK); + + many_plan r2c_plan; + r2c_plan.rnk = rnk; + 
r2c_plan.n.insert(r2c_plan.n.end(), n, n+rnk); + r2c_plan.howmany = howmany; + r2c_plan.iblock = iblock; + r2c_plan.oblock = oblock; + r2c_plan.is_r2c = true; + r2c_plan.in = in; + r2c_plan.out = out; + r2c_plan.sizeBuffer = 0; + + // If 1 then use default without copy + if(howmany == 1){ + r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n, + in, + (complex*)out, + comm, flags); + return r2c_plan; + } + + // We need to find out the size of the buffer to allocate + mpi_local_size_transposed( + rnk, n, comm, + &r2c_plan.local_n0, &r2c_plan.local_0_start, + &r2c_plan.local_n1, &r2c_plan.local_1_start); + + ptrdiff_t sizeBuffer = r2c_plan.local_n0; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + sizeBuffer *= n[idxrnk]; + } + sizeBuffer *= n[rnk-1]+2; + + r2c_plan.buffer.reset(alloc_real(sizeBuffer)); + memset(r2c_plan.buffer.get(), 0, sizeof(real)*sizeBuffer); + r2c_plan.sizeBuffer = sizeBuffer; + // Init the plan + r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n, + r2c_plan.buffer.get(), + (complex*)r2c_plan.buffer.get(), + comm, flags); + + r2c_plan.nb_sections_real = r2c_plan.local_n0; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + r2c_plan.nb_sections_real *= n[idxrnk]; + r2c_plan.nb_sections_complex *= n[idxrnk]; + } + r2c_plan.size_real_section = (n[rnk-1] + 2); + + r2c_plan.nb_sections_complex = r2c_plan.local_n1; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + if(idxrnk == 1){ + r2c_plan.nb_sections_complex *= n[0]; + } + else{ + r2c_plan.nb_sections_complex *= n[idxrnk]; + } + } + r2c_plan.size_complex_section = (n[rnk-1]/2 + 1); + + return r2c_plan; + } + + static void execute(many_plan& in_plan){ + if(in_plan.howmany == 1){ + execute(in_plan.plan_to_use); + return; + } + + std::unique_ptr<real[]> in_copy; + if(in_plan.is_r2c){ + in_copy.reset(new real[in_plan.nb_sections_real * in_plan.size_real_section * in_plan.howmany]); + + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){ + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){ + for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){ + in_copy[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_real_section*in_plan.howmany] = + ((const real*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_real_section*in_plan.howmany]; + } + } + } + } + else{ + in_copy.reset((real*)new complex[in_plan.nb_sections_complex * in_plan.size_complex_section * in_plan.howmany]); + + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){ + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){ + for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){ + ((complex*)in_copy.get())[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][0] = + ((const complex*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][0]; + ((complex*)in_copy.get())[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][1] = + ((const complex*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][1]; + } + } + } + } + + for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){ + // Copy to buffer + if(in_plan.is_r2c){ + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){ + real* dest = in_plan.buffer.get() + 
idx_section*in_plan.size_real_section; + const real* src = in_copy.get()+idx_howmany + idx_section*in_plan.size_real_section*in_plan.howmany; + + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){ + dest[idx_copy] = src[idx_copy*in_plan.howmany]; + } + } + } + else{ + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){ + complex* dest = ((complex*)in_plan.buffer.get()) + idx_section*in_plan.size_complex_section; + const complex* src = ((const complex*)in_copy.get()) + idx_howmany + idx_section*in_plan.size_complex_section*in_plan.howmany; + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){ + dest[idx_copy][0] = src[idx_copy*in_plan.howmany][0]; + dest[idx_copy][1] = src[idx_copy*in_plan.howmany][1]; + } + } + } + + execute(in_plan.plan_to_use); + // Copy result from buffer + if(in_plan.is_r2c){ + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){ + complex* dest = ((complex*)in_plan.out) + idx_howmany + idx_section*in_plan.size_complex_section*in_plan.howmany; + const complex* src = ((const complex*)in_plan.buffer.get()) + idx_section*in_plan.size_complex_section; + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){ + dest[idx_copy*in_plan.howmany][0] = src[idx_copy][0]; + dest[idx_copy*in_plan.howmany][1] = src[idx_copy][1]; + } + } + } + else{ + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){ + real* dest = ((real*)in_plan.out)+idx_howmany + idx_section*in_plan.size_real_section*in_plan.howmany; + const real* src = in_plan.buffer.get() + idx_section*in_plan.size_real_section; + + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){ + dest[idx_copy*in_plan.howmany] = src[idx_copy]; + } + } + } + } + } + + static void destroy_plan(many_plan& in_plan){ + destroy_plan(in_plan.plan_to_use); + } +#else + + template <class ... Params> + static ptrdiff_t mpi_local_size_many_transposed(Params ... params){ + return fftwf_mpi_local_size_many_transposed(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_c2r(Params ... params){ + return fftwf_mpi_plan_many_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_r2c(Params ... params){ + return fftwf_mpi_plan_many_dft_r2c(params...); + } +#endif + + template <class ... Params> + static plan mpi_plan_dft_c2r(Params ... params){ + return fftwf_mpi_plan_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_r2c(Params ... params){ + return fftwf_mpi_plan_dft_r2c(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_c2r_3d(Params ... 
params){ + return fftwf_mpi_plan_dft_c2r_3d(params...); + } +}; + +template <> +class fftw_interface<double> +{ +public: + using real = double; + using complex = fftw_complex; + using plan = fftw_plan; + using iodim = fftw_iodim; +#ifdef SPLIT_FFTW_MANY + struct many_plan_container{ + int rnk; + std::vector<ptrdiff_t> n; + int howmany; + ptrdiff_t iblock; + ptrdiff_t oblock; + std::unique_ptr<real[], fftw_free_deleter> buffer; + plan plan_to_use; + + ptrdiff_t local_n0, local_0_start; + ptrdiff_t local_n1, local_1_start; + + bool is_r2c; + void* in; + void* out; + + ptrdiff_t nb_sections_real; + ptrdiff_t size_real_section; + ptrdiff_t nb_sections_complex; + ptrdiff_t size_complex_section; + + ptrdiff_t sizeBuffer; + }; + + using many_plan = many_plan_container; +#else + using many_plan = fftw_plan; +#endif + + static complex* alloc_complex(const size_t in_size){ + return fftw_alloc_complex(in_size); + } + + static real* alloc_real(const size_t in_size){ + return fftw_alloc_real(in_size); + } + + static void free(void* ptr){ + fftw_free(ptr); + } + + static void execute(plan in_plan){ + fftw_execute(in_plan); + } + + static void destroy_plan(plan in_plan){ + fftw_destroy_plan(in_plan); + } + + template <class ... Params> + static ptrdiff_t mpi_local_size_many(Params ... params){ + return fftw_mpi_local_size_many(params...); + } + + template <class ... Params> + static plan mpi_plan_transpose(Params ... params){ + return fftw_mpi_plan_transpose(params...); + } + + template <class ... Params> + static plan mpi_plan_many_transpose(Params ... params){ + return fftw_mpi_plan_many_transpose(params...); + } + + template <class ... Params> + static plan plan_guru_r2r(Params ... params){ + return fftw_plan_guru_r2r(params...); + } + + template <class ... Params> + static plan plan_guru_dft(Params ... params){ + return fftw_plan_guru_dft(params...); + } + + template <class ... Params> + static ptrdiff_t mpi_local_size_transposed(Params ... 
params){ + return fftw_mpi_local_size_transposed(params...); + } + +#ifdef SPLIT_FFTW_MANY + static ptrdiff_t mpi_local_size_many_transposed(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start){ + assert(block0 == FFTW_MPI_DEFAULT_BLOCK); + assert(block1 == FFTW_MPI_DEFAULT_BLOCK); + return howmany*mpi_local_size_transposed(rnk, n, comm, + local_n0, local_0_start, + local_n1, local_1_start); + } + + static many_plan mpi_plan_many_dft_c2r(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + complex *in, real *out, + MPI_Comm comm, unsigned flags){ + assert(iblock == FFTW_MPI_DEFAULT_BLOCK); + assert(oblock == FFTW_MPI_DEFAULT_BLOCK); + + many_plan c2r_plan; + c2r_plan.rnk = rnk; + c2r_plan.n.insert(c2r_plan.n.end(), n, n+rnk); + c2r_plan.howmany = howmany; + c2r_plan.iblock = iblock; + c2r_plan.oblock = oblock; + c2r_plan.is_r2c = false; + c2r_plan.in = in; + c2r_plan.out = out; + c2r_plan.sizeBuffer = 0; + + // If 1 then use default without copy + if(howmany == 1){ + c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n, + (complex*)in, + out, + comm, flags); + return c2r_plan; + } + + // We need to find out the size of the buffer to allocate + mpi_local_size_transposed( + rnk, n, comm, + &c2r_plan.local_n0, &c2r_plan.local_0_start, + &c2r_plan.local_n1, &c2r_plan.local_1_start); + + ptrdiff_t sizeBuffer = c2r_plan.local_n0; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + sizeBuffer *= n[idxrnk]; + } + sizeBuffer *= n[rnk-1]+2; + + c2r_plan.buffer.reset(alloc_real(sizeBuffer)); + memset(c2r_plan.buffer.get(), 0, sizeof(real)*sizeBuffer); + c2r_plan.sizeBuffer = sizeBuffer; + // Init the plan + c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n, + (complex*)c2r_plan.buffer.get(), + c2r_plan.buffer.get(), + comm, flags); + + c2r_plan.nb_sections_real = c2r_plan.local_n0; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + c2r_plan.nb_sections_real *= n[idxrnk]; + c2r_plan.nb_sections_complex *= n[idxrnk]; + } + c2r_plan.size_real_section = (n[rnk-1] + 2); + + c2r_plan.nb_sections_complex = c2r_plan.local_n1; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + if(idxrnk == 1){ + c2r_plan.nb_sections_complex *= n[0]; + } + else{ + c2r_plan.nb_sections_complex *= n[idxrnk]; + } + } + c2r_plan.size_complex_section = (n[rnk-1]/2 + 1); + + return c2r_plan; + } + + static many_plan mpi_plan_many_dft_r2c(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + real *in, complex *out, + MPI_Comm comm, unsigned flags){ + assert(iblock == FFTW_MPI_DEFAULT_BLOCK); + assert(oblock == FFTW_MPI_DEFAULT_BLOCK); + + many_plan r2c_plan; + r2c_plan.rnk = rnk; + r2c_plan.n.insert(r2c_plan.n.end(), n, n+rnk); + r2c_plan.howmany = howmany; + r2c_plan.iblock = iblock; + r2c_plan.oblock = oblock; + r2c_plan.is_r2c = true; + r2c_plan.in = in; + r2c_plan.out = out; + r2c_plan.sizeBuffer = 0; + + // If 1 then use default without copy + if(howmany == 1){ + r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n, + in, + (complex*)out, + comm, flags); + return r2c_plan; + } + + // We need to find out the size of the buffer to allocate + mpi_local_size_transposed( + rnk, n, comm, + &r2c_plan.local_n0, &r2c_plan.local_0_start, + &r2c_plan.local_n1, &r2c_plan.local_1_start); + + ptrdiff_t sizeBuffer = r2c_plan.local_n0; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + sizeBuffer *= n[idxrnk]; + } + sizeBuffer *= n[rnk-1]+2; 
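+        // Note on the size: the scratch buffer holds one scalar field in
+        // FFTW's padded in-place r2c layout, i.e. rows of n[rnk-1]+2 reals,
+        // because the n[rnk-1]/2+1 complex outputs along the last dimension
+        // are stored in place as 2*(n[rnk-1]/2+1) reals (equal to n[rnk-1]+2
+        // for the even grid sizes assumed here).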
+ + r2c_plan.buffer.reset(alloc_real(sizeBuffer)); + memset(r2c_plan.buffer.get(), 0, sizeof(real)*sizeBuffer); + r2c_plan.sizeBuffer = sizeBuffer; + // Init the plan + r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n, + r2c_plan.buffer.get(), + (complex*)r2c_plan.buffer.get(), + comm, flags); + + r2c_plan.nb_sections_real = r2c_plan.local_n0; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + r2c_plan.nb_sections_real *= n[idxrnk]; + r2c_plan.nb_sections_complex *= n[idxrnk]; + } + r2c_plan.size_real_section = (n[rnk-1] + 2); + + r2c_plan.nb_sections_complex = r2c_plan.local_n1; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + if(idxrnk == 1){ + r2c_plan.nb_sections_complex *= n[0]; + } + else{ + r2c_plan.nb_sections_complex *= n[idxrnk]; + } + } + r2c_plan.size_complex_section = (n[rnk-1]/2 + 1); + + return r2c_plan; + } + + static void execute(many_plan& in_plan){ + if(in_plan.howmany == 1){ + execute(in_plan.plan_to_use); + return; + } + + std::unique_ptr<real[]> in_copy; + if(in_plan.is_r2c){ + in_copy.reset(new real[in_plan.nb_sections_real * in_plan.size_real_section * in_plan.howmany]); + + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){ + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){ + for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){ + in_copy[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_real_section*in_plan.howmany] = + ((const real*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_real_section*in_plan.howmany]; + } + } + } + } + else{ + in_copy.reset((real*)new complex[in_plan.nb_sections_complex * in_plan.size_complex_section * in_plan.howmany]); + + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){ + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){ + for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){ + ((complex*)in_copy.get())[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][0] = + ((const complex*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][0]; + ((complex*)in_copy.get())[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][1] = + ((const complex*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][1]; + } + } + } + } + + for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){ + // Copy to buffer + if(in_plan.is_r2c){ + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){ + real* dest = in_plan.buffer.get() + idx_section*in_plan.size_real_section; + const real* src = in_copy.get()+idx_howmany + idx_section*in_plan.size_real_section*in_plan.howmany; + + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){ + dest[idx_copy] = src[idx_copy*in_plan.howmany]; + } + } + } + else{ + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){ + complex* dest = ((complex*)in_plan.buffer.get()) + idx_section*in_plan.size_complex_section; + const complex* src = ((const complex*)in_copy.get()) + idx_howmany + idx_section*in_plan.size_complex_section*in_plan.howmany; + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){ + dest[idx_copy][0] = src[idx_copy*in_plan.howmany][0]; + dest[idx_copy][1] = 
src[idx_copy*in_plan.howmany][1]; + } + } + } + + execute(in_plan.plan_to_use); + // Copy result from buffer + if(in_plan.is_r2c){ + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){ + complex* dest = ((complex*)in_plan.out) + idx_howmany + idx_section*in_plan.size_complex_section*in_plan.howmany; + const complex* src = ((const complex*)in_plan.buffer.get()) + idx_section*in_plan.size_complex_section; + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){ + dest[idx_copy*in_plan.howmany][0] = src[idx_copy][0]; + dest[idx_copy*in_plan.howmany][1] = src[idx_copy][1]; + } + } + } + else{ + for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){ + real* dest = ((real*)in_plan.out)+idx_howmany + idx_section*in_plan.size_real_section*in_plan.howmany; + const real* src = in_plan.buffer.get() + idx_section*in_plan.size_real_section; + + for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){ + dest[idx_copy*in_plan.howmany] = src[idx_copy]; + } + } + } + } + } + + static void destroy_plan(many_plan& in_plan){ + destroy_plan(in_plan.plan_to_use); + } +#else + template <class ... Params> + static ptrdiff_t mpi_local_size_many_transposed(Params ... params){ + return fftw_mpi_local_size_many_transposed(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_c2r(Params ... params){ + return fftw_mpi_plan_many_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_r2c(Params ... params){ + return fftw_mpi_plan_many_dft_r2c(params...); + } +#endif + + template <class ... Params> + static plan mpi_plan_dft_c2r(Params ... params){ + return fftw_mpi_plan_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_r2c(Params ... params){ + return fftw_mpi_plan_dft_r2c(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_c2r_3d(Params ... params){ + return fftw_mpi_plan_dft_c2r_3d(params...); + } +}; + + + +#endif // FFTW_INTERFACE_HPP + diff --git a/bfps/cpp/fftw_tools.hpp b/cpp/fftw_tools.cpp similarity index 58% rename from bfps/cpp/fftw_tools.hpp rename to cpp/fftw_tools.cpp index d0f3dbf30df3ee95f3d7934f0dd7fca633858b44..55794b41ebf8ebfa03977d5a79704aa38b39af52 100644 --- a/bfps/cpp/fftw_tools.hpp +++ b/cpp/fftw_tools.cpp @@ -22,49 +22,19 @@ * * **********************************************************************/ - - -#include <mpi.h> -#include <fftw3-mpi.h> -#include "field_descriptor.hpp" - -#ifndef FFTW_TOOLS - -#define FFTW_TOOLS - -extern int myrank, nprocs; - -/* given two arrays of the same dimension, we do a simple resize in - * Fourier space: either chop off high modes, or pad with zeros. - * the arrays are assumed to use 3D mpi fftw layout. - * */ -template <class rnumber> -int copy_complex_array( - field_descriptor<rnumber> *fi, - rnumber (*ai)[2], - field_descriptor<rnumber> *fo, - rnumber (*ao)[2], - int howmany=1); - -template <class rnumber> -int clip_zero_padding( - field_descriptor<rnumber> *f, - rnumber *a, - int howmany=1); - -/* function to get pair of descriptors for real and Fourier space - * arrays used with fftw. - * the n0, n1, n2 correspond to the real space data WITHOUT the zero - * padding that FFTW needs. - * IMPORTANT: the real space array must be allocated with - * 2*fc->local_size, and then the zeros cleaned up before trying - * to write data. 
- * */ -template <class rnumber> -int get_descriptors_3D( - int n0, int n1, int n2, - field_descriptor<rnumber> **fr, - field_descriptor<rnumber> **fc); - -#endif//FFTW_TOOLS +#include <stdlib.h> +#include <algorithm> +#include <iostream> +#include "base.hpp" +#include "fftw_tools.hpp" +#include "fftw_interface.hpp" + +#define NDEBUG + +std::map<std::string, unsigned> fftw_planner_string_to_flag = { + {"FFTW_ESTIMATE", FFTW_ESTIMATE}, + {"FFTW_MEASURE", FFTW_MEASURE}, + {"FFTW_PATIENT", FFTW_PATIENT}, + {"parameter does not exist", DEFAULT_FFTW_FLAG}, +}; diff --git a/bfps/cpp/tracers.hpp b/cpp/fftw_tools.hpp similarity index 66% rename from bfps/cpp/tracers.hpp rename to cpp/fftw_tools.hpp index 1a063e026578dd71b9a223ee46b55d2c86d4399f..b41cd2a453c2c0aa34f56febb17f2a650a2a9685 100644 --- a/bfps/cpp/tracers.hpp +++ b/cpp/fftw_tools.hpp @@ -24,40 +24,17 @@ -#include "slab_field_particles.hpp" +#include <mpi.h> +#include <fftw3-mpi.h> +#include <map> -#ifndef TRACERS +#ifndef FFTW_TOOLS -#define TRACERS +#define FFTW_TOOLS extern int myrank, nprocs; -template <class rnumber> -class tracers final:public slab_field_particles<rnumber> -{ - public: - rnumber *source_data; - rnumber *data; - - /* methods */ - tracers( - const char *NAME, - fluid_solver_base<rnumber> *FSOLVER, - const int NPARTICLES, - base_polynomial_values BETA_POLYS, - const int NEIGHBOURS, - const int TRAJ_SKIP, - const int INTEGRATION_STEPS, - rnumber *SOURCE_DATA); - ~tracers(); - - void update_field(bool clip_on = true); - virtual void get_rhs(double *x, double *rhs); - virtual void jump_estimate(double *jump_length); - - void sample_vec_field(rnumber *vec_field, double *vec_values); -}; - - -#endif//TRACERS +extern std::map<std::string, unsigned> fftw_planner_string_to_flag; + +#endif//FFTW_TOOLS diff --git a/bfps/cpp/field.cpp b/cpp/field.cpp similarity index 63% rename from bfps/cpp/field.cpp rename to cpp/field.cpp index 197ccb5da26dabf9f35d84bdc627a31f20ee49ad..04eaa008e0c37b37b382335b6069425e1ce5d731 100644 --- a/bfps/cpp/field.cpp +++ b/cpp/field.cpp @@ -23,6 +23,9 @@ **********************************************************************/ + +#define NDEBUG + #include <sys/stat.h> #include <cmath> #include <cstdlib> @@ -73,11 +76,11 @@ field<rnumber, be, fc>::field( nfftw[0] = nz; nfftw[1] = ny; nfftw[2] = nx; - //ptrdiff_t tmp_local_size; + hsize_t tmp_local_size; ptrdiff_t local_n0, local_0_start; ptrdiff_t local_n1, local_1_start; - //tmp_local_size = fftw_mpi_local_size_many_transposed( - fftw_mpi_local_size_many_transposed( + variable_used_only_in_assert(tmp_local_size); + tmp_local_size = fftw_interface<rnumber>::mpi_local_size_many_transposed( 3, nfftw, ncomp(fc), FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, this->comm, &local_n0, &local_0_start, @@ -88,6 +91,7 @@ field<rnumber, be, fc>::field( starts[0] = local_0_start; starts[1] = 0; starts[2] = 0; this->rlayout = new field_layout<fc>( sizes, subsizes, starts, this->comm); + assert(tmp_local_size == this->rlayout->local_size); this->npoints = this->rlayout->full_size / ncomp(fc); sizes[0] = nz; sizes[1] = ny; sizes[2] = nx+2; subsizes[0] = local_n0; subsizes[1] = ny; subsizes[2] = nx+2; @@ -224,6 +228,7 @@ int field<rnumber, be, fc>::io( H5Tequal(dset_type, H5T_IEEE_F64LE) || H5Tequal(dset_type, H5T_INTEL_F64) || H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + variable_used_only_in_assert(io_for_real); H5Tclose(dset_type); assert(this->real_space_representation == io_for_real); } @@ -304,6 +309,7 @@ int field<rnumber, be, fc>::io( /* check file space */ int 
ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL); + variable_used_only_in_assert(ndims_fspace); assert(((unsigned int)(ndims_fspace)) == ndim(fc)); if (this->real_space_representation) { @@ -414,6 +420,7 @@ int field<rnumber, be, fc>::io_database( H5Tequal(dset_type, H5T_IEEE_F64LE) || H5Tequal(dset_type, H5T_INTEL_F64) || H5Tequal(dset_type, H5T_NATIVE_DOUBLE)); + variable_used_only_in_assert(io_for_real); H5Tclose(dset_type); assert(this->real_space_representation == io_for_real); } @@ -490,6 +497,7 @@ int field<rnumber, be, fc>::io_database( /* check file space */ int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL); + variable_used_only_in_assert(ndims_fspace); assert(ndims_fspace == int(ndim(fc) + 1)); offset[0] = toffset; if (this->real_space_representation) @@ -576,7 +584,7 @@ int field<rnumber, be, fc>::write_0slice( count[1] = this->rmemlayout->sizes[1]; count[2] = this->rmemlayout->sizes[2]; count[3] = 3; - count[3] = 3; + count[4] = 3; mspace = H5Screate_simple(ndims, count, NULL); // array in file should not have the extra 2 points count[1] = this->rlayout->sizes[1]; @@ -612,6 +620,198 @@ int field<rnumber, be, fc>::write_0slice( return EXIT_SUCCESS; } +template <typename rnumber, + field_backend be, + field_components fc> +int field<rnumber, be, fc>::write_filtered( + const std::string fname, + const std::string field_name, + const int iteration, + int nx, + int ny, + int nz) +{ + /* file dataset has same dimensions as field */ + TIMEZONE("field::write_filtered"); + // only works in Fourier representation + assert(!this->real_space_representation); + assert(hsize_t(nx) <= this->rlayout->sizes[2]); + assert(hsize_t(ny) <= this->rlayout->sizes[1]); + assert(hsize_t(nz) <= this->rlayout->sizes[0]); + // current algorithm only works for more than one process + assert(this->nprocs >= 2); + hid_t file_id, dset_id, plist_id; + dset_id = H5I_BADID; + std::string dset_name = ( + "/" + field_name + + "/complex" + + "/" + std::to_string(iteration)); + + /* open/create file */ + plist_id = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(plist_id, this->comm, MPI_INFO_NULL); + bool file_exists = false; + struct stat file_buffer; + file_exists = (stat(fname.c_str(), &file_buffer) == 0); + if (file_exists) + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + else + file_id = H5Fcreate(fname.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, plist_id); + assert(file_id >= 0); + H5Pclose(plist_id); + + /* generic space initialization */ + hid_t fspace, mspace; + hsize_t count[ndim(fc)], offset[ndim(fc)], dims[ndim(fc)], fdims[ndim(fc)]; + hsize_t memoffset[ndim(fc)], memshape[ndim(fc)]; + + // set up dimensions + for (unsigned int i=3; i<ndim(fc); i++) + { + count [i] = this->clayout->subsizes[i]; + offset[i] = this->clayout->starts[i]; + dims [i] = this->clayout->sizes[i]; + memshape [i] = count[i]; + memoffset[i] = 0; + } + // these are dimensions of dataset, needed + // to create dataset + //dims[0] = nz; + dims[0] = ny; + dims[1] = nz; + dims[2] = nx/2+1; + + /* open/create data set */ + if (!H5Lexists(file_id, field_name.c_str(), H5P_DEFAULT)) + { + hid_t gid_tmp = H5Gcreate( + file_id, field_name.c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(gid_tmp); + } + if (!H5Lexists(file_id, (field_name + "/complex").c_str(), H5P_DEFAULT)) + { + hid_t gid_tmp = H5Gcreate( + file_id, ("/" + field_name + "/complex").c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(gid_tmp); + } + if (H5Lexists(file_id, dset_name.c_str(), H5P_DEFAULT)) + { + dset_id = 
H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT); + fspace = H5Dget_space(dset_id); + } + else + { + fspace = H5Screate_simple( + ndim(fc), + dims, + NULL); + /* chunking needs to go in here */ + dset_id = H5Dcreate( + file_id, + dset_name.c_str(), + this->cnumber_H5T, + fspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + } + /* check file space */ + int ndims_fspace = H5Sget_simple_extent_dims(fspace, fdims, NULL); + variable_used_only_in_assert(ndims_fspace); + assert(((unsigned int)(ndims_fspace)) == ndim(fc)); + for (unsigned int i=0; i<ndim(fc); i++) + { + assert(dims[i] == fdims[i]); + } + /* both dset_id and fspace now have sane values */ + + /// set up counts and offsets + /// x is easy, since only positive modes are present + count [2] = nx/2+1; + offset[2] = 0; + memshape [2] = this->clayout->subsizes[2]; + memoffset[2] = 0; + + /// three options for y: + /// this->starts[0] <= ny/2 + /// ny / 2 < this->starts[0] +this->clayout->subsizes[0] < this->sizes[0] - ny/2 + /// this->starts[0] >= this->sizes[0] - ny/2 + /// we don't care about saving the ny/2 mode, because of symmetry + hsize_t y0 = this->clayout->starts[0]; + hsize_t y1 = this->clayout->starts[0] + this->clayout->subsizes[0]; + memshape[0] = this->clayout->subsizes[0]; + if (y1 <= hsize_t(ny/2)) + { + count[0] = this->clayout->subsizes[0]; + offset[0] = y0; + memoffset[0] = 0; + } + else + { + if (y0 < hsize_t(ny)/2) + { + count[0] = ny/2 - y0; + offset[0] = y0; + memoffset[0] = 0; + } + else + { + if (y1 <= hsize_t(this->clayout->sizes[0] - ny/2 + 1)) + { // y0 < y1 therefore y0 <= this->clayout->sizes[0] - ny/2 + count[0] = 0; + offset[0] = ny/2; + memoffset[0] = 0; + } + else + { + if (y0 <= hsize_t(this->clayout->sizes[0] - ny/2)) + { + count[0] = y1 - (this->clayout->sizes[0] - ny/2); + offset[0] = ny - (this->clayout->sizes[0] - y0); + memoffset[0] = this->clayout->subsizes[0] - count[0]; + } + else + { + count[0] = this->clayout->subsizes[0]; + offset[0] = ny - (this->clayout->sizes[0] - y0); + memoffset[0] = 0; + } + } + } + } + DEBUG_MSG("count[0] = %ld, offset[0] = %ld\n", + count[0], offset[0]); + /// for z, we need to take into account that there are + /// both positive and negative modes + for (int cz = 0; cz < 2; cz++) + { + count [1] = nz/2; + offset[1] = cz*nz/2; + memshape [1] = this->clayout->sizes[1]; + memoffset[1] = cz*(this->clayout->sizes[1] - nz/2); + DEBUG_MSG("cz = %d, count[1] + offset[1] = %ld\n", + cz, count[1] + offset[1]); + + //now write data + mspace = H5Screate_simple(ndim(fc), memshape, NULL); + H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL); + H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data); + H5Sclose(mspace); + } + + + /* close file data space */ + H5Sclose(fspace); + /* close data set */ + H5Dclose(dset_id); + /* close file */ + H5Fclose(file_id); + return EXIT_SUCCESS; +} + template <typename rnumber, field_backend be, @@ -869,25 +1069,138 @@ void field<rnumber, be, fc>::compute_rspace_stats( H5Sclose(wspace); H5Sclose(mspace); H5Dclose(dset); - if (H5Lexists( - group, - "0slices", - H5P_DEFAULT)) - { - if (H5Lexists( - group, - (std::string("0slices/") + dset_name).c_str(), - H5P_DEFAULT)) - this->write_0slice( - group, - dset_name, - toffset); - } } delete[] moments; delete[] hist; } + + +template <typename rnumber, + field_backend be, + field_components fc> +void field<rnumber, be, fc>::compute_rspace_zaverage( + const hid_t group, + const 
std::string dset_name, + const hsize_t toffset) +{ + TIMEZONE("field::compute_rspace_zaverage"); + assert(this->real_space_representation); + const hsize_t slice_size = this->rlayout->local_size / this->rlayout->subsizes[0]; + + // initial arrays MUST be 0, because I'm just adding to them afterwards. + shared_array<double> local_zaverage_threaded( + slice_size, [&](double* local_zaverage){ + std::fill_n(local_zaverage, slice_size, 0); + }); + + // sum along z direction + { + TIMEZONE("field::RLOOP"); + this->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + + double *local_zaverage = local_zaverage_threaded.getMine(); + ptrdiff_t zaverage_index = (yindex*this->rlayout->subsizes[2]+xindex)*ncomp(fc); + + switch(fc) + { + case ONE: + local_zaverage[zaverage_index] += this->rval(rindex); + break; + case THREE: + local_zaverage[zaverage_index+0] += this->rval(rindex, 0); + local_zaverage[zaverage_index+1] += this->rval(rindex, 1); + local_zaverage[zaverage_index+2] += this->rval(rindex, 2); + break; + case THREExTHREE: + local_zaverage[zaverage_index+0 + 0] += this->rval(rindex, 0, 0); + local_zaverage[zaverage_index+0 + 1] += this->rval(rindex, 0, 1); + local_zaverage[zaverage_index+0 + 2] += this->rval(rindex, 0, 2); + local_zaverage[zaverage_index+3 + 0] += this->rval(rindex, 1, 0); + local_zaverage[zaverage_index+3 + 1] += this->rval(rindex, 1, 1); + local_zaverage[zaverage_index+3 + 2] += this->rval(rindex, 1, 2); + local_zaverage[zaverage_index+6 + 0] += this->rval(rindex, 2, 0); + local_zaverage[zaverage_index+6 + 1] += this->rval(rindex, 2, 1); + local_zaverage[zaverage_index+6 + 2] += this->rval(rindex, 2, 2); + break; + } + }); + + TIMEZONE("FIELD_RLOOP::Merge"); + local_zaverage_threaded.mergeParallel(); + } + // sum along MPI processes + double *zaverage = new double[slice_size]; + { + TIMEZONE("MPI_Allreduce"); + MPI_Allreduce( + (void*)local_zaverage_threaded.getMasterData(), + (void*)zaverage, + slice_size, + MPI_DOUBLE, MPI_SUM, this->comm); + } + // divide by total number of slices + for (ptrdiff_t n=0; n < ptrdiff_t(slice_size); n++) + zaverage[n] /= this->rlayout->sizes[0]; + + if (this->myrank == 0) + { + TIMEZONE("root-work"); + hid_t dset, wspace, mspace; + int ndims; + hsize_t count[5], offset[5], dims[5]; + offset[0] = toffset; + offset[1] = 0; + offset[2] = 0; + offset[3] = 0; + offset[4] = 0; + dset = H5Dopen( + group, + ("zaverage/" + dset_name).c_str(), + H5P_DEFAULT); + wspace = H5Dget_space(dset); + ndims = H5Sget_simple_extent_dims(wspace, dims, NULL); + count[0] = 1; + count[1] = this->rlayout->sizes[1]; + count[2] = this->rlayout->sizes[2]; + count[3] = 3; + count[4] = 3; + // select right slice in file + H5Sselect_hyperslab( + wspace, + H5S_SELECT_SET, + offset, + NULL, + count, + NULL); + offset[0] = 0; + // select proper regions of memory + mspace = H5Screate_simple(ndims-1, count+1, NULL); + H5Sselect_hyperslab( + mspace, + H5S_SELECT_SET, + offset+1, + NULL, + count+1, + NULL); + H5Dwrite( + dset, + H5T_NATIVE_DOUBLE, + mspace, + wspace, + H5P_DEFAULT, + zaverage); + H5Dclose(dset); + H5Sclose(mspace); + H5Sclose(wspace); + } + delete[] zaverage; +} + template <typename rnumber, field_backend be, field_components fc> @@ -904,77 +1217,129 @@ void field<rnumber, be, fc>::symmetrize() { TIMEZONE("field::symmetrize"); assert(!this->real_space_representation); - ptrdiff_t ii, cc; - typename fftw_interface<rnumber>::complex *data = this->get_cdata(); + // for debugging, just use FFTW + //this->ift(); + //this->dft(); + 
//this->normalize(); + //return; + typename fftw_interface<rnumber>::complex *cdata = this->get_cdata(); + // symmetrize kx = 0 plane, line by line, for ky != 0 MPI_Status *mpistatus = new MPI_Status; - if (this->myrank == this->clayout->rank[0][0]) - { - for (cc = 0; cc < ncomp(fc); cc++) - data[cc][1] = 0.0; - for (ii = 1; ii < ptrdiff_t(this->clayout->sizes[1]/2); ii++) - for (cc = 0; cc < ncomp(fc); cc++) { - ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[0] = - (*(data + cc + ncomp(fc)*( ii)*this->clayout->sizes[2]))[0]; - ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[1] = - -(*(data + cc + ncomp(fc)*( ii)*this->clayout->sizes[2]))[1]; - } - } - typename fftw_interface<rnumber>::complex *buffer; - buffer = fftw_interface<rnumber>::alloc_complex(ncomp(fc)*this->clayout->sizes[1]); - ptrdiff_t yy; + typename fftw_interface<rnumber>::complex *buffer = new typename fftw_interface<rnumber>::complex[ncomp(fc)*this->clayout->sizes[1]]; + //typename fftw_interface<rnumber>::complex *buffer; + //buffer = fftw_interface<rnumber>::alloc_complex(ncomp(fc)*this->clayout->sizes[1]); /*ptrdiff_t tindex;*/ int ranksrc, rankdst; - for (yy = 1; yy < ptrdiff_t(this->clayout->sizes[0]/2); yy++) { - ranksrc = this->clayout->rank[0][yy]; - rankdst = this->clayout->rank[0][this->clayout->sizes[0] - yy]; + for (ptrdiff_t iy = 1; iy < ptrdiff_t(this->clayout->sizes[0]/2); iy++) + { + ranksrc = this->clayout->rank[0][iy]; + rankdst = this->clayout->rank[0][this->clayout->sizes[0] - iy]; if (this->clayout->myrank == ranksrc) - for (ii = 0; ii < ptrdiff_t(this->clayout->sizes[1]); ii++) - for (cc = 0; cc < ncomp(fc); cc++) + { + ptrdiff_t iyy = iy - this->clayout->starts[0]; + for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->sizes[1]); iz++) + { + ptrdiff_t cindex = this->get_cindex(0, iyy, iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) for (int imag_comp=0; imag_comp<2; imag_comp++) - (*(buffer + ncomp(fc)*ii+cc))[imag_comp] = - (*(data + ncomp(fc)*((yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[imag_comp]; + (*(buffer + ncomp(fc)*iz+cc))[imag_comp] = + (*(cdata + ncomp(fc)*cindex + cc))[imag_comp]; + } + } if (ranksrc != rankdst) { if (this->clayout->myrank == ranksrc) MPI_Send((void*)buffer, - ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy, - this->clayout->comm); + ncomp(fc)*this->clayout->sizes[1], + mpi_real_type<rnumber>::complex(), + rankdst, iy, + this->clayout->comm); if (this->clayout->myrank == rankdst) MPI_Recv((void*)buffer, - ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy, - this->clayout->comm, mpistatus); + ncomp(fc)*this->clayout->sizes[1], + mpi_real_type<rnumber>::complex(), + ranksrc, iy, + this->clayout->comm, + mpistatus); } if (this->clayout->myrank == rankdst) { - for (ii = 1; ii < ptrdiff_t(this->clayout->sizes[1]); ii++) - for (cc = 0; cc < ncomp(fc); cc++) + ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0]; + for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]); iz++) + { + ptrdiff_t izz = (this->clayout->sizes[1] - iz); + ptrdiff_t cindex = this->get_cindex(0, iyy, izz); + //DEBUG_MSG("iy = %ld, iz = %ld\n", iy, iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) { - (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[0] = - (*(buffer + 
ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[0]; - (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[1] = - -(*(buffer + ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[1]; + (*(cdata + ncomp(fc)*cindex + cc))[0] = (*(buffer + ncomp(fc)*iz+cc))[0]; + (*(cdata + ncomp(fc)*cindex + cc))[1] = -(*(buffer + ncomp(fc)*iz+cc))[1]; } - for (cc = 0; cc < ncomp(fc); cc++) + } + ptrdiff_t cindex = this->get_cindex(0, iyy, 0); + for (int cc = 0; cc < int(ncomp(fc)); cc++) { - (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[0] = (*(buffer + cc))[0]; - (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[1] = -(*(buffer + cc))[1]; + (*(cdata + cc + ncomp(fc)*cindex))[0] = (*(buffer + cc))[0]; + (*(cdata + cc + ncomp(fc)*cindex))[1] = -(*(buffer + cc))[1]; } } } - fftw_interface<rnumber>::free(buffer); + //fftw_interface<rnumber>::free(buffer); + delete[] buffer; delete mpistatus; - /* put asymmetric data to 0 */ - /*if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2]) + // symmetrize kx = 0, ky = 0 line + if (this->clayout->myrank == this->clayout->rank[0][0]) + { + for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]/2); iz++) + { + ptrdiff_t cindex0 = this->get_cindex(0, 0, iz); + ptrdiff_t cindex1 = this->get_cindex(0, 0, this->clayout->sizes[1] - iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) + { + (*(cdata + cc + ncomp(fc)*cindex1))[0] = (*(cdata + cc + ncomp(fc)*cindex0))[0]; + (*(cdata + cc + ncomp(fc)*cindex1))[1] = -(*(cdata + cc + ncomp(fc)*cindex0))[1]; + } + } + } + // make 0 mode real + if (this->myrank == this->clayout->rank[0][0]) + { + for (ptrdiff_t cc = 0; cc < ncomp(fc); cc++) + cdata[cc][1] = 0.0; + } + // put kx = nx/2 modes to 0 + for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++) + for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++) { - tindex = ncomp(fc)*(this->clayout->sizes[0]/2 - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2]; - for (ii = 0; ii < this->clayout->sizes[1]; ii++) + ptrdiff_t cindex = this->get_cindex(this->clayout->sizes[2]-1, iy, iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) { + (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0; + (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0; + } + } + // put ky = ny/2 modes to 0 + if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2]) + { + for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++) + for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++) { - std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2*this->clayout->sizes[2], 0.0); - tindex += ncomp(fc)*this->clayout->sizes[2]; + ptrdiff_t cindex = this->get_cindex(ix, this->clayout->sizes[0]/2-this->clayout->starts[0], iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) { + (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0; + (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0; + } + } + } + // put kz = nz/2 modes to 0 + for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++) + for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++) + { + ptrdiff_t cindex = this->get_cindex(ix, iy, this->clayout->sizes[1]/2); + for (int cc = 0; cc < int(ncomp(fc)); cc++) { + (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0; + (*(cdata + cc + 
ncomp(fc)*cindex))[1] = 0.0; } } - tindex = ncomp(fc)*(); - std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2, 0.0);*/ } template <typename rnumber, @@ -1022,7 +1387,6 @@ void field<rnumber, be, fc>::compute_stats( // what follows gave me a headache until I found this link: // http://stackoverflow.com/questions/8256636/expected-primary-expression-error-on-template-method-using kk->template cospectrum<rnumber, fc>( - (typename fftw_interface<rnumber>::complex*)this->data, (typename fftw_interface<rnumber>::complex*)this->data, group, dset_name + "_" + dset_name, @@ -1039,6 +1403,42 @@ void field<rnumber, be, fc>::compute_stats( } } +template <typename rnumber, + field_backend be, + field_components fc> +template <kspace_dealias_type dt> +double field<rnumber, be, fc>::L2norm( + kspace<be, dt> *kk) +{ + TIMEZONE("field::L2norm"); + if (!this->real_space_representation) + return kk->template L2norm<rnumber, fc>(this->get_cdata()); + else + { + shared_array<double> local_m2_threaded(1, [&](double* local_moment){ + std::fill_n(local_moment, 1, 0);}); + + this->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + double *local_m2 = local_m2_threaded.getMine(); + for (unsigned int i=0; i<ncomp(fc); i++) + local_m2[0] += this->data[rindex*ncomp(fc)+i]*this->data[rindex*ncomp(fc)+i]; + }); + + local_m2_threaded.mergeParallel(); + double m2; + MPI_Allreduce( + (void*)local_m2_threaded.getMasterData(), + &m2, + 1, + MPI_DOUBLE, MPI_SUM, this->comm); + return sqrt(m2 / this->npoints); + } +} + template <typename rnumber, field_backend be, field_components fc1, @@ -1172,6 +1572,7 @@ int joint_rspace_PDF( hid_t dset, wspace; hsize_t dims[5]; int ndims; + variable_used_only_in_assert(ndims); if (fc == THREE) { dset = H5Dopen( @@ -1180,7 +1581,6 @@ int joint_rspace_PDF( H5P_DEFAULT); wspace = H5Dget_space(dset); ndims = H5Sget_simple_extent_dims(wspace, dims, NULL); - DEBUG_MSG("number of dimensions is %d\n", ndims); assert(ndims == 5); assert(dims[3] == 3); assert(dims[4] == 3); @@ -1235,8 +1635,8 @@ int joint_rspace_PDF( { for (unsigned int i=0; i<4; i++) { - bin1size[i] = max_f1_estimate[0] / nbins; - bin2size[i] = max_f2_estimate[0] / nbins; + bin1size[i] = 2*max_f1_estimate[0] / nbins; + bin2size[i] = 2*max_f2_estimate[0] / nbins; } } @@ -1279,8 +1679,8 @@ int joint_rspace_PDF( } else if (fc == ONE) { - bin1 = int(floor(f1->rval(rindex)/bin1size[3])); - bin2 = int(floor(f2->rval(rindex)/bin2size[3])); + bin1 = int(floor((f1->rval(rindex) + max_f1_estimate[0])/bin1size[3])); + bin2 = int(floor((f2->rval(rindex) + max_f2_estimate[0])/bin2size[3])); } if ((bin1 >= 0 && bin1 < nbins) && (bin2 >= 0 && bin2 < nbins)) @@ -1360,6 +1760,274 @@ int joint_rspace_PDF( return EXIT_SUCCESS; } +// Debarghya edit for 3 scale PDFs // + +template <typename rnumber, + field_backend be> +int joint_rspace_3PDF( + field<rnumber, be, ONE> *f1, + field<rnumber, be, ONE> *f2, + field<rnumber, be, ONE> *f3, + const hid_t group, + const std::string dset_name, + const hsize_t toffset, + const std::vector<double> max_f1_estimate, + const std::vector<double> max_f2_estimate, + const std::vector<double> max_f3_estimate) +{ + TIMEZONE("joint_rspace_3PDF"); + assert(f1->real_space_representation); + assert(f2->real_space_representation); + assert(f3->real_space_representation); + + assert(max_f1_estimate.size() == 1); + assert(max_f2_estimate.size() == 1); + assert(max_f3_estimate.size() == 1); + + int nbins; + std::string dsetc, dsetm; + dsetc = "histograms/" + dset_name + "_components"; + 
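+    // Overall flow (it mirrors joint_rspace_PDF, with a third field): rank 0
+    // reads the number of bins from the extents of the existing dataset and
+    // broadcasts it; each sample is shifted by +max_f_estimate[0] and divided
+    // by a bin width of 2*max_f_estimate[0]/nbins, so the histogram covers
+    // [-max_f_estimate[0], +max_f_estimate[0]] in every direction; per-thread
+    // histograms are merged, reduced over MPI, and rank 0 writes the slice at
+    // `toffset` into "histograms/<dset_name>".
+    // Example: with max_f1_estimate[0] = 5 and nbins = 100 the bin width is
+    // 0.1, a value of -5.0 lands in bin 0, +4.99 in bin 99, and anything
+    // outside [-5, 5) is discarded.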
dsetm = "histograms/" + dset_name; + if (f1->myrank == 0) + { + hid_t dset, wspace; + hsize_t dims[5]; + int ndims; + dset = H5Dopen( + group, + dsetm.c_str(), + H5P_DEFAULT); + wspace = H5Dget_space(dset); + ndims = H5Sget_simple_extent_dims(wspace, dims, NULL); + assert(ndims == 4); + H5Sclose(wspace); + H5Dclose(dset); + nbins = dims[1]; + } + { + TIMEZONE("MPI_Bcast"); + MPI_Bcast(&nbins, 1, MPI_INT, 0, f1->comm); + } + + + /// histogram magnitudes + shared_array<ptrdiff_t> local_histm_threaded( + nbins*nbins*nbins, + [&](ptrdiff_t* local_hist){ + std::fill_n(local_hist, nbins*nbins*nbins, 0); + }); + + /// set up bin sizes + std::vector<double> bin1size, bin2size, bin3size; + bin1size.resize(1); + bin2size.resize(1); + bin3size.resize(1); + + bin1size[0] = 2*max_f1_estimate[0] / nbins; + bin2size[0] = 2*max_f2_estimate[0] / nbins; + bin3size[0] = 2*max_f3_estimate[0] / nbins; + + + { + TIMEZONE("field::RLOOP"); + f1->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + ptrdiff_t *local_histm = local_histm_threaded.getMine(); + int bin1 = 0; + int bin2 = 0; + int bin3 = 0; + + bin1 = int(floor((f1->rval(rindex) + max_f1_estimate[0])/bin1size[0])); + bin2 = int(floor((f2->rval(rindex) + max_f2_estimate[0])/bin2size[0])); + bin3 = int(floor((f3->rval(rindex) + max_f3_estimate[0])/bin3size[0])); + if ((bin1 >= 0 && bin1 < nbins) && + (bin2 >= 0 && bin2 < nbins) && + (bin3 >= 0 && bin3 < nbins)) + local_histm[bin1*nbins*nbins + bin2*nbins + bin3]++; + }); + } + local_histm_threaded.mergeParallel(); + ptrdiff_t *histm = new ptrdiff_t[nbins*nbins*nbins]; + ptrdiff_t *histc = NULL; + { + MPI_Allreduce( + (void*)local_histm_threaded.getMasterData(), + (void*)histm, + nbins*nbins*nbins, + MPI_INT64_T, MPI_SUM, f1->comm); + } + + if (f1->myrank == 0) + { + TIMEZONE("root-work"); + hid_t dset, wspace, mspace; + hsize_t count[5], offset[5]; + + dset = H5Dopen(group, dsetm.c_str(), H5P_DEFAULT); + assert(dset > 0); + offset[0] = toffset; + offset[1] = 0; + offset[2] = 0; + offset[3] = 0; + count[0] = 1; + count[1] = nbins; + count[2] = nbins; + count[3] = nbins; + mspace = H5Screate_simple(4, count, NULL); + wspace = H5Dget_space(dset); + H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(dset, H5T_NATIVE_INT64, mspace, wspace, H5P_DEFAULT, histm); + H5Sclose(wspace); + H5Sclose(mspace); + H5Dclose(dset); + } + + delete[] histm; + + return EXIT_SUCCESS; +} + + + +template <typename rnumber, + field_backend be, + field_components fc> +field<rnumber, be, fc> &field<rnumber, be, fc>::operator=( + const field<rnumber, be, fc> &src) +{ + TIMEZONE("field::operator="); + if (src.real_space_representation) + { + assert(this->get_nx() == src.get_nx()); + assert(this->get_ny() == src.get_ny()); + assert(this->get_nz() == src.get_nz()); + this->real_space_representation = true; + std::copy(src.data, + src.data + this->rmemlayout->local_size, + this->data); + } + else + { + this->real_space_representation = false; + // simple copy + if (this->get_nx() == src.get_nx() && + this->get_ny() == src.get_ny() && + this->get_nz() == src.get_nz()) + { + std::copy(src.data, + src.data + this->rmemlayout->local_size, + this->data); + } + // complicated resize + else + { + int64_t slice_size = src.clayout->local_size / src.clayout->subsizes[0]; + // clean up + std::fill_n(this->data, + this->rmemlayout->local_size, + 0.0); + typename fftw_interface<rnumber>::complex *buffer; + buffer = fftw_interface<rnumber>::alloc_complex(slice_size*ncomp(fc)); + + 
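+            // Spectral resize: the source field is copied mode by mode into a
+            // field of different dimensions. The slowest index (ii0) is handled
+            // one slice at a time: the lower half of the spectrum keeps its
+            // index (oi0 = ii0), the upper half is shifted by the size
+            // difference (oi0 = ii0 + delta0), and modes with no counterpart in
+            // the destination are skipped. Each slice is either copied locally
+            // or sent to the rank that owns the destination slab; the middle
+            // index (ii1) is remapped the same way, and only the first
+            // min_fast_dim entries of the fastest index are copied.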
int min_fast_dim = + (src.clayout->sizes[2] > this->clayout->sizes[2]) ? + this->clayout->sizes[2] : src.clayout->sizes[2]; + + int64_t ii0, ii1; + int64_t oi0, oi1; + int64_t delta1, delta0; + int irank, orank; + delta0 = (this->clayout->sizes[0] - src.clayout->sizes[0]); + delta1 = (this->clayout->sizes[1] - src.clayout->sizes[1]); + for (ii0=0; ii0 < int64_t(src.clayout->sizes[0]); ii0++) + { + if (ii0 <= int64_t(src.clayout->sizes[0]/2)) + { + oi0 = ii0; + if (oi0 > int64_t(this->clayout->sizes[0]/2)) + continue; + } + else + { + oi0 = ii0 + delta0; + if ((oi0 < 0) || ((int64_t(this->clayout->sizes[0]) - oi0) >= int64_t(this->clayout->sizes[0]/2))) + continue; + } + if (be == FFTW) + { + irank = src.clayout->rank[0][ii0]; + orank = this->clayout->rank[0][oi0]; + } + else + {// TODO: handle 2D layout here + } + if ((irank == orank) && + (irank == src.clayout->myrank)) + { + std::copy( + (rnumber*)(src.get_cdata() + (ii0 - src.clayout->starts[0] )*slice_size), + (rnumber*)(src.get_cdata() + (ii0 - src.clayout->starts[0] + 1)*slice_size), + (rnumber*)buffer); + } + else + { + if (src.clayout->myrank == irank) + { + MPI_Send( + (void*)(src.get_cdata() + (ii0-src.clayout->starts[0])*slice_size), + slice_size, + mpi_real_type<rnumber>::complex(), + orank, + ii0, + src.clayout->comm); + } + if (src.clayout->myrank == orank) + { + MPI_Recv( + (void*)(buffer), + slice_size, + mpi_real_type<rnumber>::complex(), + irank, + ii0, + src.clayout->comm, + MPI_STATUS_IGNORE); + } + } + if (src.clayout->myrank == orank) + { + for (ii1 = 0; ii1 < int64_t(src.clayout->sizes[1]); ii1++) + { + if (ii1 <= int64_t(src.clayout->sizes[1]/2)) + { + oi1 = ii1; + if (oi1 > int64_t(this->clayout->sizes[1]/2)) + continue; + } + else + { + oi1 = ii1 + delta1; + if ((oi1 < 0) || ((int64_t(this->clayout->sizes[1]) - oi1) >= int64_t(this->clayout->sizes[1]/2))) + continue; + } + std::copy( + (rnumber*)(buffer + (ii1*src.clayout->sizes[2]*ncomp(fc))), + (rnumber*)(buffer + (ii1*src.clayout->sizes[2] + min_fast_dim)*ncomp(fc)), + (rnumber*)(this->get_cdata() + + ((oi0 - this->clayout->starts[0])*this->clayout->sizes[1] + + oi1)*this->clayout->sizes[2]*ncomp(fc))); + } + } + } + fftw_interface<rnumber>::free(buffer); + MPI_Barrier(src.clayout->comm); + } + } + return *this; +} + template class field<float, FFTW, ONE>; template class field<float, FFTW, THREE>; template class field<float, FFTW, THREExTHREE>; @@ -1407,6 +2075,34 @@ template void field<double, FFTW, THREExTHREE>::compute_stats<SMOOTH>( kspace<FFTW, SMOOTH> *, const hid_t, const std::string, const hsize_t, const double); +template double field<float, FFTW, ONE>::L2norm<TWO_THIRDS>( + kspace<FFTW, TWO_THIRDS> *); +template double field<float, FFTW, THREE>::L2norm<TWO_THIRDS>( + kspace<FFTW, TWO_THIRDS> *); +template double field<float, FFTW, THREExTHREE>::L2norm<TWO_THIRDS>( + kspace<FFTW, TWO_THIRDS> *); + +template double field<double, FFTW, ONE>::L2norm<TWO_THIRDS>( + kspace<FFTW, TWO_THIRDS> *); +template double field<double, FFTW, THREE>::L2norm<TWO_THIRDS>( + kspace<FFTW, TWO_THIRDS> *); +template double field<double, FFTW, THREExTHREE>::L2norm<TWO_THIRDS>( + kspace<FFTW, TWO_THIRDS> *); + +template double field<float, FFTW, ONE>::L2norm<SMOOTH>( + kspace<FFTW, SMOOTH> *); +template double field<float, FFTW, THREE>::L2norm<SMOOTH>( + kspace<FFTW, SMOOTH> *); +template double field<float, FFTW, THREExTHREE>::L2norm<SMOOTH>( + kspace<FFTW, SMOOTH> *); + +template double field<double, FFTW, ONE>::L2norm<SMOOTH>( + kspace<FFTW, SMOOTH> *); +template double 
field<double, FFTW, THREE>::L2norm<SMOOTH>( + kspace<FFTW, SMOOTH> *); +template double field<double, FFTW, THREExTHREE>::L2norm<SMOOTH>( + kspace<FFTW, SMOOTH> *); + template int compute_gradient<float, FFTW, THREE, THREExTHREE, SMOOTH>( kspace<FFTW, SMOOTH> *, field<float, FFTW, THREE> *, @@ -1468,3 +2164,24 @@ template int joint_rspace_PDF<double, FFTW, ONE>( const std::vector<double>, const std::vector<double>); +template int joint_rspace_3PDF<float, FFTW>( + field<float, FFTW, ONE> *, + field<float, FFTW, ONE> *, + field<float, FFTW, ONE> *, + const hid_t, + const std::string, + const hsize_t, + const std::vector<double>, + const std::vector<double>, + const std::vector<double>); +template int joint_rspace_3PDF<double, FFTW>( + field<double, FFTW, ONE> *, + field<double, FFTW, ONE> *, + field<double, FFTW, ONE> *, + const hid_t, + const std::string, + const hsize_t, + const std::vector<double>, + const std::vector<double>, + const std::vector<double>); + diff --git a/bfps/cpp/field.hpp b/cpp/field.hpp similarity index 87% rename from bfps/cpp/field.hpp rename to cpp/field.hpp index 52a936320974a9076a419d4b081d0ee9ab5d4ae5..c6a62b5c1739d5bafdf7c823aea7bc8b24059147 100644 --- a/bfps/cpp/field.hpp +++ b/cpp/field.hpp @@ -72,8 +72,8 @@ class field field_layout<fc> *clayout, *rlayout, *rmemlayout; /* FFT plans */ - typename fftw_interface<rnumber>::plan c2r_plan; - typename fftw_interface<rnumber>::plan r2c_plan; + typename fftw_interface<rnumber>::many_plan c2r_plan; + typename fftw_interface<rnumber>::many_plan r2c_plan; unsigned fftw_plan_rigor; /* HDF5 data types for arrays */ @@ -103,6 +103,13 @@ class field const hid_t group, const std::string field_name, const int iteration); + int write_filtered( + const std::string fname, + const std::string field_name, + const int iteration, + const int nx, + const int ny, + const int nz); int io_binary( const std::string fname, @@ -129,6 +136,25 @@ class field const hsize_t toffset, const std::vector<double> max_estimate); + void compute_rspace_zaverage( + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + + /* access sizes */ + inline int get_nx() const + { + return this->rlayout->sizes[2]; + } + inline int get_ny() const + { + return this->rlayout->sizes[1]; + } + inline int get_nz() const + { + return this->rlayout->sizes[0]; + } + /* acess data */ inline rnumber *__restrict__ get_rdata() { @@ -145,6 +171,11 @@ class field return (typename fftw_interface<rnumber>::complex*__restrict__)this->data; } + inline typename fftw_interface<rnumber>::complex *__restrict__ get_cdata() const + { + return (typename fftw_interface<rnumber>::complex*__restrict__)this->data; + } + inline rnumber &rval(ptrdiff_t rindex, unsigned int component = 0) { assert(fc == ONE || fc == THREE); @@ -154,7 +185,7 @@ class field inline const rnumber& rval(ptrdiff_t rindex, unsigned int component = 0) const { - assert(fc == ONE || fc == THREE); + //assert(fc == ONE || fc == THREE); assert(component >= 0 && component < ncomp(fc)); return *(this->data + rindex*ncomp(fc) + component); } @@ -216,6 +247,8 @@ class field return *this; } + field<rnumber, be, fc>& operator=(const field<rnumber, be, fc> &src); + template <kspace_dealias_type dt> void compute_stats( kspace<be, dt> *kk, @@ -223,6 +256,9 @@ class field const std::string dset_name, const hsize_t toffset, const double max_estimate); + template <kspace_dealias_type dt> + double L2norm( + kspace<be, dt> *kk); inline void impose_zero_mode() { if (this->clayout->myrank == this->clayout->rank[0][0] 
&& @@ -318,5 +354,18 @@ int joint_rspace_PDF( const std::vector<double> max_f1_estimate, const std::vector<double> max_f2_estimate); +template <typename rnumber, + field_backend be> +int joint_rspace_3PDF( + field<rnumber, be, ONE> *f1, + field<rnumber, be, ONE> *f2, + field<rnumber, be, ONE> *f3, + const hid_t group, + const std::string dset_name, + const hsize_t toffset, + const std::vector<double> max_f1_estimate, + const std::vector<double> max_f2_estimate, + const std::vector<double> max_f3_estimate); + #endif//FIELD_HPP diff --git a/bfps/cpp/field_binary_IO.cpp b/cpp/field_binary_IO.cpp similarity index 100% rename from bfps/cpp/field_binary_IO.cpp rename to cpp/field_binary_IO.cpp diff --git a/bfps/cpp/field_binary_IO.hpp b/cpp/field_binary_IO.hpp similarity index 100% rename from bfps/cpp/field_binary_IO.hpp rename to cpp/field_binary_IO.hpp diff --git a/bfps/cpp/field_layout.cpp b/cpp/field_layout.cpp similarity index 99% rename from bfps/cpp/field_layout.cpp rename to cpp/field_layout.cpp index 908904991d5d95b0c89ba679b402d8d5727b8c85..61dd3f2ac1094e5f93a375fa295cffab669b34f9 100644 --- a/bfps/cpp/field_layout.cpp +++ b/cpp/field_layout.cpp @@ -23,10 +23,15 @@ **********************************************************************/ + +#define NDEBUG + #include <cassert> #include "field_layout.hpp" #include "scope_timer.hpp" + + template <field_components fc> field_layout<fc>::field_layout( const hsize_t *SIZES, diff --git a/bfps/cpp/field_layout.hpp b/cpp/field_layout.hpp similarity index 100% rename from bfps/cpp/field_layout.hpp rename to cpp/field_layout.hpp diff --git a/cpp/full_code/NSVE.cpp b/cpp/full_code/NSVE.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7b1b2d9550c45f9166c37e1b8132427fed046597 --- /dev/null +++ b/cpp/full_code/NSVE.cpp @@ -0,0 +1,201 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#define NDEBUG + +#include <string> +#include <cmath> +#include "NSVE.hpp" +#include "scope_timer.hpp" +#include "fftw_tools.hpp" + + +template <typename rnumber> +int NSVE<rnumber>::initialize(void) +{ + TIMEZONE("NSVE::initialize"); + this->read_iteration(); + this->read_parameters(); + if (this->myrank == 0) + { + // set caching parameters + hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); + herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0); + variable_used_only_in_assert(cache_err); + DEBUG_MSG("when setting stat_file cache I got %d\n", cache_err); + this->stat_file = H5Fopen( + (this->simname + ".h5").c_str(), + H5F_ACC_RDWR, + fapl); + } + int data_file_problem; + if (this->myrank == 0) + data_file_problem = this->grow_file_datasets(); + MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, this->comm); + if (data_file_problem > 0) + { + std::cerr << + data_file_problem << + " problems growing file datasets.\ntrying to exit now." << + std::endl; + return EXIT_FAILURE; + } + this->fs = new vorticity_equation<rnumber, FFTW>( + simname.c_str(), + nx, ny, nz, + dkx, dky, dkz, + fftw_planner_string_to_flag[this->fftw_plan_rigor]); + this->tmp_vec_field = new field<rnumber, FFTW, THREE>( + nx, ny, nz, + this->comm, + fftw_planner_string_to_flag[this->fftw_plan_rigor]); + + + this->fs->checkpoints_per_file = checkpoints_per_file; + this->fs->nu = nu; + this->fs->fmode = fmode; + this->fs->famplitude = famplitude; + this->fs->friction_coefficient = friction_coefficient; + this->fs->energy = energy; + this->fs->injection_rate = injection_rate; + this->fs->fk0 = fk0; + this->fs->fk1 = fk1; + strncpy(this->fs->forcing_type, forcing_type, 128); + this->fs->iteration = this->iteration; + this->fs->checkpoint = this->checkpoint; + + this->fs->cvorticity->real_space_representation = false; + this->fs->io_checkpoint(); + + if (this->myrank == 0 && this->iteration == 0) + this->fs->kk->store(stat_file); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE<rnumber>::step(void) +{ + TIMEZONE("NSVE::step"); + this->fs->step(this->dt); + this->iteration = this->fs->iteration; + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE<rnumber>::write_checkpoint(void) +{ + TIMEZONE("NSVE::write_checkpoint"); + this->fs->io_checkpoint(false); + this->checkpoint = this->fs->checkpoint; + this->write_iteration(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE<rnumber>::finalize(void) +{ + TIMEZONE("NSVE::finalize"); + if (this->myrank == 0) + H5Fclose(this->stat_file); + delete this->fs; + delete this->tmp_vec_field; + return EXIT_SUCCESS; +} + +/** \brief Compute standard statistics for velocity and vorticity fields. + * + * IMPORTANT: at the end of this subroutine, `this->fs->cvelocity` contains + * the Fourier space representation of the velocity field, and + * `this->tmp_vec_field` contains the real space representation of the + * velocity field. + * This behavior is relied upon in the `NSVEparticles` class, so please + * don't break it. 
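+ *
+ * Statistics are only computed when `iteration` is a multiple of
+ * `niter_stat`; they are written into the `statistics` group of the stat
+ * file (opened on rank 0 only), at time index `iteration / niter_stat`,
+ * first for the vorticity and then for the velocity field.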
+ */ + +template <typename rnumber> +int NSVE<rnumber>::do_stats() +{ + TIMEZONE("NSVE::do_stats"); + if (!(this->iteration % this->niter_stat == 0)) + return EXIT_SUCCESS; + hid_t stat_group; + if (this->myrank == 0) + stat_group = H5Gopen( + this->stat_file, + "statistics", + H5P_DEFAULT); + else + stat_group = 0; + + *tmp_vec_field = fs->cvorticity->get_cdata(); + tmp_vec_field->compute_stats( + fs->kk, + stat_group, + "vorticity", + fs->iteration / niter_stat, + max_vorticity_estimate/sqrt(3)); + + fs->compute_velocity(fs->cvorticity); + *tmp_vec_field = fs->cvelocity->get_cdata(); + tmp_vec_field->compute_stats( + fs->kk, + stat_group, + "velocity", + fs->iteration / niter_stat, + max_velocity_estimate/sqrt(3)); + + if (this->myrank == 0) + H5Gclose(stat_group); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVE<rnumber>::read_parameters(void) +{ + TIMEZONE("NSVE::read_parameters"); + this->direct_numerical_simulation::read_parameters(); + hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + this->nu = hdf5_tools::read_value<double>(parameter_file, "parameters/nu"); + this->dt = hdf5_tools::read_value<double>(parameter_file, "parameters/dt"); + this->fmode = hdf5_tools::read_value<int>(parameter_file, "parameters/fmode"); + this->famplitude = hdf5_tools::read_value<double>(parameter_file, "parameters/famplitude"); + this->friction_coefficient = hdf5_tools::read_value<double>(parameter_file, "parameters/friction_coefficient"); + this->injection_rate = hdf5_tools::read_value<double>(parameter_file, "parameters/injection_rate"); + this->fk0 = hdf5_tools::read_value<double>(parameter_file, "parameters/fk0"); + this->fk1 = hdf5_tools::read_value<double>(parameter_file, "parameters/fk1"); + this->energy = hdf5_tools::read_value<double>(parameter_file, "parameters/energy"); + this->histogram_bins = hdf5_tools::read_value<int>(parameter_file, "parameters/histogram_bins"); + this->max_velocity_estimate = hdf5_tools::read_value<double>(parameter_file, "parameters/max_velocity_estimate"); + this->max_vorticity_estimate = hdf5_tools::read_value<double>(parameter_file, "parameters/max_vorticity_estimate"); + std::string tmp = hdf5_tools::read_string(parameter_file, "parameters/forcing_type"); + snprintf(this->forcing_type, 511, "%s", tmp.c_str()); + this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template class NSVE<float>; +template class NSVE<double>; + diff --git a/bfps/cpp/full_code/NSVE.hpp b/cpp/full_code/NSVE.hpp similarity index 95% rename from bfps/cpp/full_code/NSVE.hpp rename to cpp/full_code/NSVE.hpp index d444b71ceb48ea19dc292a57cc91ac81157e15ed..83c63d35790d3616cf143da1ac43bec133e91675 100644 --- a/bfps/cpp/full_code/NSVE.hpp +++ b/cpp/full_code/NSVE.hpp @@ -42,14 +42,18 @@ class NSVE: public direct_numerical_simulation /* parameters that are read in read_parameters */ double dt; double famplitude; + double friction_coefficient; double fk0; double fk1; + double energy; + double injection_rate; int fmode; char forcing_type[512]; int histogram_bins; double max_velocity_estimate; double max_vorticity_estimate; double nu; + std::string fftw_plan_rigor; /* other stuff */ vorticity_equation<rnumber, FFTW> *fs; diff --git a/bfps/cpp/full_code/NSVE_field_stats.cpp b/cpp/full_code/NSVE_field_stats.cpp similarity index 57% rename from bfps/cpp/full_code/NSVE_field_stats.cpp rename to cpp/full_code/NSVE_field_stats.cpp index 
7e33acf93644208d292c5d8df66653f4bb7b806f..0969175cc75530e2dad2c3c5dd9e6a0449416ed0 100644 --- a/bfps/cpp/full_code/NSVE_field_stats.cpp +++ b/cpp/full_code/NSVE_field_stats.cpp @@ -1,17 +1,44 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #include <string> #include <cmath> #include "NSVE_field_stats.hpp" +#include "fftw_tools.hpp" #include "scope_timer.hpp" template <typename rnumber> int NSVE_field_stats<rnumber>::initialize(void) { + TIMEZONE("NSVE_field_stats::initialize"); this->postprocess::read_parameters(); this->vorticity = new field<rnumber, FFTW, THREE>( nx, ny, nz, this->comm, - DEFAULT_FFTW_FLAG); + fftw_planner_string_to_flag[this->fftw_plan_rigor]); this->vorticity->real_space_representation = false; hid_t parameter_file = H5Fopen( (this->simname + std::string(".h5")).c_str(), @@ -42,6 +69,7 @@ int NSVE_field_stats<rnumber>::initialize(void) this->vorticity->clayout->starts, this->vorticity->clayout->comm); } + this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor"); H5Fclose(parameter_file); return EXIT_SUCCESS; } @@ -49,6 +77,7 @@ int NSVE_field_stats<rnumber>::initialize(void) template <typename rnumber> int NSVE_field_stats<rnumber>::read_current_cvorticity(void) { + TIMEZONE("NSVE_field_stats::read_current_cvorticity"); this->vorticity->real_space_representation = false; if (this->bin_IO != NULL) { @@ -76,6 +105,7 @@ int NSVE_field_stats<rnumber>::read_current_cvorticity(void) template <typename rnumber> int NSVE_field_stats<rnumber>::finalize(void) { + TIMEZONE("NSVE_field_stats::finalize"); if (this->bin_IO != NULL) delete this->bin_IO; delete this->vorticity; @@ -85,6 +115,7 @@ int NSVE_field_stats<rnumber>::finalize(void) template <typename rnumber> int NSVE_field_stats<rnumber>::work_on_current_iteration(void) { + TIMEZONE("NSVE_field_stats::work_on_current_iteration"); return EXIT_SUCCESS; } diff --git a/bfps/cpp/full_code/NSVE_field_stats.hpp b/cpp/full_code/NSVE_field_stats.hpp similarity index 98% rename from bfps/cpp/full_code/NSVE_field_stats.hpp rename to cpp/full_code/NSVE_field_stats.hpp index d544c0c7d5f4c75559e63ea3e59bf9457d4730c5..28a2376f17ac2ac837cbacac828cd91572bb3a17 100644 --- a/bfps/cpp/full_code/NSVE_field_stats.hpp +++ b/cpp/full_code/NSVE_field_stats.hpp @@ -42,6 +42,8 @@ class NSVE_field_stats: public postprocess private: field_binary_IO<rnumber, COMPLEX, THREE> *bin_IO; public: + std::string fftw_plan_rigor; + field<rnumber, FFTW, THREE> *vorticity; NSVE_field_stats( diff --git a/cpp/full_code/NSVE_no_output.hpp b/cpp/full_code/NSVE_no_output.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..045db08ec74b74206973e0dfbcb30716d62be0de --- /dev/null +++ b/cpp/full_code/NSVE_no_output.hpp @@ -0,0 +1,51 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef NSVE_NO_OUTPUT_HPP +#define NSVE_NO_OUTPUT_HPP + +#include "full_code/NSVE.hpp" + +template <typename rnumber> +class NSVE_no_output: public NSVE<rnumber> +{ + public: + NSVE_no_output( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + NSVE<rnumber>( + COMMUNICATOR, + simulation_name){} + ~NSVE_no_output(){} + int write_checkpoint(void) + { + TIMEZONE("NSVE_no_output::write_checkpoint"); + return EXIT_SUCCESS; + } +}; + +#endif//NSVE_NO_OUTPUT_HPP + diff --git a/cpp/full_code/NSVEcomplex_particles.cpp b/cpp/full_code/NSVEcomplex_particles.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3bd27102d7495b39dfa92bb5b7975b3f64d6cca5 --- /dev/null +++ b/cpp/full_code/NSVEcomplex_particles.cpp @@ -0,0 +1,265 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#define NDEBUG + +#include <string> +#include <cmath> +#include "NSVEcomplex_particles.hpp" +#include "scope_timer.hpp" +#include "particles/particles_sampling.hpp" +#include "particles/p2p_computer.hpp" +#include "particles/particles_inner_computer.hpp" + +template <typename rnumber> +int NSVEcomplex_particles<rnumber>::initialize(void) +{ + TIMEZONE("NSVEcomplex_particles::initialize"); + this->NSVE<rnumber>::initialize(); + + p2p_computer<double, long long int> current_p2p_computer; + current_p2p_computer.setEnable(this->enable_p2p); + + particles_inner_computer<double, long long int> current_particles_inner_computer(inner_v0, this->lambda); + current_particles_inner_computer.setEnable(enable_inner); + + + this->ps = particles_system_builder_with_p2p( + this->fs->cvelocity, // (field object) + this->fs->kk, // (kspace object, contains dkx, dky, dkz) + tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) + (long long int)nparticles, // to check coherency between parameters and hdf input file + this->fs->get_current_fname(), // particles input filename + std::string("/tracers0/state/") + std::to_string(this->fs->iteration), // dataset name for initial input + std::string("/tracers0/rhs/") + std::to_string(this->fs->iteration), // dataset name for initial input + tracers0_neighbours, // parameter (interpolation no neighbours) + tracers0_smoothness, // parameter (how many continuous derivatives) + this->comm, + this->fs->iteration+1, + std::move(current_p2p_computer), + std::move(current_particles_inner_computer), + cutoff); + + this->particles_output_writer_mpi = new particles_output_hdf5< + long long int, double, 6>( + MPI_COMM_WORLD, + "tracers0", + nparticles, + tracers0_integration_steps); + this->particles_sample_writer_mpi = new particles_output_sampling_hdf5< + long long int, double, 3>( + MPI_COMM_WORLD, + this->ps->getGlobalNbParticles(), + (this->simname + "_particles.h5"), + "tracers0", + "position/0"); + + + /// allocate grad vel field + this->nabla_u = new field<rnumber, FFTW, THREExTHREE>( + this->nx, this->ny, this->nz, + this->comm, + this->fs->cvorticity->fftw_plan_rigor); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEcomplex_particles<rnumber>::step(void) +{ + TIMEZONE("NSVEcomplex_particles::step"); + this->fs->compute_velocity(this->fs->cvorticity); + if(this->enable_vorticity_omega){ + compute_gradient( + this->fs->kk, + this->fs->cvelocity, + this->nabla_u); + this->nabla_u->ift(); + this->fs->cvelocity->ift(); // needed before completeloop + //std::unique_ptr<double[]> sampled_vorticity(new double[9*this->ps->getLocalNbParticles()]); + //std::fill_n(sampled_vorticity.get(), 9*this->ps->getLocalNbParticles(), 0); + //this->ps->sample_compute_field(*this->nabla_u, sampled_vorticity.get()); + //*this->tmp_vec_field = this->fs->cvorticity->get_cdata(); + //this->tmp_vec_field->ift(); + this->ps->completeLoopWithExtraField(this->dt, *this->nabla_u); + } + else{ + this->fs->cvelocity->ift(); + this->ps->completeLoop(this->dt); + } + this->NSVE<rnumber>::step(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEcomplex_particles<rnumber>::write_checkpoint(void) +{ + TIMEZONE("NSVEcomplex_particles::write_checkpoint"); + this->NSVE<rnumber>::write_checkpoint(); + 
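+    // The parent class has already written the fluid checkpoint; below, the
+    // particle state (3 position and 3 orientation components per particle)
+    // together with the stored right-hand sides of the integrator are written
+    // into the same checkpoint file.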
this->particles_output_writer_mpi->open_file(this->fs->get_current_fname()); + // TODO P2P write particle data too + this->particles_output_writer_mpi->template save<6>( + this->ps->getParticlesState(), + this->ps->getParticlesRhs(), + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->fs->iteration); + this->particles_output_writer_mpi->close_file(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEcomplex_particles<rnumber>::finalize(void) +{ + TIMEZONE("NSVEcomplex_particles::finalize"); + delete this->nabla_u; + delete this->particles_output_writer_mpi; + delete this->particles_sample_writer_mpi; + this->NSVE<rnumber>::finalize(); + return EXIT_SUCCESS; +} + +/** \brief Compute fluid stats and sample particle data. + */ + +template <typename rnumber> +int NSVEcomplex_particles<rnumber>::do_stats() +{ + TIMEZONE("NSVEcomplex_particles::do_stats"); + /// perform fluid stats + this->NSVE<rnumber>::do_stats(); + + /// check if particle stats should be performed now; + /// if not, exit method. + if (!(this->iteration % this->niter_part == 0)) + return EXIT_SUCCESS; + + /// allocate temporary data array + /// initialize pdata0 with the positions, and pdata1 with the orientations + std::unique_ptr<double[]> pdata0 = this->ps->extractParticlesState(0, 3); + std::unique_ptr<double[]> pdata1 = this->ps->extractParticlesState(3, 6); + std::unique_ptr<double[]> pdata2(new double[9*this->ps->getLocalNbParticles()]); + + /// sample position + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "position", + pdata0.get(), + &pdata0, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + /// sample orientation + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "orientation", + pdata0.get(), + &pdata1, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + /// sample velocity + /// from now on, we need to clean up data arrays before interpolation + std::fill_n(pdata1.get(), 3*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->tmp_vec_field, pdata1.get()); + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "velocity", + pdata0.get(), + &pdata1, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + /// sample velocity gradient + /// fs->cvelocity should contain the velocity in Fourier space + this->fs->compute_velocity(this->fs->cvorticity); + compute_gradient( + this->fs->kk, + this->fs->cvelocity, + this->nabla_u); + this->nabla_u->ift(); + std::fill_n(pdata2.get(), 9*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->nabla_u, pdata2.get()); + this->particles_sample_writer_mpi->template save_dataset<9>( + "tracers0", + "velocity_gradient", + pdata0.get(), + &pdata2, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + /// compute acceleration and sample it + this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field); + this->tmp_vec_field->ift(); + std::fill_n(pdata1.get(), 3*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->tmp_vec_field, pdata1.get()); + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "acceleration", + pdata0.get(), + &pdata1, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + // deallocate temporary 
data array + delete[] pdata0.release(); + delete[] pdata1.release(); + delete[] pdata2.release(); + + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEcomplex_particles<rnumber>::read_parameters(void) +{ + TIMEZONE("NSVEcomplex_particles::read_parameters"); + this->NSVE<rnumber>::read_parameters(); + hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + this->niter_part = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part"); + this->nparticles = hdf5_tools::read_value<int>(parameter_file, "parameters/nparticles"); + this->tracers0_integration_steps = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_integration_steps"); + this->tracers0_neighbours = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_neighbours"); + this->tracers0_smoothness = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_smoothness"); + this->enable_p2p = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_p2p"); + this->enable_inner = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_inner"); + int tval = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_vorticity_omega"); + this->enable_vorticity_omega = tval; + DEBUG_MSG("tracers0_enable_vorticity_omega = %d, this->enable_vorticity_omega = %d\n", + tval, this->enable_vorticity_omega); + this->cutoff = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_cutoff"); + this->inner_v0 = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_inner_v0"); + this->lambda = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_lambda"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template class NSVEcomplex_particles<float>; +template class NSVEcomplex_particles<double>; + diff --git a/cpp/full_code/NSVEcomplex_particles.hpp b/cpp/full_code/NSVEcomplex_particles.hpp new file mode 100644 index 0000000000000000000000000000000000000000..20a84a6592f9b1158738610674836dd30362b6dc --- /dev/null +++ b/cpp/full_code/NSVEcomplex_particles.hpp @@ -0,0 +1,97 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef NSVECOMPLEX_PARTICLES_HPP +#define NSVECOMPLEX_PARTICLES_HPP + + + +#include <cstdlib> +#include "base.hpp" +#include "vorticity_equation.hpp" +#include "full_code/NSVE.hpp" +#include "particles/particles_system_builder.hpp" +#include "particles/particles_output_hdf5.hpp" +#include "particles/particles_sampling.hpp" + +/** \brief Navier-Stokes solver that includes complex particles. 
+ * + * Child of Navier Stokes vorticity equation solver, this class calls all the + * methods from `NSVE`, and in addition integrates `complex particles` + * in the resulting velocity field. + * By `complex particles` we mean neutrally buoyant, very small particles, + * which have an orientation and actively swim in that direction, and they may + * also interact with each other, trying to reorient to a common orientation. + */ + +template <typename rnumber> +class NSVEcomplex_particles: public NSVE<rnumber> +{ + public: + + /* parameters that are read in read_parameters */ + int niter_part; + int nparticles; + int tracers0_integration_steps; + int tracers0_neighbours; + int tracers0_smoothness; + + double cutoff; + double inner_v0; + double lambda; + bool enable_p2p; + bool enable_inner; + bool enable_vorticity_omega; + + /* other stuff */ + std::unique_ptr<abstract_particles_system<long long int, double>> ps; + // TODO P2P use a reader with particle data + particles_output_hdf5<long long int, double,6> *particles_output_writer_mpi; + particles_output_sampling_hdf5<long long int, double, 3> *particles_sample_writer_mpi; + // field for sampling velocity gradient + field<rnumber, FFTW, THREExTHREE> *nabla_u; + + + NSVEcomplex_particles( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + NSVE<rnumber>( + COMMUNICATOR, + simulation_name), + cutoff(10), inner_v0(1), lambda(1.0), enable_p2p(true), enable_inner(true), enable_vorticity_omega(true){} + ~NSVEcomplex_particles(){} + + int initialize(void); + int step(void); + int finalize(void); + + int read_parameters(void); + int write_checkpoint(void); + int do_stats(void); +}; + +#endif//NSVECOMPLEX_PARTICLES_HPP + diff --git a/cpp/full_code/NSVEp_extra_sampling.cpp b/cpp/full_code/NSVEp_extra_sampling.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7b3e5a76c6d47c990df9698ccb5f8ef22770a70d --- /dev/null +++ b/cpp/full_code/NSVEp_extra_sampling.cpp @@ -0,0 +1,154 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include "full_code/NSVEp_extra_sampling.hpp" + + + +template <typename rnumber> +int NSVEp_extra_sampling<rnumber>::initialize(void) +{ + TIMEZONE("NSVEp_extra_sampling::initialize"); + this->NSVEparticles<rnumber>::initialize(); + + /// allocate grad vel field + this->nabla_u = new field<rnumber, FFTW, THREExTHREE>( + this->nx, this->ny, this->nz, + this->comm, + this->fs->cvorticity->fftw_plan_rigor); + this->pressure = new field<rnumber, FFTW, ONE>( + this->nx, this->ny, this->nz, + this->comm, + this->fs->cvorticity->fftw_plan_rigor); + this->nabla_p = new field<rnumber, FFTW, THREE>( + this->nx, this->ny, this->nz, + this->comm, + this->fs->cvorticity->fftw_plan_rigor); + this->Hessian_p = new field<rnumber, FFTW, THREExTHREE>( + this->nx, this->ny, this->nz, + this->comm, + this->fs->cvorticity->fftw_plan_rigor); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEp_extra_sampling<rnumber>::finalize(void) +{ + TIMEZONE("NSVEp_extra_sampling::finalize"); + delete this->nabla_u; + delete this->pressure; + delete this->nabla_p; + delete this->Hessian_p; + this->NSVEparticles<rnumber>::finalize(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEp_extra_sampling<rnumber>::do_stats() +{ + TIMEZONE("NSVEp_extra_sampling::do_stats"); + this->NSVEparticles<rnumber>::do_stats(); + if (!(this->iteration % this->niter_part == 0)) + return EXIT_SUCCESS; + + /// fs->cvelocity should contain the velocity in Fourier space + this->fs->compute_velocity(this->fs->cvorticity); + compute_gradient( + this->fs->kk, + this->fs->cvelocity, + this->nabla_u); + this->nabla_u->ift(); + + this->fs->compute_pressure(this->pressure); + + compute_gradient( + this->fs->kk, + this->pressure, + this->nabla_p); + + compute_gradient( + this->fs->kk, + this->nabla_p, + this->Hessian_p); + + this->pressure->ift(); + this->nabla_p->ift(); + this->Hessian_p->ift(); + + // sample velocity gradient + std::unique_ptr<double[]> pdata(new double[9*this->ps->getLocalNbParticles()]); + std::fill_n(pdata.get(), 9*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->nabla_u, pdata.get()); + + this->particles_sample_writer_mpi->template save_dataset<9>( + "tracers0", + "velocity_gradient", + this->ps->getParticlesState(), + &pdata, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + // sample pressure + std::fill_n(pdata.get(), this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->pressure, pdata.get()); + this->particles_sample_writer_mpi->template save_dataset<1>( + "tracers0", + "pressure", + this->ps->getParticlesState(), + &pdata, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + // sample pressure gradient + std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->nabla_p, pdata.get()); + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "pressure_gradient", + this->ps->getParticlesState(), + &pdata, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + // sample pressure gradient + std::fill_n(pdata.get(), 9*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->Hessian_p, pdata.get()); + 
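+    // note: the 9-component sample filled just above is the pressure Hessian
+    // (not the pressure gradient); it is written to "pressure_Hessian" below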
this->particles_sample_writer_mpi->template save_dataset<9>( + "tracers0", + "pressure_Hessian", + this->ps->getParticlesState(), + &pdata, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + return EXIT_SUCCESS; +} + +template class NSVEp_extra_sampling<float>; +template class NSVEp_extra_sampling<double>; + diff --git a/cpp/full_code/NSVEp_extra_sampling.hpp b/cpp/full_code/NSVEp_extra_sampling.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d3d1c1863a315d14f774644f54eed4d6a606d176 --- /dev/null +++ b/cpp/full_code/NSVEp_extra_sampling.hpp @@ -0,0 +1,72 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef NSVEP_EXTRA_SAMPLING_HPP +#define NSVEP_EXTRA_SAMPLING_HPP + + + +#include <cstdlib> +#include "base.hpp" +#include "vorticity_equation.hpp" +#include "full_code/NSVEparticles.hpp" +#include "particles/particles_system_builder.hpp" +#include "particles/particles_output_hdf5.hpp" +#include "particles/particles_sampling.hpp" + +/** \brief Navier-Stokes solver with tracers that sample velocity gradient + * and pressure Hessian. + * + */ + +template <typename rnumber> +class NSVEp_extra_sampling: public NSVEparticles<rnumber> +{ + public: + + /* other stuff */ + field<rnumber, FFTW, ONE> *pressure; + field<rnumber, FFTW, THREE> *nabla_p; + field<rnumber, FFTW, THREExTHREE> *nabla_u; + field<rnumber, FFTW, THREExTHREE> *Hessian_p; + + NSVEp_extra_sampling( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + NSVEparticles<rnumber>( + COMMUNICATOR, + simulation_name){} + ~NSVEp_extra_sampling(){} + + int initialize(void); + int finalize(void); + + int read_parameters(void); + int do_stats(void); +}; + +#endif//NSVEP_EXTRA_SAMPLING_HPP + + diff --git a/cpp/full_code/NSVEparticles.cpp b/cpp/full_code/NSVEparticles.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9b8743cdb48a5f3575931dfcc200fe1f0362778d --- /dev/null +++ b/cpp/full_code/NSVEparticles.cpp @@ -0,0 +1,210 @@ +/********************************************************************** +* * +* Copyright 2019 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. 
* +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#define NDEBUG + +#include <string> +#include <cmath> +#include "NSVEparticles.hpp" +#include "scope_timer.hpp" + +template <typename rnumber> +int NSVEparticles<rnumber>::initialize(void) +{ + TIMEZONE("NSVEparticles::intialize"); + this->NSVE<rnumber>::initialize(); + this->pressure = new field<rnumber, FFTW, ONE>( + this->fs->cvelocity->rlayout->sizes[2], + this->fs->cvelocity->rlayout->sizes[1], + this->fs->cvelocity->rlayout->sizes[0], + this->fs->cvelocity->rlayout->comm, + this->fs->cvelocity->fftw_plan_rigor); + + this->ps = particles_system_builder( + this->fs->cvelocity, // (field object) + this->fs->kk, // (kspace object, contains dkx, dky, dkz) + tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) + (long long int)nparticles, // to check coherency between parameters and hdf input file + this->fs->get_current_fname(), // particles input filename + std::string("/tracers0/state/") + std::to_string(this->fs->iteration), // dataset name for initial input + std::string("/tracers0/rhs/") + std::to_string(this->fs->iteration), // dataset name for initial input + tracers0_neighbours, // parameter (interpolation no neighbours) + tracers0_smoothness, // parameter + this->comm, + this->fs->iteration+1); + this->particles_output_writer_mpi = new particles_output_hdf5< + long long int, double, 3>( + MPI_COMM_WORLD, + "tracers0", + nparticles, + tracers0_integration_steps); + this->particles_output_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout()); + this->particles_sample_writer_mpi = new particles_output_sampling_hdf5< + long long int, double, 3>( + MPI_COMM_WORLD, + this->ps->getGlobalNbParticles(), + (this->simname + "_particles.h5"), + "tracers0", + "position/0"); + this->particles_sample_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout()); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEparticles<rnumber>::step(void) +{ + TIMEZONE("NSVEparticles::step"); + this->fs->compute_velocity(this->fs->cvorticity); + this->fs->cvelocity->ift(); + this->ps->completeLoop(this->dt); + this->NSVE<rnumber>::step(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEparticles<rnumber>::write_checkpoint(void) +{ + TIMEZONE("NSVEparticles::write_checkpoint"); + this->NSVE<rnumber>::write_checkpoint(); + this->particles_output_writer_mpi->open_file(this->fs->get_current_fname()); + this->particles_output_writer_mpi->template save<3>( + this->ps->getParticlesState(), + this->ps->getParticlesRhs(), + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->fs->iteration); + this->particles_output_writer_mpi->close_file(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEparticles<rnumber>::finalize(void) +{ + TIMEZONE("NSVEparticles::finalize"); + delete this->pressure; + delete this->ps.release(); + delete this->particles_output_writer_mpi; + delete this->particles_sample_writer_mpi; + this->NSVE<rnumber>::finalize(); + return EXIT_SUCCESS; +} + 
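The do_stats() override that follows gates particle output on two conditions: either the current iteration is a multiple of niter_part, or it falls within a window of roughly niter_part_fine_duration iterations centred on a multiple of niter_part_fine_period. A minimal standalone sketch of that predicate (plain C++, not bfps API; the parameter values in main() are invented purely for illustration):

#include <cstdio>

// Mirrors the condition used in NSVEparticles::do_stats(): sample when the
// iteration is a multiple of niter_part, or when it lies within about
// niter_part_fine_duration iterations of a multiple of niter_part_fine_period.
bool sample_particles_now(
        const int iteration,
        const int niter_part,
        const int niter_part_fine_period,
        const int niter_part_fine_duration)
{
    return (iteration % niter_part == 0) ||
           ((iteration + niter_part_fine_duration/2) % niter_part_fine_period
                <= niter_part_fine_duration);
}

int main()
{
    // hypothetical parameter values, chosen only to make the pattern visible
    const int niter_part = 10;
    const int fine_period = 100;
    const int fine_duration = 4;
    for (int it = 0; it <= 120; it++)
        if (sample_particles_now(it, niter_part, fine_period, fine_duration))
            std::printf("iteration %3d: sample particles\n", it);
    return 0;
}

The fine_duration/2 offset inside the modulo test is what centres the window on the multiples of the fine period, so iterations shortly before such a multiple also trigger output.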
+/** \brief Compute fluid stats and sample fields at particle locations. + */ + +template <typename rnumber> +int NSVEparticles<rnumber>::do_stats() +{ + TIMEZONE("NSVEparticles::do_stats"); + /// fluid stats go here + this->NSVE<rnumber>::do_stats(); + + + /// either one of two conditions suffices to compute statistics: + /// 1) current iteration is a multiple of niter_part + /// 2) we are within niter_part_fine_duration/2 of a multiple of niter_part_fine_period + if (!(this->iteration % this->niter_part == 0 || + ((this->iteration + this->niter_part_fine_duration/2) % this->niter_part_fine_period <= + this->niter_part_fine_duration))) + return EXIT_SUCCESS; + + // allocate temporary data array + std::unique_ptr<double[]> pdata(new double[3*this->ps->getLocalNbParticles()]); + + /// copy position data + + /// sample position + std::copy(this->ps->getParticlesState(), + this->ps->getParticlesState()+3*this->ps->getLocalNbParticles(), + pdata.get()); + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "position", + this->ps->getParticlesState(), + &pdata, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + /// sample velocity + std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0); + if (!(this->iteration % this->niter_stat == 0)) + { + // we need to compute velocity field manually, because it didn't happen in NSVE::do_stats() + this->fs->compute_velocity(this->fs->cvorticity); + *this->tmp_vec_field = this->fs->cvelocity->get_cdata(); + this->tmp_vec_field->ift(); + } + this->ps->sample_compute_field(*this->tmp_vec_field, pdata.get()); + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "velocity", + this->ps->getParticlesState(), + &pdata, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + /// compute acceleration and sample it + this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field, this->pressure); + this->tmp_vec_field->ift(); + std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->tmp_vec_field, pdata.get()); + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "acceleration", + this->ps->getParticlesState(), + &pdata, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + // deallocate temporary data array + delete[] pdata.release(); + + return EXIT_SUCCESS; +} + +template <typename rnumber> +int NSVEparticles<rnumber>::read_parameters(void) +{ + TIMEZONE("NSVEparticles::read_parameters"); + this->NSVE<rnumber>::read_parameters(); + hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + this->niter_part = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part"); + this->niter_part_fine_period = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part_fine_period"); + this->niter_part_fine_duration = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part_fine_duration"); + this->nparticles = hdf5_tools::read_value<int>(parameter_file, "parameters/nparticles"); + this->tracers0_integration_steps = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_integration_steps"); + this->tracers0_neighbours = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_neighbours"); + this->tracers0_smoothness = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_smoothness"); + 
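// The particle parameters read above control the tracer subsystem: niter_part
// is the output period for particle data, and the fine period/duration pair
// enables the temporarily denser sampling tested for in do_stats();
// nparticles and tracers0_integration_steps are checked against the particle
// HDF5 input (number of tracers and of stored right-hand-side time levels for
// the multi-step integrator); tracers0_neighbours and tracers0_smoothness
// select the interpolation kernel (number of neighbour points and,
// presumably, spline smoothness), matching the comments on the
// particles_system_builder() call in initialize().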
H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template class NSVEparticles<float>; +template class NSVEparticles<double>; + diff --git a/bfps/cpp/full_code/NSVEparticles.hpp b/cpp/full_code/NSVEparticles.hpp similarity index 89% rename from bfps/cpp/full_code/NSVEparticles.hpp rename to cpp/full_code/NSVEparticles.hpp index ccafe6eeb09d27a6b211cfd75ecfba4fc5abe92b..8b70ead9b084aa4f7693c243b2f04e5b06c2d572 100644 --- a/bfps/cpp/full_code/NSVEparticles.hpp +++ b/cpp/full_code/NSVEparticles.hpp @@ -35,6 +35,7 @@ #include "full_code/NSVE.hpp" #include "particles/particles_system_builder.hpp" #include "particles/particles_output_hdf5.hpp" +#include "particles/particles_sampling.hpp" /** \brief Navier-Stokes solver that includes simple Lagrangian tracers. * @@ -50,6 +51,8 @@ class NSVEparticles: public NSVE<rnumber> /* parameters that are read in read_parameters */ int niter_part; + int niter_part_fine_period; + int niter_part_fine_duration; int nparticles; int tracers0_integration_steps; int tracers0_neighbours; @@ -57,7 +60,10 @@ class NSVEparticles: public NSVE<rnumber> /* other stuff */ std::unique_ptr<abstract_particles_system<long long int, double>> ps; - particles_output_hdf5<long long int, double,3,3> *particles_output_writer_mpi; + field<rnumber, FFTW, ONE> *pressure; + + particles_output_hdf5<long long int, double,3> *particles_output_writer_mpi; + particles_output_sampling_hdf5<long long int, double, 3> *particles_sample_writer_mpi; NSVEparticles( diff --git a/cpp/full_code/NSVEparticles_no_output.hpp b/cpp/full_code/NSVEparticles_no_output.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4e6de379b06c593e7b9cfbacb50a81c7bdcefcfd --- /dev/null +++ b/cpp/full_code/NSVEparticles_no_output.hpp @@ -0,0 +1,50 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef NSVEPARTICLES_NO_OUTPUT_HPP +#define NSVEPARTICLES_NO_OUTPUT_HPP + +#include "full_code/NSVEparticles.hpp" + +template <typename rnumber> +class NSVEparticles_no_output: public NSVEparticles<rnumber> +{ + public: + NSVEparticles_no_output( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + NSVEparticles<rnumber>( + COMMUNICATOR, + simulation_name){} + ~NSVEparticles_no_output(){} + int write_checkpoint(void) + { + TIMEZONE("NSVEparticles_no_output::write_checkpoint"); + return EXIT_SUCCESS; + } +}; + +#endif//NSVEPARTICLES_NO_OUTPUT_HPP + diff --git a/cpp/full_code/code_base.cpp b/cpp/full_code/code_base.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a6487c726de44b018392128f955ccebf7e7100a1 --- /dev/null +++ b/cpp/full_code/code_base.cpp @@ -0,0 +1,82 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#define NDEBUG + +#include "code_base.hpp" +#include "scope_timer.hpp" + + +code_base::code_base( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + comm(COMMUNICATOR), + simname(simulation_name) +{ + TIMEZONE("code_base::code_base"); + MPI_Comm_rank(this->comm, &this->myrank); + MPI_Comm_size(this->comm, &this->nprocs); + this->stop_code_now = false; +} + +int code_base::check_stopping_condition(void) +{ + TIMEZONE("code_base::check_stopping_condition"); + if (myrank == 0) + { + std::string fname = ( + std::string("stop_") + + std::string(this->simname)); + { + struct stat file_buffer; + this->stop_code_now = ( + stat(fname.c_str(), &file_buffer) == 0); + } + } + MPI_Bcast( + &this->stop_code_now, + 1, + MPI_C_BOOL, + 0, + MPI_COMM_WORLD); + return EXIT_SUCCESS; +} + +int code_base::read_parameters(void) +{ + TIMEZONE("code_base::read_parameters"); + hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + this->dkx = hdf5_tools::read_value<double>(parameter_file, "parameters/dkx"); + this->dky = hdf5_tools::read_value<double>(parameter_file, "parameters/dky"); + this->dkz = hdf5_tools::read_value<double>(parameter_file, "parameters/dkz"); + this->nx = hdf5_tools::read_value<int>(parameter_file, "parameters/nx"); + this->ny = hdf5_tools::read_value<int>(parameter_file, "parameters/ny"); + this->nz = hdf5_tools::read_value<int>(parameter_file, "parameters/nz"); + this->dealias_type = hdf5_tools::read_value<int>(parameter_file, "parameters/dealias_type"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + diff --git a/bfps/cpp/full_code/code_base.hpp b/cpp/full_code/code_base.hpp similarity index 99% rename from bfps/cpp/full_code/code_base.hpp rename to cpp/full_code/code_base.hpp index cf0521e2b7383edf925e1129d4fa4a931a55efe4..5ec4260dbfbaaa8ea4e123d8a38b680c0df121eb 100644 --- a/bfps/cpp/full_code/code_base.hpp +++ b/cpp/full_code/code_base.hpp @@ -108,6 +108,7 @@ class code_base return EXIT_SUCCESS; } + virtual int read_parameters(void); virtual int initialize(void) = 0; virtual int main_loop(void) = 0; virtual int finalize(void) = 0; diff --git a/cpp/full_code/codes_with_no_output.hpp b/cpp/full_code/codes_with_no_output.hpp new file mode 100644 index 0000000000000000000000000000000000000000..175bed22d2f6a7b9fa2fc469d6a6bb6f03c9a3a0 --- /dev/null +++ b/cpp/full_code/codes_with_no_output.hpp @@ -0,0 +1,34 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef CODES_WITH_NO_OUTPUT_HPP +#define CODES_WITH_NO_OUTPUT_HPP + +#include "full_code/NSVE_no_output.hpp" +#include "full_code/NSVEparticles_no_output.hpp" + + +#endif//CODES_WITH_NO_OUTPUT_HPP + diff --git a/bfps/cpp/full_code/direct_numerical_simulation.cpp b/cpp/full_code/direct_numerical_simulation.cpp similarity index 53% rename from bfps/cpp/full_code/direct_numerical_simulation.cpp rename to cpp/full_code/direct_numerical_simulation.cpp index edc2f99497a21368c63348167190dc6c64b44712..5329e7034e082b32cbdad7b4aae3d81665156215 100644 --- a/bfps/cpp/full_code/direct_numerical_simulation.cpp +++ b/cpp/full_code/direct_numerical_simulation.cpp @@ -1,3 +1,30 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#define NDEBUG + #include <cstdlib> #include <sys/types.h> #include <sys/stat.h> @@ -8,6 +35,7 @@ int direct_numerical_simulation::grow_file_datasets() { + TIMEZONE("direct_numerical_simulation::grow_file_datasets"); return hdf5_tools::grow_file_datasets( this->stat_file, "statistics", @@ -16,6 +44,7 @@ int direct_numerical_simulation::grow_file_datasets() int direct_numerical_simulation::read_iteration(void) { + TIMEZONE("direct_numerical_simulation::read_iteration"); /* read iteration */ hid_t dset; hid_t iteration_file = H5Fopen( @@ -56,6 +85,7 @@ int direct_numerical_simulation::read_iteration(void) int direct_numerical_simulation::write_iteration(void) { + TIMEZONE("direct_numerical_simulation::write_iteration"); if (this->myrank == 0) { hid_t dset = H5Dopen( @@ -88,6 +118,7 @@ int direct_numerical_simulation::write_iteration(void) int direct_numerical_simulation::main_loop(void) { + TIMEZONE("direct_numerical_simulation::main_loop"); this->start_simple_timer(); int max_iter = (this->iteration + this->niter_todo - (this->iteration % this->niter_todo)); @@ -117,3 +148,15 @@ int direct_numerical_simulation::main_loop(void) return EXIT_SUCCESS; } +int direct_numerical_simulation::read_parameters(void) +{ + TIMEZONE("direct_numerical_simulation::read_parameters"); + this->code_base::read_parameters(); + hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "parameters/checkpoints_per_file"); + this->niter_out = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_out"); + this->niter_stat = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_stat"); 
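These read_parameters() methods all go through hdf5_tools::read_value<T>(file, "path"). As a rough illustration of what such a helper has to do, here is a hypothetical equivalent for int built directly on the HDF5 C API (not the actual bfps implementation); it also reproduces the convention, visible in get_rfields.cpp below, that a missing integer dataset reads back as INT_MAX:

#include <hdf5.h>
#include <climits>
#include <string>

// Hypothetical stand-in for hdf5_tools::read_value<int>: read a scalar int
// dataset from an open HDF5 file, returning INT_MAX when the dataset is
// absent (the sentinel get_rfields::initialize() checks for
// "checkpoints_per_file").
int read_int_parameter(
        const hid_t file,
        const std::string &dset_name)
{
    int value = INT_MAX;
    if (H5Lexists(file, dset_name.c_str(), H5P_DEFAULT) <= 0)
        return value;
    hid_t dset = H5Dopen(file, dset_name.c_str(), H5P_DEFAULT);
    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &value);
    H5Dclose(dset);
    return value;
}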
+ this->niter_todo = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_todo"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} diff --git a/bfps/cpp/full_code/direct_numerical_simulation.hpp b/cpp/full_code/direct_numerical_simulation.hpp similarity index 98% rename from bfps/cpp/full_code/direct_numerical_simulation.hpp rename to cpp/full_code/direct_numerical_simulation.hpp index 8050bb045b29acf29d655273f7dff310dd10d0fa..15ab698a1128fd836b74b100b9a79d5c6d67d12f 100644 --- a/bfps/cpp/full_code/direct_numerical_simulation.hpp +++ b/cpp/full_code/direct_numerical_simulation.hpp @@ -51,6 +51,7 @@ class direct_numerical_simulation: public code_base simulation_name){} virtual ~direct_numerical_simulation(){} + virtual int read_parameters(void); virtual int write_checkpoint(void) = 0; virtual int initialize(void) = 0; virtual int step(void) = 0; diff --git a/cpp/full_code/field_output_test.cpp b/cpp/full_code/field_output_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..649d8dbb5f3a26d30f147ff228c90326cce9ee6d --- /dev/null +++ b/cpp/full_code/field_output_test.cpp @@ -0,0 +1,92 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include <string> +#include <cmath> +#include <random> +#include "field_output_test.hpp" +#include "scope_timer.hpp" + + +template <typename rnumber> +int field_output_test<rnumber>::initialize(void) +{ + TIMEZONE("field_output_test::initialize"); + this->read_parameters(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int field_output_test<rnumber>::finalize(void) +{ + TIMEZONE("field_output_test::finalize"); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int field_output_test<rnumber>::read_parameters() +{ + TIMEZONE("field_output_test::read_parameters"); + this->test::read_parameters(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int field_output_test<rnumber>::do_work(void) +{ + TIMEZONE("field_output_test::do_work"); + // allocate + field<rnumber, FFTW, ONE> *scal_field = new field<rnumber, FFTW, ONE>( + this->nx, this->ny, this->nz, + this->comm, + FFTW_ESTIMATE); + std::default_random_engine rgen; + std::normal_distribution<rnumber> rdist; + rgen.seed(1); + + // fill up scal_field + scal_field->real_space_representation = true; + scal_field->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + scal_field->rval(rindex) = rdist(rgen); + }); + + scal_field->io( + this->simname + std::string("_fields.h5"), + "scal_field", + 0, + false); + + // deallocate + delete scal_field; + return EXIT_SUCCESS; +} + +template class field_output_test<float>; +template class field_output_test<double>; + diff --git a/cpp/full_code/field_output_test.hpp b/cpp/full_code/field_output_test.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3662e4b1ab610831e53be3edd2d1612eb2b45a46 --- /dev/null +++ b/cpp/full_code/field_output_test.hpp @@ -0,0 +1,60 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef FILTER_OUTPUT_TEST_HPP +#define FILTER_OUTPUT_TEST_HPP + + + +#include <cstdlib> +#include "base.hpp" +#include "kspace.hpp" +#include "field.hpp" +#include "full_code/test.hpp" + +/** \brief A class for testing basic field class functionality. 
+ */ + +template <typename rnumber> +class field_output_test: public test +{ + public: + field_output_test( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + test( + COMMUNICATOR, + simulation_name){} + ~field_output_test(){} + + int initialize(void); + int do_work(void); + int finalize(void); + int read_parameters(void); +}; + +#endif//FILTER_OUTPUT_TEST_HPP + diff --git a/cpp/full_code/field_single_to_double.cpp b/cpp/full_code/field_single_to_double.cpp new file mode 100644 index 0000000000000000000000000000000000000000..93a03aed5a494138ba8279792788a7bb19105325 --- /dev/null +++ b/cpp/full_code/field_single_to_double.cpp @@ -0,0 +1,119 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include <string> +#include <cmath> +#include "field_single_to_double.hpp" +#include "scope_timer.hpp" + + +template <typename rnumber> +int field_single_to_double<rnumber>::initialize(void) +{ + TIMEZONE("field_single_to_double::intialize"); + this->NSVE_field_stats<rnumber>::initialize(); + DEBUG_MSG("after NSVE_field_stats::initialize\n"); + this->kk = new kspace<FFTW, SMOOTH>( + this->vorticity->clayout, this->dkx, this->dky, this->dkz); + this->vec_field_double = new field<double, FFTW, THREE>( + this->nx, this->ny, this->nz, + this->comm, + this->vorticity->fftw_plan_rigor); + this->vec_field_double->real_space_representation = false; + hid_t parameter_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + hid_t dset = H5Dopen(parameter_file, "/parameters/niter_out", H5P_DEFAULT); + H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->niter_out); + H5Dclose(dset); + if (H5Lexists(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT)) + { + dset = H5Dopen(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT); + H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->checkpoints_per_file); + H5Dclose(dset); + } + else + this->checkpoints_per_file = 1; + H5Fclose(parameter_file); + parameter_file = H5Fopen( + (this->simname + std::string("_post.h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + DEBUG_MSG("before read_vector\n"); + this->iteration_list = hdf5_tools::read_vector<int>( + parameter_file, + "/field_single_to_double/parameters/iteration_list"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int field_single_to_double<rnumber>::work_on_current_iteration(void) +{ + TIMEZONE("field_single_to_double::work_on_current_iteration"); + this->read_current_cvorticity(); + + // using CLOOP as opposed to a global std::copy because 
CLOOP + // is openmp parallelized. + this->kk->CLOOP( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + { + std::copy( + (rnumber*)(this->vorticity->get_cdata() + cindex*3), + (rnumber*)(this->vorticity->get_cdata() + cindex*3) + 6, + (double*)(this->vec_field_double->get_cdata() + cindex*3)); + } + } + ); + + std::string fname = ( + this->simname + + std::string("_checkpoint_double_") + + std::to_string(this->iteration / (this->niter_out*this->checkpoints_per_file)) + + std::string(".h5")); + this->vec_field_double->io( + fname, + "vorticity", + this->iteration, + false); + + return EXIT_SUCCESS; +} + +template <typename rnumber> +int field_single_to_double<rnumber>::finalize(void) +{ + TIMEZONE("field_single_to_double::finalize"); + delete this->vec_field_double; + delete this->kk; + return EXIT_SUCCESS; +} + +template class field_single_to_double<float>; + diff --git a/cpp/full_code/field_single_to_double.hpp b/cpp/full_code/field_single_to_double.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0a7550e40ccfbf40958b74b7a2cd63f54b2bd84a --- /dev/null +++ b/cpp/full_code/field_single_to_double.hpp @@ -0,0 +1,63 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef FIELD_SINGLE_TO_DOUBLE_HPP +#define FIELD_SINGLE_TO_DOUBLE_HPP + +#include <cstdlib> +#include <sys/types.h> +#include <sys/stat.h> +#include <vector> +#include "base.hpp" +#include "field.hpp" +#include "field_binary_IO.hpp" +#include "full_code/NSVE_field_stats.hpp" + +template <typename rnumber> +class field_single_to_double: public NSVE_field_stats<rnumber> +{ + public: + int checkpoints_per_file; + int niter_out; + kspace<FFTW, SMOOTH> *kk; + + field<double, FFTW, THREE> *vec_field_double; + + field_single_to_double( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + NSVE_field_stats<rnumber>( + COMMUNICATOR, + simulation_name){} + virtual ~field_single_to_double(){} + + int initialize(void); + int work_on_current_iteration(void); + int finalize(void); +}; + +#endif//FIELD_SINGLE_TO_DOUBLE_HPP + diff --git a/cpp/full_code/field_test.cpp b/cpp/full_code/field_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aa055a6e162ae15fc3652d9d0533e1eb07d5f528 --- /dev/null +++ b/cpp/full_code/field_test.cpp @@ -0,0 +1,145 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include <string> +#include <cmath> +#include <random> +#include "field_test.hpp" +#include "scope_timer.hpp" + + +template <typename rnumber> +int field_test<rnumber>::initialize(void) +{ + TIMEZONE("field_test::initialize"); + this->read_parameters(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int field_test<rnumber>::finalize(void) +{ + TIMEZONE("field_test::finalize"); + this->read_parameters(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int field_test<rnumber>::read_parameters() +{ + TIMEZONE("field_test::read_parameters"); + this->test::read_parameters(); + // in case any parameters are needed, this is where they should be read + hid_t parameter_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + this->filter_length = hdf5_tools::read_value<double>(parameter_file, "/parameters/filter_length"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int field_test<rnumber>::do_work(void) +{ + TIMEZONE("field_test::do_work"); + // allocate + field<rnumber, FFTW, ONE> *scal_field = new field<rnumber, FFTW, ONE>( + this->nx, this->ny, this->nz, + this->comm, + FFTW_ESTIMATE); + field<rnumber, FFTW, ONE> *scal_field_alt = new field<rnumber, FFTW, ONE>( + this->nx, this->ny, this->nz, + this->comm, + FFTW_ESTIMATE); + std::default_random_engine rgen; + std::normal_distribution<rnumber> rdist; + rgen.seed(2); + //auto gaussian = std::bind(rgen, rdist); + kspace<FFTW,SMOOTH> *kk = new kspace<FFTW, SMOOTH>( + scal_field->clayout, this->dkx, this->dky, this->dkz); + + if (this->myrank == 0) + { + hid_t stat_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDWR, + H5P_DEFAULT); + kk->store(stat_file); + H5Fclose(stat_file); + } + + // fill up scal_field + scal_field->real_space_representation = true; + scal_field->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + scal_field->rval(rindex) = rdist(rgen); + }); + + *scal_field_alt = scal_field->get_rdata(); + double L2r = scal_field->L2norm(kk); + scal_field->dft(); + double L2c = scal_field->L2norm(kk); + scal_field->ift(); + scal_field->normalize(); + DEBUG_MSG("L2r = %g, L2c = %g\n", + L2r, L2c / scal_field->npoints); + + double max_error = 0; + scal_field->RLOOP( + [&](ptrdiff_t rindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + double tval = fabs(scal_field->rval(rindex) - scal_field_alt->rval(rindex)); + if (max_error < tval) + max_error = tval; + }); + + DEBUG_MSG("maximum error is %g\n", max_error); + + scal_field->dft(); + kk->template dealias<rnumber, ONE>(scal_field->get_cdata()); + scal_field->symmetrize(); + scal_field->normalize(); + L2c = scal_field->L2norm(kk); + 
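// consistency check: after the inverse transform just below, the real-space
// L2 norm is expected to agree with the Fourier-space value computed here,
// since the spectral operations were followed by normalize(); in the first
// pair of norms printed above, the field had not yet been normalized after
// dft(), hence the division by npoints in that DEBUG_MSG.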
scal_field->ift(); + L2r = scal_field->L2norm(kk); + DEBUG_MSG("L2r = %g, L2c = %g\n", + L2r, L2c); + + // deallocate + delete kk; + delete scal_field; + delete scal_field_alt; + return EXIT_SUCCESS; +} + +template class field_test<float>; +template class field_test<double>; + diff --git a/bfps/cpp/interpolator.hpp b/cpp/full_code/field_test.hpp similarity index 51% rename from bfps/cpp/interpolator.hpp rename to cpp/full_code/field_test.hpp index 7e56ebe159fd24ed7cf623f0a869e1d262d4aadb..5339feb80ae690170f52935e97cd700e958a48a4 100644 --- a/bfps/cpp/interpolator.hpp +++ b/cpp/full_code/field_test.hpp @@ -1,6 +1,6 @@ /********************************************************************** * * -* Copyright 2015 Max Planck Institute * +* Copyright 2017 Max Planck Institute * * for Dynamics and Self-Organization * * * * This file is part of bfps. * @@ -24,56 +24,40 @@ -#include <cmath> -#include "field_descriptor.hpp" -#include "fftw_tools.hpp" -#include "fluid_solver_base.hpp" -#include "interpolator_base.hpp" +#ifndef FILTER_TEST_HPP +#define FILTER_TEST_HPP -#ifndef INTERPOLATOR -#define INTERPOLATOR -template <class rnumber, int interp_neighbours> -class interpolator:public interpolator_base<rnumber, interp_neighbours> -{ - private: - /* pointer to buffered field */ - rnumber *field; - - public: - using interpolator_base<rnumber, interp_neighbours>::operator(); - ptrdiff_t buffer_size; - - /* descriptor for buffered field */ - field_descriptor<rnumber> *buffered_descriptor; +#include <cstdlib> +#include "base.hpp" +#include "kspace.hpp" +#include "field.hpp" +#include "full_code/test.hpp" - interpolator( - fluid_solver_base<rnumber> *FSOLVER, - base_polynomial_values BETA_POLYS, - ...); - ~interpolator(); +/** \brief A class for testing basic field class functionality. + */ - int read_rFFTW(const void *src); - - inline int get_rank(double z) - { - return this->descriptor->rank[MOD(int(floor(z/this->dz)), this->descriptor->sizes[0])]; - } - - /* interpolate field at an array of locations */ - void sample( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - double *__restrict__ y, - const int *deriv = NULL); - void operator()( - const int *__restrict__ xg, - const double *__restrict__ xx, - double *__restrict__ dest, - const int *deriv = NULL); +template <typename rnumber> +class field_test: public test +{ + public: + double filter_length; + // kspace, in case we want to compute spectra or smth + + field_test( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + test( + COMMUNICATOR, + simulation_name){} + ~field_test(){} + + int initialize(void); + int do_work(void); + int finalize(void); + int read_parameters(void); }; -#endif//INTERPOLATOR +#endif//FILTER_TEST_HPP diff --git a/bfps/cpp/full_code/filter_test.cpp b/cpp/full_code/filter_test.cpp similarity index 66% rename from bfps/cpp/full_code/filter_test.cpp rename to cpp/full_code/filter_test.cpp index aeedfbe74806adcff53a97d6c227b8fdcd30195f..5df45a7941d5ce4989cee72cbb84731a7cec487f 100644 --- a/bfps/cpp/full_code/filter_test.cpp +++ b/cpp/full_code/filter_test.cpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #include <string> #include <cmath> #include "filter_test.hpp" @@ -7,11 +32,12 @@ template <typename rnumber> int filter_test<rnumber>::initialize(void) { + TIMEZONE("filter_test::initialize"); this->read_parameters(); this->scal_field = new field<rnumber, FFTW, ONE>( nx, ny, nz, this->comm, - DEFAULT_FFTW_FLAG); + FFTW_ESTIMATE); this->kk = new kspace<FFTW, SMOOTH>( this->scal_field->clayout, this->dkx, this->dky, this->dkz); @@ -30,6 +56,7 @@ int filter_test<rnumber>::initialize(void) template <typename rnumber> int filter_test<rnumber>::finalize(void) { + TIMEZONE("filter_test::finalize"); delete this->scal_field; delete this->kk; return EXIT_SUCCESS; @@ -38,16 +65,13 @@ int filter_test<rnumber>::finalize(void) template <typename rnumber> int filter_test<rnumber>::read_parameters() { + TIMEZONE("filter_test::read_parameters"); this->test::read_parameters(); - hid_t parameter_file; - hid_t dset, memtype, space; - parameter_file = H5Fopen( + hid_t parameter_file = H5Fopen( (this->simname + std::string(".h5")).c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); - dset = H5Dopen(parameter_file, "/parameters/filter_length", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->filter_length); - H5Dclose(dset); + this->filter_length = hdf5_tools::read_value<double>(parameter_file, "/parameters/filter_length"); H5Fclose(parameter_file); return EXIT_SUCCESS; } @@ -56,6 +80,7 @@ template <typename rnumber> int filter_test<rnumber>::reset_field( int dimension) { + TIMEZONE("filter_test::reset_field"); this->scal_field->real_space_representation = true; *this->scal_field = 0.0; if (this->scal_field->rlayout->starts[0] == 0) @@ -95,6 +120,7 @@ int filter_test<rnumber>::reset_field( template <typename rnumber> int filter_test<rnumber>::do_work(void) { + TIMEZONE("filter_test::do_work"); std::string filename = this->simname + std::string("_fields.h5"); for (int dimension = 0; dimension < 3; dimension++) { diff --git a/bfps/cpp/full_code/filter_test.hpp b/cpp/full_code/filter_test.hpp similarity index 100% rename from bfps/cpp/full_code/filter_test.hpp rename to cpp/full_code/filter_test.hpp diff --git a/bfps/cpp/full_code/get_rfields.cpp b/cpp/full_code/get_rfields.cpp similarity index 53% rename from bfps/cpp/full_code/get_rfields.cpp rename to cpp/full_code/get_rfields.cpp index 0df8b564a61fba11118ef3f551b0a2db6cbfec1d..45f6b5dce95b5d4fbb9edc2ce353fdde51f0fba8 100644 --- a/bfps/cpp/full_code/get_rfields.cpp +++ b/cpp/full_code/get_rfields.cpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #include <string> #include <cmath> #include "get_rfields.hpp" @@ -7,27 +32,27 @@ template <typename rnumber> int get_rfields<rnumber>::initialize(void) { + TIMEZONE("get_rfields::initialize"); this->NSVE_field_stats<rnumber>::initialize(); + DEBUG_MSG("after NSVE_field_stats::initialize\n"); this->kk = new kspace<FFTW, SMOOTH>( this->vorticity->clayout, this->dkx, this->dky, this->dkz); hid_t parameter_file = H5Fopen( (this->simname + std::string(".h5")).c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); - hid_t dset = H5Dopen(parameter_file, "/parameters/niter_out", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->niter_out); - H5Dclose(dset); - if (H5Lexists(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT)) - { - dset = H5Dopen(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT); - H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->checkpoints_per_file); - H5Dclose(dset); - } - else + this->niter_out = hdf5_tools::read_value<int>(parameter_file, "/parameters/niter_out"); + this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "/parameters/checkpoints_per_file"); + if (this->checkpoints_per_file == INT_MAX) // value returned if dataset does not exist this->checkpoints_per_file = 1; + H5Fclose(parameter_file); + parameter_file = H5Fopen( + (this->simname + std::string("_post.h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); this->iteration_list = hdf5_tools::read_vector<int>( parameter_file, - "/get_rfields/iteration_list"); + "/get_rfields/parameters/iteration_list"); H5Fclose(parameter_file); return EXIT_SUCCESS; } @@ -35,7 +60,7 @@ int get_rfields<rnumber>::initialize(void) template <typename rnumber> int get_rfields<rnumber>::work_on_current_iteration(void) { - DEBUG_MSG("entered get_rfields::work_on_current_iteration\n"); + TIMEZONE("get_rfields::work_on_current_iteration"); this->read_current_cvorticity(); field<rnumber, FFTW, THREE> *vel = new field<rnumber, FFTW, THREE>( this->nx, this->ny, this->nz, @@ -77,12 +102,20 @@ int get_rfields<rnumber>::work_on_current_iteration(void) false); delete vel; + + this->vorticity->ift(); + this->vorticity->io( + fname, + "vorticity", + this->iteration, + false); return EXIT_SUCCESS; } template <typename rnumber> int get_rfields<rnumber>::finalize(void) { + TIMEZONE("get_rfields::finalize"); delete this->kk; this->NSVE_field_stats<rnumber>::finalize(); return EXIT_SUCCESS; diff --git a/bfps/cpp/full_code/get_rfields.hpp b/cpp/full_code/get_rfields.hpp similarity index 100% rename from bfps/cpp/full_code/get_rfields.hpp rename to cpp/full_code/get_rfields.hpp diff --git a/bfps/cpp/full_code/joint_acc_vel_stats.cpp b/cpp/full_code/joint_acc_vel_stats.cpp similarity index 73% rename from 
bfps/cpp/full_code/joint_acc_vel_stats.cpp rename to cpp/full_code/joint_acc_vel_stats.cpp index e4f4d5d40772292f44c7e776dcd4d1b82c4ce222..be2cd9fe5a38dfb28df12d2c221b37c4d152212e 100644 --- a/bfps/cpp/full_code/joint_acc_vel_stats.cpp +++ b/cpp/full_code/joint_acc_vel_stats.cpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #include <string> #include <cmath> #include "joint_acc_vel_stats.hpp" @@ -7,6 +32,7 @@ template <typename rnumber> int joint_acc_vel_stats<rnumber>::initialize(void) { + TIMEZONE("joint_acc_vel_stats::initialize"); this->NSVE_field_stats<rnumber>::initialize(); this->kk = new kspace<FFTW, SMOOTH>( this->vorticity->clayout, this->dkx, this->dky, this->dkz); @@ -85,7 +111,7 @@ int joint_acc_vel_stats<rnumber>::initialize(void) template <typename rnumber> int joint_acc_vel_stats<rnumber>::work_on_current_iteration(void) { - DEBUG_MSG("entered joint_acc_vel_stats::work_on_current_iteration\n"); + TIMEZONE("joint_acc_vel_stats::work_on_current_iteration"); /// read current vorticity, place it in this->ve->cvorticity this->read_current_cvorticity(); *this->ve->cvorticity = this->vorticity->get_cdata(); @@ -109,7 +135,7 @@ int joint_acc_vel_stats<rnumber>::work_on_current_iteration(void) vel = new field<rnumber, FFTW, THREE>( this->nx, this->ny, this->nz, this->comm, - DEFAULT_FFTW_FLAG); + this->vorticity->fftw_plan_rigor); invert_curl(kk, this->ve->cvorticity, vel); vel->ift(); @@ -156,6 +182,7 @@ int joint_acc_vel_stats<rnumber>::work_on_current_iteration(void) template <typename rnumber> int joint_acc_vel_stats<rnumber>::finalize(void) { + DEBUG_MSG("entered joint_acc_vel_stats::finalize\n"); delete this->ve; delete this->kk; if (this->myrank == 0) diff --git a/bfps/cpp/full_code/joint_acc_vel_stats.hpp b/cpp/full_code/joint_acc_vel_stats.hpp similarity index 100% rename from bfps/cpp/full_code/joint_acc_vel_stats.hpp rename to cpp/full_code/joint_acc_vel_stats.hpp diff --git a/bfps/cpp/full_code/main_code.hpp b/cpp/full_code/main_code.hpp similarity index 100% rename from bfps/cpp/full_code/main_code.hpp rename to cpp/full_code/main_code.hpp diff --git a/cpp/full_code/native_binary_to_hdf5.cpp b/cpp/full_code/native_binary_to_hdf5.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0c2d738493aea060838c4acfed05251066b99bae --- /dev/null +++ b/cpp/full_code/native_binary_to_hdf5.cpp @@ -0,0 +1,99 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include <string> +#include <cmath> +#include "native_binary_to_hdf5.hpp" +#include "scope_timer.hpp" + + +template <typename rnumber> +int native_binary_to_hdf5<rnumber>::initialize(void) +{ + TIMEZONE("native_binary_to_hdf5::initialize"); + this->read_parameters(); + this->vec_field = new field<rnumber, FFTW, THREE>( + nx, ny, nz, + this->comm, + FFTW_ESTIMATE); + this->vec_field->real_space_representation = false; + this->bin_IO = new field_binary_IO<rnumber, COMPLEX, THREE>( + this->vec_field->clayout->sizes, + this->vec_field->clayout->subsizes, + this->vec_field->clayout->starts, + this->vec_field->clayout->comm); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int native_binary_to_hdf5<rnumber>::work_on_current_iteration(void) +{ + TIMEZONE("native_binary_to_hdf5::work_on_current_iteration"); + char itername[16]; + sprintf(itername, "i%.5x", this->iteration); + std::string native_binary_fname = ( + this->simname + + std::string("_cvorticity_") + + std::string(itername)); + this->bin_IO->read( + native_binary_fname, + this->vec_field->get_cdata()); + this->vec_field->io( + (native_binary_fname + + std::string(".h5")), + "vorticity", + this->iteration, + false); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int native_binary_to_hdf5<rnumber>::finalize(void) +{ + TIMEZONE("native_binary_to_hdf5::finalize"); + delete this->bin_IO; + delete this->vec_field; + return EXIT_SUCCESS; +} + +template <typename rnumber> +int native_binary_to_hdf5<rnumber>::read_parameters(void) +{ + TIMEZONE("native_binary_to_hdf5::read_parameters"); + this->postprocess::read_parameters(); + hid_t parameter_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + this->iteration_list = hdf5_tools::read_vector<int>( + parameter_file, + "/native_binary_to_hdf5/iteration_list"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template class native_binary_to_hdf5<float>; +template class native_binary_to_hdf5<double>; + diff --git a/bfps/cpp/full_code/native_binary_to_hdf5.hpp b/cpp/full_code/native_binary_to_hdf5.hpp similarity index 100% rename from bfps/cpp/full_code/native_binary_to_hdf5.hpp rename to cpp/full_code/native_binary_to_hdf5.hpp diff --git a/cpp/full_code/postprocess.cpp b/cpp/full_code/postprocess.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e8c7fb279821fff0e3fd82c85b1c490a0b6a68e7 --- /dev/null +++ b/cpp/full_code/postprocess.cpp @@ -0,0 +1,78 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include <cstdlib> +#include <sys/types.h> +#include <sys/stat.h> +#include "scope_timer.hpp" +#include "hdf5_tools.hpp" +#include "full_code/postprocess.hpp" + + +int postprocess::main_loop(void) +{ + TIMEZONE("postprocess::main_loop"); + this->start_simple_timer(); + for (unsigned int iteration_counter = 0; + iteration_counter < iteration_list.size(); + iteration_counter++) + { + this->iteration = iteration_list[iteration_counter]; + #ifdef USE_TIMINGOUTPUT + const std::string loopLabel = ("postprocess::main_loop-" + + std::to_string(this->iteration)); + TIMEZONE(loopLabel.c_str()); + #endif + this->work_on_current_iteration(); + this->print_simple_timer( + "iteration " + std::to_string(this->iteration)); + + this->check_stopping_condition(); + if (this->stop_code_now) + break; + } + return EXIT_SUCCESS; +} + + +int postprocess::read_parameters() +{ + TIMEZONE("postprocess::read_parameters"); + this->code_base::read_parameters(); + hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + this->nu = hdf5_tools::read_value<double>(parameter_file, "parameters/nu"); + this->dt = hdf5_tools::read_value<double>(parameter_file, "parameters/dt"); + this->fmode = hdf5_tools::read_value<int>(parameter_file, "parameters/fmode"); + this->famplitude = hdf5_tools::read_value<double>(parameter_file, "parameters/famplitude"); + this->friction_coefficient = hdf5_tools::read_value<double>(parameter_file, "parameters/friction_coefficient"); + this->fk0 = hdf5_tools::read_value<double>(parameter_file, "parameters/fk0"); + this->fk1 = hdf5_tools::read_value<double>(parameter_file, "parameters/fk1"); + this->energy = hdf5_tools::read_value<double>(parameter_file, "parameters/energy"); + std::string tmp = hdf5_tools::read_string(parameter_file, "parameters/forcing_type"); + snprintf(this->forcing_type, 511, "%s", tmp.c_str()); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + diff --git a/bfps/cpp/full_code/postprocess.hpp b/cpp/full_code/postprocess.hpp similarity index 97% rename from bfps/cpp/full_code/postprocess.hpp rename to cpp/full_code/postprocess.hpp index c80fc3f2dfdc35691d9e69442fa3ad7b6e592891..65e6eadd1fd05615eb69cb7d8ca1754abd1b7e42 100644 --- a/bfps/cpp/full_code/postprocess.hpp +++ b/cpp/full_code/postprocess.hpp @@ -43,8 +43,10 @@ class postprocess: public code_base /* parameters that are read in read_parameters */ double dt; double famplitude; + double friction_coefficient; double fk0; double fk1; + double energy; int fmode; char forcing_type[512]; double nu; diff --git a/cpp/full_code/resize.cpp b/cpp/full_code/resize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6d372dc462df8fc45729afe961488979f18ef818 --- /dev/null +++ 
b/cpp/full_code/resize.cpp @@ -0,0 +1,101 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include <string> +#include <cmath> +#include "resize.hpp" +#include "scope_timer.hpp" + + +template <typename rnumber> +int resize<rnumber>::initialize(void) +{ + TIMEZONE("resize::initialize"); + this->NSVE_field_stats<rnumber>::initialize(); + DEBUG_MSG("after NSVE_field_stats::initialize\n"); + hid_t parameter_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + + this->niter_out = hdf5_tools::read_value<int>( + parameter_file, "/parameters/niter_out"); + H5Fclose(parameter_file); + parameter_file = H5Fopen( + (this->simname + std::string("_post.h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + DEBUG_MSG("before read_vector\n"); + this->iteration_list = hdf5_tools::read_vector<int>( + parameter_file, + "/resize/parameters/iteration_list"); + + this->new_nx = hdf5_tools::read_value<int>( + parameter_file, "/resize/parameters/new_nx"); + this->new_ny = hdf5_tools::read_value<int>( + parameter_file, "/resize/parameters/new_ny"); + this->new_nz = hdf5_tools::read_value<int>( + parameter_file, "/resize/parameters/new_nz"); + this->new_simname = hdf5_tools::read_string( + parameter_file, "/resize/parameters/new_simname"); + H5Fclose(parameter_file); + + this->new_field = new field<rnumber, FFTW, THREE>( + this->new_nx, this->new_ny, this->new_nz, + this->comm, + this->vorticity->fftw_plan_rigor); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int resize<rnumber>::work_on_current_iteration(void) +{ + TIMEZONE("resize::work_on_current_iteration"); + this->read_current_cvorticity(); + + std::string fname = ( + this->new_simname + + std::string("_fields.h5")); + *this->new_field = *this->vorticity; + this->new_field->io( + fname, + "vorticity", + this->iteration, + false); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int resize<rnumber>::finalize(void) +{ + TIMEZONE("resize::finalize"); + delete this->new_field; + this->NSVE_field_stats<rnumber>::finalize(); + return EXIT_SUCCESS; +} + +template class resize<float>; +template class resize<double>; + diff --git a/cpp/full_code/resize.hpp b/cpp/full_code/resize.hpp new file mode 100644 index 0000000000000000000000000000000000000000..de227c886615ad48c8d7872f6533a0ad93b65307 --- /dev/null +++ b/cpp/full_code/resize.hpp @@ -0,0 +1,67 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef RESIZE_HPP +#define RESIZE_HPP + +#include <cstdlib> +#include <sys/types.h> +#include <sys/stat.h> +#include <vector> +#include "base.hpp" +#include "field.hpp" +#include "field_binary_IO.hpp" +#include "full_code/NSVE_field_stats.hpp" + +template <typename rnumber> +class resize: public NSVE_field_stats<rnumber> +{ + public: + std::string new_simname; + + int new_nx; + int new_ny; + int new_nz; + + int niter_out; + + field<rnumber, FFTW, THREE> *new_field; + + resize( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + NSVE_field_stats<rnumber>( + COMMUNICATOR, + simulation_name){} + virtual ~resize(){} + + int initialize(void); + int work_on_current_iteration(void); + int finalize(void); +}; + +#endif//RESIZE_HPP + diff --git a/cpp/full_code/symmetrize_test.cpp b/cpp/full_code/symmetrize_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..111d3a833815b3f9abf539db19b81f2d18d33a99 --- /dev/null +++ b/cpp/full_code/symmetrize_test.cpp @@ -0,0 +1,220 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include <string> +#include <cmath> +#include <random> +#include "symmetrize_test.hpp" +#include "fftw_tools.hpp" +#include "scope_timer.hpp" + + +template <typename rnumber> +int symmetrize_test<rnumber>::initialize(void) +{ + TIMEZONE("symmetrize_test::initialize"); + this->read_parameters(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int symmetrize_test<rnumber>::finalize(void) +{ + TIMEZONE("symmetrize_test::finalize"); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int symmetrize_test<rnumber>::read_parameters() +{ + TIMEZONE("symmetrize_test::read_parameters"); + this->test::read_parameters(); + hid_t parameter_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + this->random_seed = hdf5_tools::read_value<int>( + parameter_file, "/parameters/random_seed"); + this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int symmetrize_test<rnumber>::do_work(void) +{ + TIMEZONE("symmetrize_test::do_work"); + // allocate + DEBUG_MSG("about to allocate field0\n"); + field<rnumber, FFTW, THREE> *test_field0 = new field<rnumber, FFTW, THREE>( + this->nx, this->ny, this->nz, + this->comm, + fftw_planner_string_to_flag[this->fftw_plan_rigor]); + DEBUG_MSG("finished allocating field0\n"); + DEBUG_MSG("about to allocate field1\n"); + field<rnumber, FFTW, THREE> *test_field1 = new field<rnumber, FFTW, THREE>( + this->nx, this->ny, this->nz, + this->comm, + fftw_planner_string_to_flag[this->fftw_plan_rigor]); + DEBUG_MSG("finished allocating field1\n"); + std::default_random_engine rgen; + std::normal_distribution<rnumber> rdist; + rgen.seed(1); + kspace<FFTW,SMOOTH> *kk = new kspace<FFTW, SMOOTH>( + test_field0->clayout, this->dkx, this->dky, this->dkz); + + if (this->myrank == 0) + { + hid_t stat_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDWR, + H5P_DEFAULT); + kk->store(stat_file); + H5Fclose(stat_file); + } + + // fill up test_field0 + *test_field0 = 0.0; + test_field0->real_space_representation = false; + kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + test_field0->cval(cindex, 0, 0) = rdist(rgen); + test_field0->cval(cindex, 0, 1) = rdist(rgen); + test_field0->cval(cindex, 1, 0) = rdist(rgen); + test_field0->cval(cindex, 1, 1) = rdist(rgen); + test_field0->cval(cindex, 2, 0) = rdist(rgen); + test_field0->cval(cindex, 2, 1) = rdist(rgen); + if (k2 > 0) + { + test_field0->cval(cindex, 0, 0) /= sqrt(k2); + test_field0->cval(cindex, 0, 1) /= sqrt(k2); + test_field0->cval(cindex, 1, 0) /= sqrt(k2); + test_field0->cval(cindex, 1, 1) /= sqrt(k2); + test_field0->cval(cindex, 2, 0) /= sqrt(k2); + test_field0->cval(cindex, 2, 1) /= sqrt(k2); + } + else + { + test_field0->cval(cindex, 0, 0) = 0; + test_field0->cval(cindex, 0, 1) = 0; + test_field0->cval(cindex, 1, 0) = 0; + test_field0->cval(cindex, 1, 1) = 0; + test_field0->cval(cindex, 2, 0) = 0; + test_field0->cval(cindex, 2, 1) = 0; + } + }); + // dealias (?!) 
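// [editor's note, not part of the patch] summary of the steps that follow:
//   - test_field0 is dealiased, projected onto divergence-free fields, and symmetrized directly;
//   - test_field1 takes a copy of the symmetrized data and is run through an
//     ift() + dft() + normalize() round trip, which enforces the Hermitian symmetry
//     conj(f(k)) == f(-k) of a real-valued field numerically;
//   - the two fields are then compared mode by mode through the relative difference
//       |f0(k) - f1(k)| / sqrt((|f0(k)|^2 + |f1(k)|^2) / 2)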
+ kk->template dealias<rnumber, THREE>(test_field0->get_cdata()); + // make the field divergence free + kk->template force_divfree<rnumber>(test_field0->get_cdata()); + // apply symmetrize to test_field0 + test_field0->symmetrize(); + + + // make copy in test_field1 + // this MUST be made after symmetrizing test_field0 + // (alternatively, we may symmetrize test_field1 as well before the ift-dft cycle + test_field1->real_space_representation = false; + *test_field1 = test_field0->get_cdata(); + + // go back and forth with test_field1, to enforce symmetry + test_field1->ift(); + test_field1->dft(); + test_field1->normalize(); + + // now compare the two fields + double max_diff = 0; + ptrdiff_t ix, iy, iz; + double k_at_max_diff = 0; + double a0, a1; + + kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + double diff_re0 = test_field0->cval(cindex, 0, 0) - test_field1->cval(cindex, 0, 0); + double diff_re1 = test_field0->cval(cindex, 1, 0) - test_field1->cval(cindex, 1, 0); + double diff_re2 = test_field0->cval(cindex, 2, 0) - test_field1->cval(cindex, 2, 0); + double diff_im0 = test_field0->cval(cindex, 0, 1) - test_field1->cval(cindex, 0, 1); + double diff_im1 = test_field0->cval(cindex, 1, 1) - test_field1->cval(cindex, 1, 1); + double diff_im2 = test_field0->cval(cindex, 2, 1) - test_field1->cval(cindex, 2, 1); + double diff = sqrt(diff_re0*diff_re0 + diff_re1*diff_re1 + diff_re2*diff_re2 + + diff_im0*diff_im0 + diff_im1*diff_im1 + diff_im2*diff_im2); + double amplitude0 = (test_field0->cval(cindex, 0, 0)*test_field0->cval(cindex, 0, 0) + + test_field0->cval(cindex, 1, 0)*test_field0->cval(cindex, 1, 0) + + test_field0->cval(cindex, 2, 0)*test_field0->cval(cindex, 2, 0) + + test_field0->cval(cindex, 0, 1)*test_field0->cval(cindex, 0, 1) + + test_field0->cval(cindex, 1, 1)*test_field0->cval(cindex, 1, 1) + + test_field0->cval(cindex, 2, 1)*test_field0->cval(cindex, 2, 1)); + double amplitude1 = (test_field1->cval(cindex, 0, 0)*test_field1->cval(cindex, 0, 0) + + test_field1->cval(cindex, 1, 0)*test_field1->cval(cindex, 1, 0) + + test_field1->cval(cindex, 2, 0)*test_field1->cval(cindex, 2, 0) + + test_field1->cval(cindex, 0, 1)*test_field1->cval(cindex, 0, 1) + + test_field1->cval(cindex, 1, 1)*test_field1->cval(cindex, 1, 1) + + test_field1->cval(cindex, 2, 1)*test_field1->cval(cindex, 2, 1)); + double amplitude = sqrt((amplitude0 + amplitude1)/2); + if (amplitude > 0) + if (diff/amplitude > max_diff) + { + max_diff = diff / amplitude; + ix = xindex; + iy = yindex + test_field0->clayout->starts[0]; + iz = zindex; + k_at_max_diff = sqrt(k2); + a0 = sqrt(amplitude0); + a1 = sqrt(amplitude1); + } + }); + DEBUG_MSG("found maximum relative difference %g at ix = %ld, iy = %ld, iz = %ld, wavenumber = %g, amplitudes %g %g\n", + max_diff, ix, iy, iz, k_at_max_diff, a0, a1); + + test_field1->io( + this->simname + "_fields.h5", + "field1", + 0, + false); + test_field1->ift(); + test_field1->io( + this->simname + "_fields.h5", + "field1", + 0, + false); + + // deallocate + delete kk; + delete test_field1; + delete test_field0; + return EXIT_SUCCESS; +} + +template class symmetrize_test<float>; +template class symmetrize_test<double>; + diff --git a/cpp/full_code/symmetrize_test.hpp b/cpp/full_code/symmetrize_test.hpp new file mode 100644 index 0000000000000000000000000000000000000000..628aee6f5ba3fac23cfbe551418a6ff1213d7d5c --- /dev/null +++ b/cpp/full_code/symmetrize_test.hpp @@ -0,0 +1,63 @@ 
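Editor's note on symmetrize_test::do_work() above: ix, iy, iz, a0 and a1 are only assigned inside the comparison lambda when a mode with a larger relative difference is found, so if no mode qualifies on a given rank the final DEBUG_MSG prints indeterminate values. A defensive initialization of those locals, shown here only as a sketch, would avoid that:

    // hypothetical hardening of the locals declared before the comparison loop
    double max_diff = 0;
    ptrdiff_t ix = 0, iy = 0, iz = 0;
    double k_at_max_diff = 0;
    double a0 = 0, a1 = 0;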
+/********************************************************************** +* * +* Copyright 2018 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#ifndef SYMMETRIZE_TEST_HPP +#define SYMMETRIZE_TEST_HPP + + + +#include <cstdlib> +#include "base.hpp" +#include "kspace.hpp" +#include "field.hpp" +#include "full_code/test.hpp" + +/** \brief A class for testing basic field class functionality. + */ + +template <typename rnumber> +class symmetrize_test: public test +{ + public: + std::string fftw_plan_rigor; + int random_seed; + + symmetrize_test( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + test( + COMMUNICATOR, + simulation_name){} + ~symmetrize_test(){} + + int initialize(void); + int do_work(void); + int finalize(void); + int read_parameters(void); +}; + +#endif//SYMMETRIZE_TEST_HPP + diff --git a/cpp/full_code/test.cpp b/cpp/full_code/test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5fd265ec7ba661994f2e0664013770db0834fb7d --- /dev/null +++ b/cpp/full_code/test.cpp @@ -0,0 +1,43 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include <cstdlib> +#include <sys/types.h> +#include <sys/stat.h> +#include "scope_timer.hpp" +#include "hdf5_tools.hpp" +#include "full_code/test.hpp" + + +int test::main_loop(void) +{ + TIMEZONE("test::main_loop"); + this->start_simple_timer(); + this->do_work(); + this->print_simple_timer( + "do_work required "); + return EXIT_SUCCESS; +} + diff --git a/bfps/cpp/full_code/test.hpp b/cpp/full_code/test.hpp similarity index 98% rename from bfps/cpp/full_code/test.hpp rename to cpp/full_code/test.hpp index 134a01512b3fd836a8ac4d40068b3954752c4844..96ddaf8104f1dd3d050b4acf16a68dbcd539b290 100644 --- a/bfps/cpp/full_code/test.hpp +++ b/cpp/full_code/test.hpp @@ -56,7 +56,6 @@ class test: public code_base virtual int finalize(void) = 0; int main_loop(void); - virtual int read_parameters(void); }; #endif//TEST_HPP diff --git a/cpp/full_code/test_interpolation.cpp b/cpp/full_code/test_interpolation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e5722fc0cdb3c851695c0c3eeb34a5da97a405eb --- /dev/null +++ b/cpp/full_code/test_interpolation.cpp @@ -0,0 +1,234 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include "full_code/test_interpolation.hpp" + + +template <typename rnumber> +int test_interpolation<rnumber>::read_parameters(void) +{ + TIMEZONE("test_interpolation::read_parameters"); + this->test::read_parameters(); + hid_t parameter_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + this->nparticles = hdf5_tools::read_value<int>( + parameter_file, "/parameters/nparticles"); + this->tracers0_integration_steps = hdf5_tools::read_value<int>( + parameter_file, "/parameters/tracers0_integration_steps"); + this->tracers0_neighbours = hdf5_tools::read_value<int>( + parameter_file, "/parameters/tracers0_neighbours"); + this->tracers0_smoothness = hdf5_tools::read_value<int>( + parameter_file, "/parameters/tracers0_smoothness"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int test_interpolation<rnumber>::initialize(void) +{ + TIMEZONE("test_interpolation::initialize"); + this->read_parameters(); + this->vorticity = new field<rnumber, FFTW, THREE>( + this->nx, this->ny, this->nz, + this->comm, + FFTW_ESTIMATE); + this->vorticity->real_space_representation = false; + + this->velocity = new field<rnumber, FFTW, THREE>( + this->nx, this->ny, this->nz, + this->comm, + FFTW_ESTIMATE); + + this->nabla_u = new field<rnumber, FFTW, THREExTHREE>( + this->nx, this->ny, this->nz, + this->comm, + FFTW_ESTIMATE); + + this->kk = new kspace<FFTW, SMOOTH>( + this->vorticity->clayout, this->dkx, this->dky, this->dkz); + + if (this->myrank == 0) + { + hid_t stat_file = H5Fopen( + (this->simname + std::string(".h5")).c_str(), + H5F_ACC_RDWR, + H5P_DEFAULT); + this->kk->store(stat_file); + H5Fclose(stat_file); + } + + this->ps = particles_system_builder( + this->velocity, // (field object) + this->kk, // (kspace object, contains dkx, dky, dkz) + this->tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) + (long long int)nparticles, // to check coherency between parameters and hdf input file + this->simname + "_input.h5", // particles input filename + std::string("/tracers0/state/0"), // dataset name for initial input + std::string("/tracers0/rhs/0") , // dataset name for initial input + this->tracers0_neighbours, // parameter (interpolation no neighbours) + this->tracers0_smoothness, // parameter + this->comm, + 1); + this->particles_output_writer_mpi = new particles_output_hdf5< + long long int, double, 3>( + MPI_COMM_WORLD, + "tracers0", + nparticles, + this->tracers0_integration_steps); + this->particles_sample_writer_mpi = new particles_output_sampling_hdf5< + long long int, double, 3>( + MPI_COMM_WORLD, + this->ps->getGlobalNbParticles(), + (this->simname + "_output.h5"), + "tracers0", + "position/0"); + + return EXIT_SUCCESS; +} + +template <typename rnumber> +int test_interpolation<rnumber>::finalize(void) +{ + TIMEZONE("test_interpolation::finalize"); + delete this->nabla_u; + delete this->velocity; + delete this->vorticity; + delete this->ps.release(); + delete this->kk; + delete particles_output_writer_mpi; + delete particles_sample_writer_mpi; + return EXIT_SUCCESS; +} + +template <typename rnumber> +int test_interpolation<rnumber>::do_work() +{ + TIMEZONE("test_interpolation::do_work"); + *this->nabla_u = 0.0; + this->velocity->real_space_representation = false; + 
this->vorticity->real_space_representation = false; + this->nabla_u->real_space_representation = false; + // read vorticity field + this->vorticity->io( + this->simname + std::string("_input.h5"), + "vorticity", + 0, true); + this->kk->template force_divfree<rnumber>(this->vorticity->get_cdata()); + + // compute velocity + invert_curl(this->kk, this->vorticity, this->velocity); + + // compute velocity gradient + compute_gradient(this->kk, this->velocity, this->nabla_u); + + // go to real space + this->vorticity->ift(); + this->velocity->ift(); + this->nabla_u->ift(); + + DEBUG_MSG("some vorticity values: %g %g %g\n", + this->vorticity->rval(20, 1), + this->vorticity->rval(200, 2), + this->vorticity->rval(741, 0)); + DEBUG_MSG("corresponding velocity gradient to vorticity values: %g %g %g\n", + this->nabla_u->rval( 20, 2, 0) - this->nabla_u->rval( 20, 0, 2), + this->nabla_u->rval(200, 1, 0) - this->nabla_u->rval(200, 0, 1), + this->nabla_u->rval(741, 1, 2) - this->nabla_u->rval(741, 2, 1)); + + // allocate interpolation arrays + std::unique_ptr<double[]> p3data; + std::unique_ptr<double[]> p9data; + if(this->ps->getLocalNbParticles()){ + p3data.reset(new double[3*this->ps->getLocalNbParticles()]); + p9data.reset(new double[9*this->ps->getLocalNbParticles()]); + } + + /// sample position + std::copy(this->ps->getParticlesState(), + this->ps->getParticlesState()+3*this->ps->getLocalNbParticles(), + p3data.get()); + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "position", + this->ps->getParticlesState(), + &p3data, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + /// sample velocity at particles' position + std::fill_n(p3data.get(), 3*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->velocity, p3data.get()); + if(p3data){ + DEBUG_MSG("first vel value is %g\n", p3data.get()[0]); + } + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "velocity", + this->ps->getParticlesState(), + &p3data, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + /// sample vorticity at particles' position + std::fill_n(p3data.get(), 3*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->vorticity, p3data.get()); + if(p3data){ + DEBUG_MSG("first vort value is %g\n", p3data.get()[0]); + } + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "vorticity", + this->ps->getParticlesState(), + &p3data, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + /// sample velocity gradient at particles' position + std::fill_n(p9data.get(), 9*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->nabla_u, p9data.get()); + if(p9data){ + DEBUG_MSG("first vel gradient value is %g\n", p9data.get()[0]); + } + this->particles_sample_writer_mpi->template save_dataset<9>( + "tracers0", + "velocity_gradient", + this->ps->getParticlesState(), + &p9data, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + // deallocate temporary arrays + delete[] p3data.release(); + delete[] p9data.release(); + return EXIT_SUCCESS; +} + +template class test_interpolation<float>; +template class test_interpolation<double>; + diff --git a/cpp/full_code/test_interpolation.hpp b/cpp/full_code/test_interpolation.hpp new file mode 100644 index 
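For reference, the velocity reconstruction used in test_interpolation::do_work() above (the invert_curl call) follows the standard Fourier-space inversion of a solenoidal vorticity field; the preceding force_divfree call projects the vorticity onto solenoidal fields so that the inversion is well posed. The identity, stated here as background rather than as a quote of the implementation, is

    \hat{u}(\mathbf{k}) = \frac{i\,\mathbf{k} \times \hat{\omega}(\mathbf{k})}{|\mathbf{k}|^{2}},
    \qquad \hat{u}(\mathbf{0}) = 0 .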
0000000000000000000000000000000000000000..4d65d4fa8105d8e5f78f3fc9f623929038f8ac0e --- /dev/null +++ b/cpp/full_code/test_interpolation.hpp @@ -0,0 +1,84 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef TEST_INTERPOLATION_HPP +#define TEST_INTERPOLATION_HPP + + + +#include <cstdlib> +#include "base.hpp" +#include "kspace.hpp" +#include "full_code/test.hpp" +#include "particles/particles_system_builder.hpp" +#include "particles/particles_output_hdf5.hpp" +#include "particles/particles_sampling.hpp" + +/** \brief Interpolation tester. + * + */ + +template <typename rnumber> +class test_interpolation: public test +{ + public: + int nparticles; + int tracers0_integration_steps; + int tracers0_neighbours; + int tracers0_smoothness; + + std::unique_ptr<abstract_particles_system<long long int, double>> ps; + + particles_output_hdf5<long long int, double,3> *particles_output_writer_mpi; + particles_output_sampling_hdf5<long long int, double, 3> *particles_sample_writer_mpi; + + field<rnumber, FFTW, THREE> *velocity, *vorticity; + field<rnumber, FFTW, THREExTHREE> *nabla_u; + + kspace<FFTW, SMOOTH> *kk; + + test_interpolation( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + test( + COMMUNICATOR, + simulation_name), + particles_output_writer_mpi(nullptr), + particles_sample_writer_mpi(nullptr), + velocity(nullptr), + vorticity(nullptr), + nabla_u(nullptr), + kk(nullptr) {} + ~test_interpolation(){} + + int initialize(void); + int do_work(void); + int finalize(void); + + int read_parameters(void); +}; + +#endif//TEST_INTERPOLATION_HPP + diff --git a/bfps/cpp/hdf5_tools.cpp b/cpp/hdf5_tools.cpp similarity index 71% rename from bfps/cpp/hdf5_tools.cpp rename to cpp/hdf5_tools.cpp index 4328b28703ac60de7e82e4e3e729134ee3ff1520..5a3aef39caa2824f4d08e579d35734a1438ba5ec 100644 --- a/bfps/cpp/hdf5_tools.cpp +++ b/cpp/hdf5_tools.cpp @@ -1,4 +1,6 @@ #include "hdf5_tools.hpp" +#include <cfloat> +#include <climits> int hdf5_tools::require_size_single_dataset(hid_t dset, int tsize) { @@ -136,6 +138,37 @@ std::vector<number> hdf5_tools::read_vector( return result; } +template <typename number> +number hdf5_tools::read_value( + const hid_t group, + const std::string dset_name) +{ + number result; + hid_t dset; + hid_t mem_dtype; + if (typeid(number) == typeid(int)) + mem_dtype = H5Tcopy(H5T_NATIVE_INT); + else if (typeid(number) == typeid(double)) + mem_dtype = H5Tcopy(H5T_NATIVE_DOUBLE); + if (H5Lexists(group, dset_name.c_str(), H5P_DEFAULT)) + { + dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT); + H5Dread(dset, mem_dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, 
&result); + H5Dclose(dset); + } + else + { + DEBUG_MSG("attempted to read dataset %s which does not exist.\n", + dset_name.c_str()); + if (typeid(number) == typeid(int)) + result = INT_MAX; + else if (typeid(number) == typeid(double)) + result = number(DBL_MAX); + } + H5Tclose(mem_dtype); + return result; +} + template <typename dtype> std::vector<dtype> hdf5_tools::read_vector_with_single_rank( const int myrank, @@ -175,17 +208,29 @@ std::string hdf5_tools::read_string( const hid_t group, const std::string dset_name) { - hid_t dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT); - hid_t space = H5Dget_space(dset); - hid_t memtype = H5Dget_type(dset); - char *string_data = (char*)malloc(256); - H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data); - std::string std_string_data = std::string(string_data); - free(string_data); - H5Sclose(space); - H5Tclose(memtype); - H5Dclose(dset); - return std_string_data; + if (H5Lexists(group, dset_name.c_str(), H5P_DEFAULT)) + { + hid_t dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT); + hid_t space = H5Dget_space(dset); + hid_t memtype = H5Dget_type(dset); + // fsanitize complains unless I have a static array here + // but that doesn't actually work (data is read incorrectly). + // this is caught by bfps.test_NSVEparticles + char *string_data = (char*)malloc(256); + H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data); + std::string std_string_data = std::string(string_data); + free(string_data); + H5Sclose(space); + H5Tclose(memtype); + H5Dclose(dset); + return std_string_data; + } + else + { + DEBUG_MSG("attempted to read dataset %s which does not exist.\n", + dset_name.c_str()); + return std::string("parameter does not exist"); + } } template @@ -214,3 +259,13 @@ std::vector<double> hdf5_tools::read_vector_with_single_rank<double>( const hid_t file_id, const std::string dset_name); +template +int hdf5_tools::read_value<int>( + const hid_t, + const std::string); + +template +double hdf5_tools::read_value<double>( + const hid_t, + const std::string); + diff --git a/bfps/cpp/hdf5_tools.hpp b/cpp/hdf5_tools.hpp similarity index 95% rename from bfps/cpp/hdf5_tools.hpp rename to cpp/hdf5_tools.hpp index 456beefe362c5d0871f8014c7a1cc468614e6374..99ba45a1c25593e063e33c54521ed492822aca45 100644 --- a/bfps/cpp/hdf5_tools.hpp +++ b/cpp/hdf5_tools.hpp @@ -79,6 +79,11 @@ namespace hdf5_tools std::string read_string( const hid_t group, const std::string dset_name); + + template <typename number> + number read_value( + const hid_t group, + const std::string dset_name); } #endif//HDF5_TOOLS_HPP diff --git a/bfps/cpp/kspace.cpp b/cpp/kspace.cpp similarity index 73% rename from bfps/cpp/kspace.cpp rename to cpp/kspace.cpp index 01accf4dc1f24d7fe92ee622dd4faf9eaf12a485..5accb969cd93735b0cfb4d5deecfbb8ccd914910 100644 --- a/bfps/cpp/kspace.cpp +++ b/cpp/kspace.cpp @@ -23,6 +23,9 @@ **********************************************************************/ + +#define NDEBUG + #include <cmath> #include <cstdlib> #include <algorithm> @@ -31,6 +34,8 @@ #include "scope_timer.hpp" #include "shared_array.hpp" + + template <field_backend be, kspace_dealias_type dt> template <field_components fc> @@ -66,17 +71,17 @@ kspace<be, dt>::kspace( for (i = 0; i<int(this->layout->subsizes[0]); i++) { ii = i + this->layout->starts[0]; - if (ii <= int(this->layout->sizes[1]/2)) + if (ii <= int(this->layout->sizes[0]/2)) this->ky[i] = this->dky*ii; else - this->ky[i] = this->dky*(ii - int(this->layout->sizes[1])); + this->ky[i] = this->dky*(ii - 
int(this->layout->sizes[0])); } for (i = 0; i<int(this->layout->sizes[1]); i++) { - if (i <= int(this->layout->sizes[0]/2)) + if (i <= int(this->layout->sizes[1]/2)) this->kz[i] = this->dkz*i; else - this->kz[i] = this->dkz*(i - int(this->layout->sizes[0])); + this->kz[i] = this->dkz*(i - int(this->layout->sizes[1])); } switch(dt) { @@ -116,8 +121,6 @@ kspace<be, dt>::kspace( std::fill_n(nshell_local, this->nshells, 0); }); - std::vector<std::unordered_map<int, double>> dealias_filter_threaded(omp_get_max_threads()); - this->CLOOP_K2_NXMODES( [&](ptrdiff_t cindex, ptrdiff_t xindex, @@ -131,9 +134,6 @@ kspace<be, dt>::kspace( kshell_local_thread.getMine()[int(knorm/this->dk)] += nxmodes*knorm; nshell_local_thread.getMine()[int(knorm/this->dk)] += nxmodes; } - if (dt == SMOOTH){ - dealias_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.)); - } }); // Merge results @@ -141,14 +141,6 @@ kspace<be, dt>::kspace( kshell_local_thread.mergeParallel(); nshell_local_thread.mergeParallel(); - if (dt == SMOOTH){ - for(int idxMerge = 0 ; idxMerge < int(dealias_filter_threaded.size()) ; ++idxMerge){ - for(const auto kv : dealias_filter_threaded[idxMerge]){ - this->dealias_filter[kv.first] = kv.second; - } - } - } - MPI_Allreduce( nshell_local_thread.getMasterData(), &this->nshell.front(), @@ -293,6 +285,42 @@ void kspace<be, dt>::ball_filter( }); } +/** \brief Filter a field using a M-filter to reproduce dissipation range. + * + * Filter's Fourier space expression: + * \f[ + * \hat{\phi^M_\ell}(k) = + * \exp(-\frac{(3.54 k \ell)^(122*(\ell)^(0.0836))}{2}) + * \left( 1 + \frac{(k \eta/0.0636)^3.44}{1 + (k \eta/ 0.0621)^3.44} \right)^{1/2} + * \f] + */ +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::general_M_filter( + typename fftw_interface<rnumber>::complex *__restrict__ a, + const double ell) +{ + const double prefactor0 = 1.0; + this->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + if (k2 > 0) + { + double argument = sqrt(k2)*ell; + double prefactor = prefactor0; + for (unsigned int tcounter=0; tcounter<2*ncomp(fc); tcounter++) + ((rnumber*)a)[2*ncomp(fc)*cindex + tcounter] *= ( + prefactor * (exp(-0.5*pow((2.9*argument),(68.0*(pow(ell,0.74))))) * sqrt(1.0 + (pow((argument/0.06),3.8))/(1.0 + (pow((argument/0.057),3.8)))))); + } + }); +} + + /** \brief Filter a field using a Gaussian kernel. 
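Editor's note: the doxygen formula in the general_M_filter comment above and the code beneath it do not use the same constants (3.54, an exponent of 122 ℓ^0.0836, 3.44 and 0.0636/0.0621 in the comment, versus 2.9, 68 ℓ^0.74, 3.8 and 0.06/0.057 in the implementation), and the comment writes the second factor in terms of k η while the code uses k ℓ throughout; one of the two presumably needs updating. For reference, the expression the code actually implements, writing the argument as k ℓ = sqrt(k2)*ell, is

    \hat{\phi}^{M}_{\ell}(k) =
    \exp\!\left(-\tfrac{1}{2}\,(2.9\,k\ell)^{\,68\,\ell^{0.74}}\right)
    \sqrt{1 + \frac{(k\ell/0.06)^{3.8}}{1 + (k\ell/0.057)^{3.8}}} .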
* * Filter's mathematical expression in Fourier space is as follows: @@ -386,6 +414,12 @@ int kspace<be, dt>::filter( a, 2*acos(0.)/wavenumber); } + else if (filter_type == std::string("general_M")) + { + this->template general_M_filter<rnumber, fc>( + a, + 2*acos(0.)/wavenumber); + } return EXIT_SUCCESS; } @@ -437,6 +471,7 @@ int kspace<be, dt>::filter_calibrated_ell( const double ell, std::string filter_type) { + TIMEZONE("kspace::filter_calibrated_ell"); if (filter_type == std::string("sharp_Fourier_sphere")) { this->template low_pass<rnumber, fc>( @@ -455,6 +490,12 @@ int kspace<be, dt>::filter_calibrated_ell( a, ell); } + else if (filter_type == std::string("general_M")) + { + this->template general_M_filter<rnumber, fc>( + a, + ell); + } return EXIT_SUCCESS; } @@ -464,19 +505,22 @@ template <typename rnumber, field_components fc> void kspace<be, dt>::dealias(typename fftw_interface<rnumber>::complex *__restrict__ a) { + TIMEZONE("kspace::dealias"); switch(dt) { case TWO_THIRDS: this->low_pass<rnumber, fc>(a, this->kM); break; case SMOOTH: - this->CLOOP_K2( + this->CLOOP( [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, - ptrdiff_t zindex, - double k2){ - double tval = this->dealias_filter[int(round(k2 / this->dk2))]; + ptrdiff_t zindex){ + double kk2 = (pow(this->kx[xindex]/this->kMx, 2) + + pow(this->ky[yindex]/this->kMy, 2) + + pow(this->kz[zindex]/this->kMz, 2)); + double tval = exp(-36.0 * (pow(kk2, 18))); for (unsigned int tcounter=0; tcounter<2*ncomp(fc); tcounter++) ((rnumber*)a)[2*ncomp(fc)*cindex + tcounter] *= tval; }); @@ -529,7 +573,7 @@ void kspace<be, dt>::cospectrum( const std::string dset_name, const hsize_t toffset) { - TIMEZONE("field::cospectrum"); + TIMEZONE("field::cospectrum2"); shared_array<double> spec_local_thread(this->nshells*ncomp(fc)*ncomp(fc),[&](double* spec_local){ std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0); }); @@ -575,13 +619,13 @@ void kspace<be, dt>::cospectrum( case THREExTHREE: offset[4] = 0; offset[5] = 0; - count[4] = ncomp(fc); - count[5] = ncomp(fc); + count[4] = 3; + count[5] = 3; case THREE: offset[2] = 0; offset[3] = 0; - count[2] = ncomp(fc); - count[3] = ncomp(fc); + count[2] = 3; + count[3] = 3; default: offset[0] = toffset; offset[1] = 0; @@ -597,6 +641,124 @@ void kspace<be, dt>::cospectrum( } } +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::cospectrum( + const rnumber(* __restrict a)[2], + const hid_t group, + const std::string dset_name, + const hsize_t toffset) +{ + TIMEZONE("field::cospectrum1"); + shared_array<double> spec_local_thread(this->nshells*ncomp(fc)*ncomp(fc),[&](double* spec_local){ + std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0); + }); + + this->CLOOP_K2_NXMODES( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2, + int nxmodes){ + if (k2 <= this->kM2) + { + double* spec_local = spec_local_thread.getMine(); + int tmp_int = int(sqrt(k2) / this->dk)*ncomp(fc)*ncomp(fc); + for (hsize_t i=0; i<ncomp(fc); i++) + for (hsize_t j=0; j<ncomp(fc); j++){ + spec_local[tmp_int + i*ncomp(fc)+j] += nxmodes * ( + (a[ncomp(fc)*cindex + i][0] * a[ncomp(fc)*cindex + j][0]) + + (a[ncomp(fc)*cindex + i][1] * a[ncomp(fc)*cindex + j][1])); + } + } + }); + + spec_local_thread.mergeParallel(); + + std::vector<double> spec; + spec.resize(this->nshells*ncomp(fc)*ncomp(fc), 0); + MPI_Allreduce( + spec_local_thread.getMasterData(), + &spec.front(), + spec.size(), + MPI_DOUBLE, 
MPI_SUM, this->layout->comm); + if (this->layout->myrank == 0) + { + hid_t dset, wspace, mspace; + hsize_t count[(ndim(fc)-2)*2], offset[(ndim(fc)-2)*2], dims[(ndim(fc)-2)*2]; + dset = H5Dopen(group, ("spectra/" + dset_name).c_str(), H5P_DEFAULT); + wspace = H5Dget_space(dset); + H5Sget_simple_extent_dims(wspace, dims, NULL); + switch (fc) + { + case THREExTHREE: + offset[4] = 0; + offset[5] = 0; + count[4] = 3; + count[5] = 3; + case THREE: + offset[2] = 0; + offset[3] = 0; + count[2] = 3; + count[3] = 3; + default: + offset[0] = toffset; + offset[1] = 0; + count[0] = 1; + count[1] = this->nshells; + } + mspace = H5Screate_simple((ndim(fc)-2)*2, count, NULL); + H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, &spec.front()); + H5Sclose(wspace); + H5Sclose(mspace); + H5Dclose(dset); + } +} + +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +double kspace<be, dt>::L2norm( + const rnumber(* __restrict a)[2]) +{ + TIMEZONE("field::L2norm"); + shared_array<double> L2_local_thread(1,[&](double* spec_local){ + std::fill_n(spec_local, 1, 0); + }); + + this->CLOOP_K2_NXMODES( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2, + int nxmodes){ + { + double* L2_local = L2_local_thread.getMine(); + for (hsize_t i=0; i<ncomp(fc); i++){ + L2_local[0] += nxmodes * ( + (a[ncomp(fc)*cindex + i][0] * a[ncomp(fc)*cindex + i][0]) + + (a[ncomp(fc)*cindex + i][1] * a[ncomp(fc)*cindex + i][1])); + } + } + }); + + L2_local_thread.mergeParallel(); + + double L2; + MPI_Allreduce( + L2_local_thread.getMasterData(), + &L2, + 1, + MPI_DOUBLE, MPI_SUM, this->layout->comm); + return sqrt(L2 * this->dkx * this->dky * this->dkz); +} + template class kspace<FFTW, TWO_THIRDS>; template class kspace<FFTW, SMOOTH>; @@ -801,6 +963,94 @@ template void kspace<FFTW, SMOOTH>::cospectrum<double, THREExTHREE>( const std::string dset_name, const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREExTHREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + +template void kspace<FFTW, SMOOTH>::cospectrum<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, 
SMOOTH>::cospectrum<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, THREExTHREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + +template double kspace<FFTW, TWO_THIRDS>::L2norm<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a); +template double kspace<FFTW, TWO_THIRDS>::L2norm<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a); +template double kspace<FFTW, TWO_THIRDS>::L2norm<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a); +template double kspace<FFTW, TWO_THIRDS>::L2norm<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a); +template double kspace<FFTW, TWO_THIRDS>::L2norm<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a); +template double kspace<FFTW, TWO_THIRDS>::L2norm<double, THREExTHREE>( + const typename fftw_interface<double>::complex *__restrict__ a); + +template double kspace<FFTW, SMOOTH>::L2norm<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a); +template double kspace<FFTW, SMOOTH>::L2norm<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a); +template double kspace<FFTW, SMOOTH>::L2norm<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a); +template double kspace<FFTW, SMOOTH>::L2norm<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a); +template double kspace<FFTW, SMOOTH>::L2norm<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a); +template double kspace<FFTW, SMOOTH>::L2norm<double, THREExTHREE>( + const typename fftw_interface<double>::complex *__restrict__ a); + template void kspace<FFTW, SMOOTH>::force_divfree<float>( typename fftw_interface<float>::complex *__restrict__ a); template void kspace<FFTW, SMOOTH>::force_divfree<double>( diff --git a/bfps/cpp/kspace.hpp b/cpp/kspace.hpp similarity index 91% rename from bfps/cpp/kspace.hpp rename to cpp/kspace.hpp index d8bc008daade0704c5f8c1981c4a4f24400a5868..0d36e9cceece13cd7fec2e741b4a087aa4e26156 100644 --- a/bfps/cpp/kspace.hpp +++ b/cpp/kspace.hpp @@ -54,7 +54,6 @@ class kspace /* mode and dealiasing information */ double kMx, kMy, kMz, kM, kM2; std::vector<double> kx, ky, kz; - std::unordered_map<int, double> dealias_filter; std::vector<double> kshell; std::vector<int64_t> nshell; int nshells; @@ -88,6 +87,12 @@ class kspace typename fftw_interface<rnumber>::complex *__restrict__ a, const double sigma); + template <typename rnumber, + field_components fc> + void general_M_filter( + typename fftw_interface<rnumber>::complex *__restrict__ a, + 
const double sigma); + template <typename rnumber, field_components fc> int filter( @@ -114,6 +119,20 @@ class kspace const hid_t group, const std::string dset_name, const hsize_t toffset); + + template <typename rnumber, + field_components fc> + void cospectrum( + const rnumber(* __restrict__ a)[2], + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + + template <typename rnumber, + field_components fc> + double L2norm( + const rnumber(* __restrict__ a)[2]); + template <class func_type> void CLOOP(func_type expression) { @@ -150,8 +169,8 @@ class kspace for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++) { double k2 = (this->kx[xindex]*this->kx[xindex] + - this->ky[yindex]*this->ky[yindex] + - this->kz[zindex]*this->kz[zindex]); + this->ky[yindex]*this->ky[yindex] + + this->kz[zindex]*this->kz[zindex]); expression(cindex, xindex, yindex, zindex, k2); cindex++; } @@ -173,7 +192,6 @@ class kspace + zindex*this->layout->subsizes[2]; hsize_t xindex = 0; double k2 = ( - this->kx[xindex]*this->kx[xindex] + this->ky[yindex]*this->ky[yindex] + this->kz[zindex]*this->kz[zindex]); expression(cindex, xindex, yindex, zindex, k2, 1); diff --git a/bfps/cpp/omputils.hpp b/cpp/omputils.hpp similarity index 100% rename from bfps/cpp/omputils.hpp rename to cpp/omputils.hpp diff --git a/cpp/particles/.tocompile b/cpp/particles/.tocompile new file mode 100644 index 0000000000000000000000000000000000000000..02874ed792f4eedb859e1b779facd3d2c775ec08 --- /dev/null +++ b/cpp/particles/.tocompile @@ -0,0 +1,2 @@ +mpicxx -g main_tocompile.cpp -o /tmp/main_test_part.exe -I/home/bbramas/Projects/bfps/bfps/cpp/ -I/home/bbramas/Downloads/hdf5install/include -L/home/bbramas/Downloads/hdf5install/lib -lhdf5 -lsz -lz +mpicxx -fPIC -rdynamic -g NSVE-v2.0.1-single.cpp -o /tmp/NSVE-v2.0.1-single.exe -I/home/bbramas/Projects/bfps/bfps/cpp/ -I/home/bbramas/Downloads/hdf5install/include -I/home/bbramas/Downloads/fftw-3.3.4/install/include/ -L/home/bbramas/Downloads/hdf5install/lib -lhdf5 -lsz -lz -L/home/bbramas/.local/lib/python2.7/site-packages/bfps-2.0.1.post31+g12693ea-py2.7.egg/bfps/ -lbfps -fopenmp -lgomp -L/home/bbramas/Downloads/fftw-3.3.4/install/lib/ -lfftw3_mpi -lfftw3f_mpi -lfftw3_omp -lfftw3f_omp -lfftw3 -lfftw3f diff --git a/cpp/particles/abstract_particles_input.hpp b/cpp/particles/abstract_particles_input.hpp new file mode 100644 index 0000000000000000000000000000000000000000..48c38bc592ddc442489d437327b421312bfd3f55 --- /dev/null +++ b/cpp/particles/abstract_particles_input.hpp @@ -0,0 +1,46 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef ABSTRACT_PARTICLES_INPUT_HPP +#define ABSTRACT_PARTICLES_INPUT_HPP + +#include <tuple> + +template <class partsize_t, class real_number> +class abstract_particles_input { +public: + virtual ~abstract_particles_input(){} + + virtual partsize_t getTotalNbParticles() = 0; + virtual partsize_t getLocalNbParticles() = 0; + virtual int getNbRhs() = 0; + + virtual std::unique_ptr<real_number[]> getMyParticles() = 0; + virtual std::unique_ptr<partsize_t[]> getMyParticlesIndexes() = 0; + virtual std::vector<std::unique_ptr<real_number[]>> getMyRhs() = 0; +}; + + +#endif diff --git a/bfps/cpp/particles/abstract_particles_output.hpp b/cpp/particles/abstract_particles_output.hpp similarity index 71% rename from bfps/cpp/particles/abstract_particles_output.hpp rename to cpp/particles/abstract_particles_output.hpp index a6eccaea003618b8acbf1a9252c1e6c5bedb3378..6dc85cebba83e8650329700f15284081301ba3c5 100644 --- a/bfps/cpp/particles/abstract_particles_output.hpp +++ b/cpp/particles/abstract_particles_output.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
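The abstract_particles_input interface above returns its buffers as std::unique_ptr, which suggests that a concrete reader hands ownership over to the particle system once. A minimal in-memory implementation, written only as an illustration of that convention (hypothetical class, not part of the patch; it assumes <memory> and <vector> are available), could be:

    // illustrative in-memory input: the caller pre-fills the buffers and the
    // particle system takes them over through the getMy*() calls
    template <class partsize_t, class real_number>
    class in_memory_particles_input : public abstract_particles_input<partsize_t, real_number> {
        partsize_t nb_local, nb_total;
        std::unique_ptr<real_number[]> positions;   // 3*nb_local values
        std::unique_ptr<partsize_t[]> indexes;      // nb_local global labels
    public:
        in_memory_particles_input(partsize_t in_local, partsize_t in_total,
                                  std::unique_ptr<real_number[]> in_pos,
                                  std::unique_ptr<partsize_t[]> in_idx)
            : nb_local(in_local), nb_total(in_total),
              positions(std::move(in_pos)), indexes(std::move(in_idx)) {}
        partsize_t getTotalNbParticles() final { return nb_total; }
        partsize_t getLocalNbParticles() final { return nb_local; }
        int getNbRhs() final { return 0; }
        std::unique_ptr<real_number[]> getMyParticles() final { return std::move(positions); }
        std::unique_ptr<partsize_t[]> getMyParticlesIndexes() final { return std::move(indexes); }
        std::vector<std::unique_ptr<real_number[]>> getMyRhs() final { return {}; }
    };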
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef ABSTRACT_PARTICLES_OUTPUT #define ABSTRACT_PARTICLES_OUTPUT @@ -13,7 +38,7 @@ #include "scope_timer.hpp" #include "env_utils.hpp" -template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs> +template <class partsize_t, class real_number, int size_particle_positions> class abstract_particles_output { MPI_Comm mpi_com; MPI_Comm mpi_com_writer; @@ -28,11 +53,13 @@ class abstract_particles_output { std::unique_ptr<real_number[]> buffer_particles_positions_send; std::vector<std::unique_ptr<real_number[]>> buffer_particles_rhs_send; partsize_t size_buffers_send; + int buffers_size_particle_rhs_send; std::unique_ptr<real_number[]> buffer_particles_positions_recv; std::vector<std::unique_ptr<real_number[]>> buffer_particles_rhs_recv; std::unique_ptr<partsize_t[]> buffer_indexes_recv; - partsize_t size_buffers_recv; + partsize_t size_buffers_recv; + int buffers_size_particle_rhs_recv; int nb_processes_involved; bool current_is_involved; @@ -41,6 +68,10 @@ class abstract_particles_output { partsize_t particles_chunk_current_offset; protected: + MPI_Comm& getCom(){ + return mpi_com; + } + MPI_Comm& getComWriter(){ return mpi_com_writer; } @@ -61,8 +92,10 @@ public: abstract_particles_output(MPI_Comm in_mpi_com, const partsize_t inTotalNbParticles, const int in_nb_rhs) throw() : mpi_com(in_mpi_com), my_rank(-1), nb_processes(-1), total_nb_particles(inTotalNbParticles), nb_rhs(in_nb_rhs), - buffer_particles_rhs_send(in_nb_rhs), size_buffers_send(-1), - buffer_particles_rhs_recv(in_nb_rhs), size_buffers_recv(-1), + buffer_particles_rhs_send(in_nb_rhs), size_buffers_send(0), + buffers_size_particle_rhs_send(0), + buffer_particles_rhs_recv(in_nb_rhs), size_buffers_recv(0), + buffers_size_particle_rhs_recv(0), nb_processes_involved(0), current_is_involved(true), particles_chunk_per_process(0), particles_chunk_current_size(0), particles_chunk_current_offset(0) { @@ -129,18 +162,21 @@ public: } void releaseMemory(){ - buffer_indexes_send.release(); - buffer_particles_positions_send.release(); - size_buffers_send = -1; - buffer_indexes_recv.release(); - buffer_particles_positions_recv.release(); - size_buffers_recv = -1; + delete[] buffer_indexes_send.release(); + delete[] buffer_particles_positions_send.release(); + size_buffers_send = 0; + delete[] buffer_indexes_recv.release(); + delete[] buffer_particles_positions_recv.release(); + size_buffers_recv = 0; for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ - buffer_particles_rhs_send[idx_rhs].release(); - buffer_particles_rhs_recv[idx_rhs].release(); + delete[] buffer_particles_rhs_send[idx_rhs].release(); + delete[] buffer_particles_rhs_recv[idx_rhs].release(); } + buffers_size_particle_rhs_send = 0; + buffers_size_particle_rhs_recv = 0; } + template <int size_particle_rhs> void save( const real_number input_particles_positions[], const std::unique_ptr<real_number[]> input_particles_rhs[], @@ -153,13 +189,25 @@ public: { TIMEZONE("sort-to-distribute"); - if(size_buffers_send < nb_particles && nb_particles){ - buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[nb_particles]); - buffer_particles_positions_send.reset(new real_number[nb_particles*size_particle_positions]); + if(size_buffers_send < nb_particles){ + size_buffers_send = nb_particles; + buffer_indexes_send.reset(new 
std::pair<partsize_t,partsize_t>[size_buffers_send]); + buffer_particles_positions_send.reset(new real_number[size_buffers_send*size_particle_positions]); + + if(buffers_size_particle_rhs_send < size_particle_rhs){ + buffers_size_particle_rhs_send = size_particle_rhs; + } for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ - buffer_particles_rhs_send[idx_rhs].reset(new real_number[nb_particles*size_particle_rhs]); + buffer_particles_rhs_send[idx_rhs].reset(new real_number[size_buffers_send*buffers_size_particle_rhs_send]); + } + } + else if(buffers_size_particle_rhs_send < size_particle_rhs){ + buffers_size_particle_rhs_send = size_particle_rhs; + if(size_buffers_send > 0){ + for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ + buffer_particles_rhs_send[idx_rhs].reset(new real_number[size_buffers_send*buffers_size_particle_rhs_send]); + } } - size_buffers_send = nb_particles; } for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ @@ -204,13 +252,23 @@ public: const int nb_to_receive = exchanger.getTotalToRecv(); assert(nb_to_receive == particles_chunk_current_size); - if(size_buffers_recv < nb_to_receive && nb_to_receive){ - buffer_indexes_recv.reset(new partsize_t[nb_to_receive]); - buffer_particles_positions_recv.reset(new real_number[nb_to_receive*size_particle_positions]); + if(size_buffers_recv < nb_to_receive){ + size_buffers_recv = nb_to_receive; + buffer_indexes_recv.reset(new partsize_t[size_buffers_recv]); + buffer_particles_positions_recv.reset(new real_number[size_buffers_recv*size_particle_positions]); + + buffers_size_particle_rhs_recv = size_particle_rhs; for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ - buffer_particles_rhs_recv[idx_rhs].reset(new real_number[nb_to_receive*size_particle_rhs]); + buffer_particles_rhs_recv[idx_rhs].reset(new real_number[size_buffers_recv*buffers_size_particle_rhs_recv]); + } + } + else if(buffers_size_particle_rhs_recv < size_particle_rhs){ + buffers_size_particle_rhs_recv = size_particle_rhs; + if(size_buffers_recv > 0){ + for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ + buffer_particles_rhs_recv[idx_rhs].reset(new real_number[size_buffers_recv*buffers_size_particle_rhs_recv]); + } } - size_buffers_recv = nb_to_receive; } { @@ -229,13 +287,14 @@ public: return; } - if(size_buffers_send < nb_to_receive && nb_to_receive){ - buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[nb_to_receive]); - buffer_particles_positions_send.reset(new real_number[nb_to_receive*size_particle_positions]); + if(size_buffers_send < nb_to_receive){ + size_buffers_send = nb_to_receive; + buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[size_buffers_send]); + buffer_particles_positions_send.reset(new real_number[size_buffers_send*size_particle_positions]); + buffers_size_particle_rhs_send = size_particle_rhs; for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ - buffer_particles_rhs_send[idx_rhs].reset(new real_number[nb_to_receive*size_particle_rhs]); + buffer_particles_rhs_send[idx_rhs].reset(new real_number[size_buffers_send*buffers_size_particle_rhs_send]); } - size_buffers_send = nb_to_receive; } { @@ -260,11 +319,11 @@ public: } write(idx_time_step, buffer_particles_positions_send.get(), buffer_particles_rhs_send.data(), - nb_to_receive, particles_chunk_current_offset); + nb_to_receive, particles_chunk_current_offset, size_particle_rhs); } virtual void write(const int idx_time_step, const real_number* positions, const std::unique_ptr<real_number[]>* rhs, - const partsize_t nb_particles, const partsize_t 
particles_idx_offset) = 0; + const partsize_t nb_particles, const partsize_t particles_idx_offset, const int size_particle_rhs) = 0; }; #endif diff --git a/cpp/particles/abstract_particles_system.hpp b/cpp/particles/abstract_particles_system.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2f2f510f4bdad22b26b243607c7bbddcd2536771 --- /dev/null +++ b/cpp/particles/abstract_particles_system.hpp @@ -0,0 +1,129 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef ABSTRACT_PARTICLES_SYSTEM_HPP +#define ABSTRACT_PARTICLES_SYSTEM_HPP + +#include <memory> + +//- Not generic to enable sampling begin +#include "field.hpp" +#include "kspace.hpp" +//- Not generic to enable sampling end + + +template <class partsize_t, class real_number> +class abstract_particles_system { +public: + virtual ~abstract_particles_system(){} + + virtual void compute() = 0; + + virtual void compute_p2p() = 0; + + virtual void compute_particles_inner() = 0; + + virtual void enforce_unit_orientation() = 0; + + virtual void add_Lagrange_multipliers() = 0; + + virtual void compute_sphere_particles_inner(const real_number particle_extra_rhs[]) = 0; + virtual void compute_ellipsoid_particles_inner(const real_number particle_extra_rhs[]) = 0; + + virtual void move(const real_number dt) = 0; + + virtual void redistribute() = 0; + + virtual void inc_step_idx() = 0; + + virtual void shift_rhs_vectors() = 0; + + virtual void completeLoop(const real_number dt) = 0; + + virtual void completeLoopWithVorticity( + const real_number dt, + const real_number sampled_vorticity[]) = 0; + + virtual void completeLoopWithVelocityGradient( + const real_number dt, + const real_number sampled_velocity_gradient[]) = 0; + + virtual const real_number* getParticlesState() const = 0; + + virtual std::unique_ptr<real_number[]> extractParticlesState(const int firstState, const int lastState) const = 0; + + virtual const std::unique_ptr<real_number[]>* getParticlesRhs() const = 0; + + virtual const partsize_t* getParticlesIndexes() const = 0; + + virtual partsize_t getLocalNbParticles() const = 0; + + virtual partsize_t getGlobalNbParticles() const = 0; + + virtual int getNbRhs() const = 0; + + virtual int get_step_idx() const = 0; + + //- Not generic to enable sampling begin + virtual void sample_compute_field(const field<float, FFTW, ONE>& sample_field, + real_number sample_rhs[]) = 0; + virtual void sample_compute_field(const field<float, FFTW, THREE>& sample_field, + real_number sample_rhs[]) = 0; + virtual void sample_compute_field(const field<float, FFTW, THREExTHREE>& sample_field, + real_number 
sample_rhs[]) = 0; + virtual void sample_compute_field(const field<double, FFTW, ONE>& sample_field, + real_number sample_rhs[]) = 0; + virtual void sample_compute_field(const field<double, FFTW, THREE>& sample_field, + real_number sample_rhs[]) = 0; + virtual void sample_compute_field(const field<double, FFTW, THREExTHREE>& sample_field, + real_number sample_rhs[]) = 0; + //- Not generic to enable sampling end + + template <typename rnumber, field_backend be, field_components fc> + void completeLoopWithExtraField( + const real_number dt, + const field<rnumber, be, fc>& in_field) { + static_assert((fc == THREE) || (fc == THREExTHREE), "only THREE or THREExTHREE is supported for now"); + if (fc == THREE) + { + std::unique_ptr<real_number[]> extra_rhs(new real_number[getLocalNbParticles()*3]()); + std::fill_n(extra_rhs.get(), 3*getLocalNbParticles(), 0); + sample_compute_field(in_field, extra_rhs.get()); + completeLoopWithVorticity(dt, extra_rhs.get()); + } + else if (fc == THREExTHREE) + { + std::unique_ptr<real_number[]> extra_rhs(new real_number[getLocalNbParticles()*9]()); + std::fill_n(extra_rhs.get(), 9*getLocalNbParticles(), 0); + sample_compute_field(in_field, extra_rhs.get()); + completeLoopWithVelocityGradient(dt, extra_rhs.get()); + } + } + + virtual int setParticleFileLayout(std::vector<hsize_t>) = 0; + virtual std::vector<hsize_t> getParticleFileLayout() = 0; +}; + +#endif diff --git a/bfps/cpp/particles/alltoall_exchanger.hpp b/cpp/particles/alltoall_exchanger.hpp similarity index 79% rename from bfps/cpp/particles/alltoall_exchanger.hpp rename to cpp/particles/alltoall_exchanger.hpp index 2beaf092e8e6c7a801efd492270d29c2d4dba398..d3423523d9b9d02347514972c3bcb3f92129df56 100644 --- a/bfps/cpp/particles/alltoall_exchanger.hpp +++ b/cpp/particles/alltoall_exchanger.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef ALLTOALL_EXCHANGER_HPP #define ALLTOALL_EXCHANGER_HPP diff --git a/bfps/cpp/particles/env_utils.hpp b/cpp/particles/env_utils.hpp similarity index 59% rename from bfps/cpp/particles/env_utils.hpp rename to cpp/particles/env_utils.hpp index cd6fb3026ac19397fb525235f3d4f87e2cc2bb94..829fd5b46f879c4485276d3f3866b8ae3d81e8d5 100644 --- a/bfps/cpp/particles/env_utils.hpp +++ b/cpp/particles/env_utils.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
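As a usage note for the interface above: completeLoopWithExtraField() dispatches on the field components, treating a THREE field as a sampled vorticity and a THREExTHREE field as a sampled velocity gradient, and forwards to the matching completeLoopWith* method after sampling. A minimal sketch of a caller, assuming cpp/ is on the include path and that a concrete implementation of abstract_particles_system and a solver-owned vorticity field exist elsewhere (psys, vorticity and advance_particles are placeholders, not part of this patch):

// Sketch only: psys is any concrete abstract_particles_system<long long int, double>,
// vorticity is a field<float, FFTW, THREE> maintained by the fluid solver.
#include "particles/abstract_particles_system.hpp"

template <class particles_system_t, class vorticity_field_t>
void advance_particles(
        particles_system_t& psys,
        const vorticity_field_t& vorticity,
        const double dt,
        const int nsteps)
{
    for (int step = 0; step < nsteps; ++step)
    {
        // samples the field at the particle positions, then performs one
        // time step including the extra right-hand-side contribution;
        // completeLoop(dt) would be the plain alternative without sampling
        psys.completeLoopWithExtraField(dt, vorticity);
    }
}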
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef ENV_UTILS_HPP #define ENV_UTILS_HPP diff --git a/cpp/particles/lock_free_bool_array.hpp b/cpp/particles/lock_free_bool_array.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5e32a7d41bec3ddc7d56962d14d338a78f2b084a --- /dev/null +++ b/cpp/particles/lock_free_bool_array.hpp @@ -0,0 +1,58 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef LOCK_FREE_BOOL_ARRAY_HPP +#define LOCK_FREE_BOOL_ARRAY_HPP + +#include <vector> +#include <memory> + +class lock_free_bool_array{ + std::vector<std::unique_ptr<long int>> keys; + +public: + explicit lock_free_bool_array(const long int inNbKeys = 512){ + keys.resize(inNbKeys); + for(std::unique_ptr<long int>& k : keys){ + k.reset(new long int(0)); + } + } + + void lock(const long int inKey){ + volatile long int* k = keys[inKey%keys.size()].get(); + long int res = 1; + while(res == 1){ + res = __sync_val_compare_and_swap(k, 0, res); + } + } + + void unlock(const long int inKey){ + volatile long int* k = keys[inKey%keys.size()].get(); + assert(k && *k); + (*k) = 0; + } +}; + +#endif diff --git a/cpp/particles/p2p_computer.hpp b/cpp/particles/p2p_computer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..74d9c9ebeff2e61864fe5e827f103d1691709e4d --- /dev/null +++ b/cpp/particles/p2p_computer.hpp @@ -0,0 +1,110 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. 
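The lock_free_bool_array above is effectively a striped spin lock: lock() busy-waits with an atomic compare-and-swap on keys[inKey % keys.size()], so two distinct cell indices can map to the same slot and will then serialize against each other, which is safe, just occasionally stricter than necessary. A minimal usage sketch, assuming cpp/ is on the include path; the cell index is made up, and <cassert> is included explicitly here because the header relies on assert() without including it itself:

#include <cassert>   // used by lock_free_bool_array.hpp but not included there
#include <cstdio>
#include "particles/lock_free_bool_array.hpp"

int main()
{
    lock_free_bool_array cell_locks(512);
    const long int cell_idx = 42;   // hypothetical cell index

    cell_locks.lock(cell_idx);      // spins until the slot for this key is free
    std::printf("updating cell %ld under its lock\n", cell_idx);
    cell_locks.unlock(cell_idx);    // makes the slot available again
    return 0;
}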
* +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef P2P_COMPUTER_HPP +#define P2P_COMPUTER_HPP + +#include <cstring> +#include <cassert> + +template <class real_number, class partsize_t> +class p2p_computer{ + + bool isActive; + + /** \brief A simple distance weighting function. + * + * This function returns 1 if a distance is smaller than a cut-off length, + * i.e. particle 1 interacts with particle 2 if particle 2 is inside a + * sphere of radius `cutoff' centered on particle 1. + */ + static double dumb_distance_weight( + const double dist_pow2, + const double cutoff){ + // this function should only be called for interacting particles, + // and particles interact if they are closer than cutoff. + assert(dist_pow2 < cutoff*cutoff); + return 1.0; + } + + +public: + p2p_computer() : isActive(true){} + + template <int size_particle_rhs> + void init_result_array(real_number rhs[], const partsize_t nbParticles) const{ + memset(rhs, 0, sizeof(real_number)*nbParticles*size_particle_rhs); + } + + template <int size_particle_rhs> + void reduce_particles_rhs(real_number rhs_dst[], const real_number rhs_src[], const partsize_t nbParticles) const{ + static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs"); + for(int idx_part = 0 ; idx_part < nbParticles ; ++idx_part){ + // We merge only the values modified by the current kernel (3-5) + for(int idx_rhs = 3 ; idx_rhs < size_particle_rhs ; ++idx_rhs){ + rhs_dst[idx_part*size_particle_rhs+idx_rhs] += rhs_src[idx_part*size_particle_rhs+idx_rhs]; + } + } + } + + template <int size_particle_positions, int size_particle_rhs> + void compute_interaction(const real_number pos_part1[], real_number rhs_part1[], + const real_number pos_part2[], real_number rhs_part2[], + const real_number dist_pow2, const real_number cutoff, + const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{ + static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position+orientation"); + static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs"); + + // TODO: a reasonable way of choosing between different distance_weight functions should be thought of. + // We need to ask Michael about how flexible this distance_weight needs to be. 
+ const double ww = dumb_distance_weight(dist_pow2, cutoff); + /// + /// term in equation is: + /// + /// \f[ + /// (4 / \tau) \sum_j W_\ell ( | x^i - x^j | ) (p^i \cdot p^j)p^j + /// \f] + /// + const double dot_product = (pos_part1[3+IDXC_X]*pos_part2[3+IDXC_X] + + pos_part1[3+IDXC_Y]*pos_part2[3+IDXC_Y] + + pos_part1[3+IDXC_Z]*pos_part2[3+IDXC_Z]); + rhs_part1[3+IDXC_X] += pos_part2[3+IDXC_X] * 4 * ww * dot_product; + rhs_part1[3+IDXC_Y] += pos_part2[3+IDXC_Y] * 4 * ww * dot_product; + rhs_part1[3+IDXC_Z] += pos_part2[3+IDXC_Z] * 4 * ww * dot_product; + rhs_part2[3+IDXC_X] += pos_part1[3+IDXC_X] * 4 * ww * dot_product; + rhs_part2[3+IDXC_Y] += pos_part1[3+IDXC_Y] * 4 * ww * dot_product; + rhs_part2[3+IDXC_Z] += pos_part1[3+IDXC_Z] * 4 * ww * dot_product; + } + + bool isEnable() const { + return isActive; + } + + void setEnable(const bool inIsActive) { + isActive = inIsActive; + } +}; + +#endif diff --git a/cpp/particles/p2p_computer_empty.hpp b/cpp/particles/p2p_computer_empty.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0599dc1abb08207fcb761a534c282f8fceda5ce3 --- /dev/null +++ b/cpp/particles/p2p_computer_empty.hpp @@ -0,0 +1,54 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
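The interaction kernel above only touches the orientation slots (offsets 3-5) of each particle's six-value right-hand side, and reduce_particles_rhs correspondingly merges only those slots. A small self-contained check of a single interaction, assuming cpp/ is on the include path and that particles_utils.hpp provides the IDXC_* component indices used inside the kernel (an assumption about this patch's headers); positions, orientations, cutoff and distance below are made up:

#include <cstdio>
#include "particles/particles_utils.hpp"  // assumed to define the IDXC_* indices used by the kernel
#include "particles/p2p_computer.hpp"

int main()
{
    p2p_computer<double, long int> kernel;

    // two particles: 3 position values followed by 3 orientation values
    const double part1[6] = {0.0, 0.0, 0.0,   1.0, 0.0, 0.0};
    const double part2[6] = {0.1, 0.0, 0.0,   1.0, 0.0, 0.0};
    double rhs1[6];
    double rhs2[6];
    kernel.init_result_array<6>(rhs1, 1);
    kernel.init_result_array<6>(rhs2, 1);

    const double cutoff = 1.0;
    const double dist2 = 0.1*0.1;   // must satisfy dist2 < cutoff*cutoff

    kernel.compute_interaction<6, 6>(part1, rhs1, part2, rhs2,
                                     dist2, cutoff, 0.0, 0.0, 0.0);

    // aligned unit orientations give a dot product of 1, so the orientation
    // slots of each particle receive 4 * w * (p.p) * p = (4, 0, 0)
    std::printf("rhs1 orientation increment: %g %g %g\n", rhs1[3], rhs1[4], rhs1[5]);
    return 0;
}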
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef P2P_COMPUTER_EMPTY_HPP +#define P2P_COMPUTER_EMPTY_HPP + +#include <cstring> + +template <class real_number, class partsize_t> +class p2p_computer_empty{ +public: + template <int size_particle_rhs> + void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{ + } + + template <int size_particle_rhs> + void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{ + } + + template <int size_particle_positions, int size_particle_rhs> + void compute_interaction(const real_number /*pos_part1*/[], real_number /*rhs_part1*/[], + const real_number /*pos_part2*/[], real_number /*rhs_part2*/[], + const real_number /*dist_pow2*/, const real_number /*cutoff*/, + const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{ + } + + constexpr static bool isEnable() { + return false; + } +}; + +#endif diff --git a/cpp/particles/p2p_distr_mpi.hpp b/cpp/particles/p2p_distr_mpi.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7ab3a8b36722b8aa03ec9f4c070a68aa1fbe1776 --- /dev/null +++ b/cpp/particles/p2p_distr_mpi.hpp @@ -0,0 +1,784 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
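p2p_distr_mpi below covers the periodic box with a uniform cell grid: foundGridFactor() picks the largest integer factor for which the cell size spatial_box_width[IDXC_Z]/factor is still at least the requested cutoff, and that (slightly larger) cell size becomes the cutoff_radius actually used to bin particles. A standalone numerical sketch of this rounding, with made-up box size and cutoff:

#include <cstdio>

// mirrors foundGridFactor() and the cutoff_radius member initialisation below
int main()
{
    const double box_width_z = 6.283185307179586;   // e.g. a 2*pi periodic box
    const double requested_cutoff = 1.0;

    int factor = 1;
    while (requested_cutoff <= box_width_z/double(factor+1))
        factor += 1;
    const double effective_cutoff = box_width_z/double(factor);

    // prints factor = 6, effective cutoff ~ 1.0472: cells never become
    // smaller than the requested interaction radius
    std::printf("factor = %d, effective cutoff = %f\n", factor, effective_cutoff);
    return 0;
}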
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef P2P_DISTR_MPI_HPP +#define P2P_DISTR_MPI_HPP + +#include <mpi.h> + +#include <vector> +#include <memory> +#include <cassert> + +#include <type_traits> +#include <omp.h> +#include <algorithm> + +#include "scope_timer.hpp" +#include "particles_utils.hpp" +#include "p2p_tree.hpp" +#include "lock_free_bool_array.hpp" + +template <class partsize_t, class real_number> +class p2p_distr_mpi { +protected: + static const int MaxNbRhs = 10; + + enum MpiTag{ + TAG_NB_PARTICLES, + TAG_POSITION_PARTICLES, + TAG_RESULT_PARTICLES, + }; + + struct NeighborDescriptor{ + partsize_t nbParticlesToExchange; + int destProc; + int nbLevelsToExchange; + bool isRecv; + + std::unique_ptr<real_number[]> toRecvAndMerge; + std::unique_ptr<real_number[]> toCompute; + std::unique_ptr<real_number[]> results; + }; + + enum Action{ + NOTHING_TODO = 512, + RECV_PARTICLES, + COMPUTE_PARTICLES, + RELEASE_BUFFER_PARTICLES, + MERGE_PARTICLES + }; + + MPI_Comm current_com; + + int my_rank; + int nb_processes; + int nb_processes_involved; + + const std::pair<int,int> current_partition_interval; + const int current_partition_size; + const std::array<size_t,3> field_grid_dim; + + std::unique_ptr<int[]> partition_interval_size_per_proc; + std::unique_ptr<int[]> partition_interval_offset_per_proc; + + std::unique_ptr<partsize_t[]> current_offset_particles_for_partition; + + std::vector<std::pair<Action,int>> whatNext; + std::vector<MPI_Request> mpiRequests; + std::vector<NeighborDescriptor> neigDescriptors; + + std::array<real_number,3> spatial_box_width; + std::array<real_number,3> spatial_box_offset; + + const real_number cutoff_radius_compute; + const int nb_cells_factor; + const real_number cutoff_radius; + std::array<long int,3> nb_cell_levels; + + template <class DataType, int sizeElement> + static void permute_copy(const partsize_t offsetIdx, const partsize_t nbElements, + const std::pair<long int,partsize_t> permutation[], + DataType data[], std::vector<unsigned char>* buffer){ + buffer->resize(nbElements*sizeof(DataType)*sizeElement); + DataType* dataBuffer = reinterpret_cast<DataType*>(buffer->data()); + + // Permute + for(partsize_t idxPart = 0 ; idxPart < nbElements ; ++idxPart){ + const partsize_t srcData = permutation[idxPart].second; + const partsize_t destData = idxPart; + for(int idxVal = 0 ; idxVal < sizeElement ; ++idxVal){ + dataBuffer[destData*sizeElement + idxVal] + = data[srcData*sizeElement + idxVal]; + } + } + + // Copy back + for(partsize_t idxPart = 0 ; idxPart < nbElements ; ++idxPart){ + const partsize_t srcData = idxPart; + const partsize_t destData = idxPart+offsetIdx; + for(int idxVal = 0 ; idxVal < sizeElement ; ++idxVal){ + data[destData*sizeElement + idxVal] + = dataBuffer[srcData*sizeElement + idxVal]; + } + } + } + + static int foundGridFactor(const real_number in_cutoff_radius, const std::array<real_number,3>& in_spatial_box_width){ + int idx_factor = 1; + while(in_cutoff_radius <= in_spatial_box_width[IDXC_Z]/real_number(idx_factor+1)){ + idx_factor += 1; + } + return idx_factor; + } + +public: + //////////////////////////////////////////////////////////////////////////// + + p2p_distr_mpi(MPI_Comm in_current_com, + const std::pair<int,int>& in_current_partitions, + const std::array<size_t,3>& in_field_grid_dim, + const std::array<real_number,3>& in_spatial_box_width, + const std::array<real_number,3>& 
in_spatial_box_offset, + const real_number in_cutoff_radius) + : current_com(in_current_com), + my_rank(-1), nb_processes(-1),nb_processes_involved(-1), + current_partition_interval(in_current_partitions), + current_partition_size(current_partition_interval.second-current_partition_interval.first), + field_grid_dim(in_field_grid_dim), + spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset), + cutoff_radius_compute(in_cutoff_radius), + nb_cells_factor(foundGridFactor(in_cutoff_radius, in_spatial_box_width)), + cutoff_radius(in_spatial_box_width[IDXC_Z]/real_number(nb_cells_factor)){ + + AssertMpi(MPI_Comm_rank(current_com, &my_rank)); + AssertMpi(MPI_Comm_size(current_com, &nb_processes)); + + partition_interval_size_per_proc.reset(new int[nb_processes]); + AssertMpi( MPI_Allgather( const_cast<int*>(¤t_partition_size), 1, MPI_INT, + partition_interval_size_per_proc.get(), 1, MPI_INT, + current_com) ); + assert(partition_interval_size_per_proc[my_rank] == current_partition_size); + + partition_interval_offset_per_proc.reset(new int[nb_processes+1]); + partition_interval_offset_per_proc[0] = 0; + for(int idxProc = 0 ; idxProc < nb_processes ; ++idxProc){ + partition_interval_offset_per_proc[idxProc+1] = partition_interval_offset_per_proc[idxProc] + partition_interval_size_per_proc[idxProc]; + } + + current_offset_particles_for_partition.reset(new partsize_t[current_partition_size+1]); + + nb_processes_involved = nb_processes; + while(nb_processes_involved != 0 && partition_interval_size_per_proc[nb_processes_involved-1] == 0){ + nb_processes_involved -= 1; + } + assert(nb_processes_involved != 0); + for(int idx_proc_involved = 0 ; idx_proc_involved < nb_processes_involved ; ++idx_proc_involved){ + assert(partition_interval_size_per_proc[idx_proc_involved] != 0); + } + + assert(int(field_grid_dim[IDXC_Z]) == partition_interval_offset_per_proc[nb_processes_involved]); + + nb_cell_levels[IDXC_X] = nb_cells_factor; + nb_cell_levels[IDXC_Y] = nb_cells_factor; + nb_cell_levels[IDXC_Z] = nb_cells_factor; + } + + virtual ~p2p_distr_mpi(){} + + //////////////////////////////////////////////////////////////////////////// + + int getGridFactor() const{ + return nb_cells_factor; + } + + real_number getGridCutoff() const{ + return cutoff_radius; + } + + long int get_cell_coord_x_from_index(const long int index) const{ + return index % nb_cell_levels[IDXC_X]; + } + + long int get_cell_coord_y_from_index(const long int index) const{ + return (index % (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y])) + / nb_cell_levels[IDXC_X]; + } + + long int get_cell_coord_z_from_index(const long int index) const{ + return index / (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y]); + } + + long int first_cell_level_proc(const int dest_proc) const{ + const real_number field_section_width_z = spatial_box_width[IDXC_Z]/real_number(field_grid_dim[IDXC_Z]); + return static_cast<long int>((field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc]))/cutoff_radius); + } + + long int last_cell_level_proc(const int dest_proc) const{ + const real_number field_section_width_z = spatial_box_width[IDXC_Z]/real_number(field_grid_dim[IDXC_Z]); + const long int limite = static_cast<long int>((field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc+1]) + - std::numeric_limits<real_number>::epsilon())/cutoff_radius); + if(static_cast<real_number>(limite)*cutoff_radius + == field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc+1])){ + return 
limite-1; + } + return limite; + } + + real_number apply_pbc(real_number pos, IDX_COMPONENT_3D dim) const{ + while( pos < spatial_box_offset[dim] ){ + pos += spatial_box_width[dim]; + } + while( spatial_box_width[dim]+spatial_box_offset[dim] <= pos){ + pos -= spatial_box_width[dim]; + } + return pos; + } + + std::array<long int,3> get_cell_coordinate(const real_number pos_x, const real_number pos_y, + const real_number pos_z) const { + const real_number diff_x = apply_pbc(pos_x,IDXC_X) - spatial_box_offset[IDXC_X]; + const real_number diff_y = apply_pbc(pos_y,IDXC_Y) - spatial_box_offset[IDXC_Y]; + const real_number diff_z = apply_pbc(pos_z,IDXC_Z) - spatial_box_offset[IDXC_Z]; + std::array<long int,3> coord; + coord[IDXC_X] = static_cast<long int>(diff_x/cutoff_radius); + coord[IDXC_Y] = static_cast<long int>(diff_y/cutoff_radius); + coord[IDXC_Z] = static_cast<long int>(diff_z/cutoff_radius); + return coord; + } + + long int get_cell_idx(const real_number pos_x, const real_number pos_y, + const real_number pos_z) const { + std::array<long int,3> coord = get_cell_coordinate(pos_x, pos_y, pos_z); + return ((coord[IDXC_Z]*nb_cell_levels[IDXC_Y])+coord[IDXC_Y])*nb_cell_levels[IDXC_X]+coord[IDXC_X]; + } + + real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1, + const real_number x2, const real_number y2, const real_number z2, + const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const { + real_number diff_x = std::abs(apply_pbc(x1,IDXC_X)-apply_pbc(x2,IDXC_X)+xshift_coef*spatial_box_width[IDXC_X]); + assert(diff_x <= 2*cutoff_radius); + + real_number diff_y = std::abs(apply_pbc(y1,IDXC_X)-apply_pbc(y2,IDXC_X)+yshift_coef*spatial_box_width[IDXC_Y]); + assert(diff_y <= 2*cutoff_radius); + + real_number diff_z = std::abs(apply_pbc(z1,IDXC_X)-apply_pbc(z2,IDXC_X)+zshift_coef*spatial_box_width[IDXC_Z]); + assert(diff_z <= 2*cutoff_radius); + + return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z); + } + + template <class computer_class, int size_particle_positions, int size_particle_rhs> + void compute_distr(computer_class& in_computer, + const partsize_t current_my_nb_particles_per_partition[], + real_number particles_positions[], + real_number particles_current_rhs[], + partsize_t inout_index_particles[]){ + TIMEZONE("compute_distr"); + + // Some processes might not be involved + if(nb_processes_involved <= my_rank){ + return; + } + + const long int my_top_z_cell_level = last_cell_level_proc(my_rank); + const long int my_down_z_cell_level = first_cell_level_proc(my_rank); + const long int my_nb_cell_levels = 1+my_top_z_cell_level-my_down_z_cell_level; + + current_offset_particles_for_partition[0] = 0; + partsize_t myTotalNbParticles = 0; + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition]; + current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition]; + } + + // Compute box idx for each particle + std::unique_ptr<long int[]> particles_coord(new long int[current_offset_particles_for_partition[current_partition_size]]); + + { + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + #pragma omp parallel for schedule(static) + for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] 
; ++idxPart ){ + particles_coord[idxPart] = get_cell_idx(particles_positions[(idxPart)*size_particle_positions + IDXC_X], + particles_positions[(idxPart)*size_particle_positions + IDXC_Y], + particles_positions[(idxPart)*size_particle_positions + IDXC_Z]); + assert(my_down_z_cell_level <= get_cell_coord_z_from_index(particles_coord[idxPart])); + assert(get_cell_coord_z_from_index(particles_coord[idxPart]) <= my_top_z_cell_level); + } + } + + std::vector<std::pair<long int,partsize_t>> part_to_sort; + + // Sort each partition in cells + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + part_to_sort.clear(); + + for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] ; ++idxPart ){ + part_to_sort.emplace_back(); + part_to_sort.back().first = particles_coord[idxPart]; + part_to_sort.back().second = idxPart; + } + + assert(partsize_t(part_to_sort.size()) == (current_my_nb_particles_per_partition[idxPartition])); + + std::sort(part_to_sort.begin(), part_to_sort.end(), + [](const std::pair<long int,partsize_t>& p1, + const std::pair<long int,partsize_t>& p2){ + return p1.first < p2.first; + }); + + // Permute array using buffer + std::vector<unsigned char> buffer; + permute_copy<real_number, size_particle_positions>(current_offset_particles_for_partition[idxPartition], + current_my_nb_particles_per_partition[idxPartition], + part_to_sort.data(), particles_positions, &buffer); + permute_copy<real_number, size_particle_rhs>(current_offset_particles_for_partition[idxPartition], + current_my_nb_particles_per_partition[idxPartition], + part_to_sort.data(), particles_current_rhs, &buffer); + permute_copy<partsize_t, 1>(current_offset_particles_for_partition[idxPartition], + current_my_nb_particles_per_partition[idxPartition], + part_to_sort.data(), inout_index_particles, &buffer); + permute_copy<long int, 1>(current_offset_particles_for_partition[idxPartition], + current_my_nb_particles_per_partition[idxPartition], + part_to_sort.data(), particles_coord.get(), &buffer); + } + } + + // Build the tree + p2p_tree<std::vector<std::pair<partsize_t,partsize_t>>> my_tree(nb_cell_levels); + + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + long int current_cell_idx = -1; + partsize_t current_nb_particles_in_cell = 0; + partsize_t current_cell_offset = 0; + + for(partsize_t idx_part = current_offset_particles_for_partition[idxPartition] ; + idx_part != current_offset_particles_for_partition[idxPartition+1]; ++idx_part){ + if(particles_coord[idx_part] != current_cell_idx){ + if(current_nb_particles_in_cell){ + my_tree.getCell(current_cell_idx).emplace_back(current_cell_offset,current_nb_particles_in_cell); + } + current_cell_idx = particles_coord[idx_part]; + current_nb_particles_in_cell = 1; + current_cell_offset = idx_part; + } + else{ + current_nb_particles_in_cell += 1; + } + } + if(current_nb_particles_in_cell){ + my_tree.getCell(current_cell_idx).emplace_back(current_cell_offset,current_nb_particles_in_cell); + + } + } + + // Offset per cell layers + long int previous_index = 0; + variable_used_only_in_assert(previous_index); + std::unique_ptr<partsize_t[]> particles_offset_layers(new partsize_t[my_nb_cell_levels+1]()); + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + for(partsize_t idx_part = current_offset_particles_for_partition[idxPartition] ; + idx_part != 
current_offset_particles_for_partition[idxPartition+1]; ++idx_part){ + const long int part_box_z_index = get_cell_coord_z_from_index(particles_coord[idx_part]); + assert(my_down_z_cell_level <= part_box_z_index); + assert(part_box_z_index <= my_top_z_cell_level); + particles_offset_layers[part_box_z_index+1-my_down_z_cell_level] += 1; + assert(previous_index <= part_box_z_index); + previous_index = part_box_z_index; + } + } + for(long int idx_layer = 0 ; idx_layer < my_nb_cell_levels ; ++idx_layer){ + particles_offset_layers[idx_layer+1] += particles_offset_layers[idx_layer]; + } + + // Reset vectors + assert(whatNext.size() == 0); + assert(mpiRequests.size() == 0); + neigDescriptors.clear(); + + // Find process with at least one neighbor + { + int dest_proc = (my_rank+1)%nb_processes_involved; + while(dest_proc != my_rank + && (my_top_z_cell_level == first_cell_level_proc(dest_proc) + || (my_top_z_cell_level+1)%nb_cell_levels[IDXC_Z] == first_cell_level_proc(dest_proc))){ + // Find if we have to send 1 or 2 cell levels + int nb_levels_to_send = 1; + if(my_nb_cell_levels > 1 // I have more than one level + && (my_top_z_cell_level-1+2)%nb_cell_levels[IDXC_Z] <= last_cell_level_proc(dest_proc)){ + nb_levels_to_send += 1; + } + + NeighborDescriptor descriptor; + descriptor.destProc = dest_proc; + descriptor.nbLevelsToExchange = nb_levels_to_send; + descriptor.nbParticlesToExchange = particles_offset_layers[my_nb_cell_levels] - particles_offset_layers[my_nb_cell_levels-nb_levels_to_send]; + descriptor.isRecv = false; + + neigDescriptors.emplace_back(std::move(descriptor)); + + dest_proc = (dest_proc+1)%nb_processes_involved; + } + + int src_proc = (my_rank-1+nb_processes_involved)%nb_processes_involved; + while(src_proc != my_rank + && (last_cell_level_proc(src_proc) == my_down_z_cell_level + || (last_cell_level_proc(src_proc)+1)%nb_cell_levels[IDXC_Z] == my_down_z_cell_level)){ + // Find if we have to send 1 or 2 cell levels + int nb_levels_to_recv = 1; + if(my_nb_cell_levels > 1 // I have more than one level + && first_cell_level_proc(src_proc) <= (my_down_z_cell_level-1+2)%nb_cell_levels[IDXC_Z]){ + nb_levels_to_recv += 1; + } + + NeighborDescriptor descriptor; + descriptor.destProc = src_proc; + descriptor.nbLevelsToExchange = nb_levels_to_recv; + descriptor.nbParticlesToExchange = -1; + descriptor.isRecv = true; + + neigDescriptors.emplace_back(std::move(descriptor)); + + src_proc = (src_proc-1+nb_processes_involved)%nb_processes_involved; + } + } + + ////////////////////////////////////////////////////////////////////// + /// Exchange the number of particles in each partition + /// Could involve only here but I do not think it will be a problem + ////////////////////////////////////////////////////////////////////// + + assert(whatNext.size() == 0); + assert(mpiRequests.size() == 0); +#ifndef NDEBUG // Just for assertion + std::vector<int> willsend(nb_processes_involved, 0); + std::vector<int> willrecv(nb_processes_involved, 0); +#endif + + for(int idxDescr = 0 ; idxDescr < int(neigDescriptors.size()) ; ++idxDescr){ + NeighborDescriptor& descriptor = neigDescriptors[idxDescr]; + + if(descriptor.isRecv == false){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&descriptor.nbParticlesToExchange), + 1, particles_utils::GetMpiType(partsize_t()), + descriptor.destProc, TAG_NB_PARTICLES, + current_com, &mpiRequests.back())); +#ifndef NDEBUG // Just for assertion + willsend[descriptor.destProc] += 1; 
+#endif + if(descriptor.nbParticlesToExchange){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(descriptor.nbParticlesToExchange*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_positions]), + int(descriptor.nbParticlesToExchange*size_particle_positions), particles_utils::GetMpiType(real_number()), + descriptor.destProc, TAG_POSITION_PARTICLES, + current_com, &mpiRequests.back())); + + assert(descriptor.toRecvAndMerge == nullptr); + descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToExchange*size_particle_rhs]); + whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + assert(descriptor.nbParticlesToExchange*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), int(descriptor.nbParticlesToExchange*size_particle_rhs), + particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_RESULT_PARTICLES, + current_com, &mpiRequests.back())); + } + } + else{ +#ifndef NDEBUG // Just for assertion + willrecv[descriptor.destProc] += 1; +#endif + whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&descriptor.nbParticlesToExchange, + 1, particles_utils::GetMpiType(partsize_t()), descriptor.destProc, TAG_NB_PARTICLES, + current_com, &mpiRequests.back())); + } + } + +#ifndef NDEBUG // Just for assertion + { + if(myrank == 0){ + std::vector<int> willsendall(nb_processes_involved*nb_processes_involved, 0);// TODO debug + std::vector<int> willrecvall(nb_processes_involved*nb_processes_involved, 0);// TODO debug + + MPI_Gather(willrecv.data(), nb_processes_involved, MPI_INT, willrecvall.data(), + nb_processes_involved, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Gather(willsend.data(), nb_processes_involved, MPI_INT, willsendall.data(), + nb_processes_involved, MPI_INT, 0, MPI_COMM_WORLD); + + for(int idxproc = 0 ; idxproc < nb_processes_involved ; ++idxproc){ + for(int idxtest = 0 ; idxtest < nb_processes_involved ; ++idxtest){ + assert(willsendall[idxproc*nb_processes_involved + idxtest] + == willrecvall[idxtest*nb_processes_involved + idxproc]); + } + } + } + else{ + MPI_Gather(willrecv.data(), nb_processes_involved, MPI_INT, nullptr, + 0, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Gather(willsend.data(), nb_processes_involved, MPI_INT, nullptr, + 0, MPI_INT, 0, MPI_COMM_WORLD); + } + } +#endif + + lock_free_bool_array cells_locker(512); + + TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads()) + #pragma omp parallel default(shared) + { + #pragma omp master + { + while(mpiRequests.size()){ + TIMEZONE("wait-loop"); + assert(mpiRequests.size() == whatNext.size()); + + int idxDone = int(mpiRequests.size()); + { + TIMEZONE("wait"); + AssertMpi(MPI_Waitany(int(mpiRequests.size()), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); + } + const std::pair<Action, int> releasedAction = whatNext[idxDone]; + std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]); + std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]); + mpiRequests.pop_back(); + whatNext.pop_back(); + + ////////////////////////////////////////////////////////////////////// + /// Data to exchange particles + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == RECV_PARTICLES){ + 
TIMEZONE("post-recv-particles"); + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + assert(descriptor.isRecv); + const int destProc = descriptor.destProc; + const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange; + assert(NbParticlesToReceive != -1); + assert(descriptor.toCompute == nullptr); + + if(NbParticlesToReceive){ + descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions), + particles_utils::GetMpiType(real_number()), destProc, TAG_POSITION_PARTICLES, + current_com, &mpiRequests.back())); + } + } + + ////////////////////////////////////////////////////////////////////// + /// Computation + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == COMPUTE_PARTICLES){ + TIMEZONE("compute-particles"); + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + assert(descriptor.isRecv); + const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange; + + assert(descriptor.toCompute != nullptr); + descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]); + in_computer.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive); + + // Compute + partsize_t idxPart = 0; + while(idxPart != NbParticlesToReceive){ + const long int current_cell_idx = get_cell_idx(descriptor.toCompute[idxPart*size_particle_positions + IDXC_X], + descriptor.toCompute[idxPart*size_particle_positions + IDXC_Y], + descriptor.toCompute[idxPart*size_particle_positions + IDXC_Z]); + partsize_t nb_parts_in_cell = 1; + while(idxPart+nb_parts_in_cell != NbParticlesToReceive + && current_cell_idx == get_cell_idx(descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_X], + descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Y], + descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Z])){ + nb_parts_in_cell += 1; + } + + #pragma omp task default(shared) firstprivate(idxPart, nb_parts_in_cell, current_cell_idx) + { + const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; + long int neighbors_indexes[27]; + std::array<real_number,3> shift[27]; + const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, shift, true); + + // with other interval + for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ + cells_locker.lock(neighbors_indexes[idx_neighbor]); + + for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ + for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){ + for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2(descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_X], + descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Y], + descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Z], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y], + 
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], + shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + in_computer.template compute_interaction<size_particle_positions, size_particle_rhs>( + &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions], + &descriptor.results[(idxPart+idx_p1)*size_particle_rhs], + &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], + &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], + dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); + } + } + } + } + + cells_locker.unlock(neighbors_indexes[idx_neighbor]); + } + } + + idxPart += nb_parts_in_cell; + } + + #pragma omp taskwait + + // Send back + const int destProc = descriptor.destProc; + whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + assert(NbParticlesToReceive*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(descriptor.results.get(), int(NbParticlesToReceive*size_particle_rhs), + particles_utils::GetMpiType(real_number()), destProc, TAG_RESULT_PARTICLES, + current_com, &mpiRequests.back())); + delete[] descriptor.toCompute.release(); + } + ////////////////////////////////////////////////////////////////////// + /// Release memory that was sent back + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == RELEASE_BUFFER_PARTICLES){ + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + assert(descriptor.results != nullptr); + assert(descriptor.isRecv); + delete[] descriptor.results.release(); + } + ////////////////////////////////////////////////////////////////////// + /// Merge + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == MERGE_PARTICLES){ + TIMEZONE("merge"); + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + assert(descriptor.isRecv == false); + assert(descriptor.toRecvAndMerge != nullptr); + in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_rhs], + descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToExchange); + delete[] descriptor.toRecvAndMerge.release(); + } + } + } + } + + assert(whatNext.size() == 0); + assert(mpiRequests.size() == 0); + + // Compute self data + for(const auto& iter_cell : my_tree){ + TIMEZONE("proceed-leaf"); + const long int currenct_cell_idx = iter_cell.first; + const std::vector<std::pair<partsize_t,partsize_t>>* intervals_ptr = &iter_cell.second; + +#pragma omp task default(shared) firstprivate(currenct_cell_idx, intervals_ptr) + { + const std::vector<std::pair<partsize_t,partsize_t>>& intervals = (*intervals_ptr); + + cells_locker.lock(currenct_cell_idx); + + for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ + // self interval + for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ + for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X], + 
particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z], + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_X], + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Y], + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Z], + 0, 0, 0); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( + &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions], + &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs], + dist_r2, cutoff_radius_compute, 0, 0, 0); + } + } + } + + // with other interval + for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){ + for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ + for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z], + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_X], + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y], + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], + 0, 0, 0); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( + &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions], + &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs], + dist_r2, cutoff_radius_compute, 0, 0, 0); + } + } + } + } + } + + const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; + long int neighbors_indexes[27]; + std::array<real_number,3> shift[27]; + const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false); + + for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ + // with other interval + for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ + if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){ + cells_locker.lock(neighbors_indexes[idx_neighbor]); + + for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ + for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ + for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X], + 
particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], + shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( + &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], + &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], + dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); + } + } + } + } + cells_locker.unlock(neighbors_indexes[idx_neighbor]); + } + } + } + + cells_locker.unlock(currenct_cell_idx); + } + } + } +}; + +#endif diff --git a/cpp/particles/p2p_tree.hpp b/cpp/particles/p2p_tree.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cdb3089174ff888cbfc13810d18c617b4a8358e7 --- /dev/null +++ b/cpp/particles/p2p_tree.hpp @@ -0,0 +1,153 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
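The wait loop in compute_distr above is an event-driven state machine: every pending MPI request has a matching entry in whatNext, MPI_Waitany reports whichever request completes first, and the associated Action (receive positions, compute, release buffers, merge results) decides what to post next, with completed entries swap-and-popped so the two vectors stay aligned. A stripped-down, self-contained illustration of that pattern only (tags, actions and the integer payload are invented for the example; run with 2 MPI ranks):

#include <mpi.h>
#include <cstdio>
#include <vector>
#include <utility>

enum Action { NOTHING_TODO, PROCESS_DATA };

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    std::vector<MPI_Request> requests;
    std::vector<std::pair<Action,int>> whatNext;
    int payload = rank, received = -1;
    const int partner = (rank + 1) % size;

    // post a send and a receive, remembering what to do when each completes
    requests.emplace_back();
    whatNext.emplace_back(NOTHING_TODO, -1);
    MPI_Isend(&payload, 1, MPI_INT, partner, 0, MPI_COMM_WORLD, &requests.back());

    requests.emplace_back();
    whatNext.emplace_back(PROCESS_DATA, 0);
    MPI_Irecv(&received, 1, MPI_INT, partner, 0, MPI_COMM_WORLD, &requests.back());

    // event loop: react to whichever request finishes first
    while (!requests.empty())
    {
        int idxDone = 0;
        MPI_Waitany(int(requests.size()), requests.data(), &idxDone, MPI_STATUS_IGNORE);
        const std::pair<Action,int> action = whatNext[idxDone];
        std::swap(requests[idxDone], requests.back());
        std::swap(whatNext[idxDone], whatNext.back());
        requests.pop_back();
        whatNext.pop_back();

        if (action.first == PROCESS_DATA)
        {
            std::printf("rank %d received %d\n", rank, received);
            // in compute_distr this is where follow-up Isend/Irecv calls
            // would be posted and appended to requests/whatNext
        }
    }
    MPI_Finalize();
    return 0;
}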
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef P2P_TREE_HPP +#define P2P_TREE_HPP + +#include <unordered_map> +#include <vector> + +template <class CellClass> +class p2p_tree{ + std::unordered_map<long int, CellClass> data; + CellClass emptyCell; + std::array<long int,3> nb_cell_levels; + + long int get_cell_coord_x_from_index(const long int index) const{ + return index % nb_cell_levels[IDXC_X]; + } + + long int get_cell_coord_y_from_index(const long int index) const{ + return (index % (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y])) + / nb_cell_levels[IDXC_X]; + } + + long int get_cell_coord_z_from_index(const long int index) const{ + return index / (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y]); + } + + long int get_cell_idx(const long int idx_x, const long int idx_y, + const long int idx_z) const { + return (((idx_z*nb_cell_levels[IDXC_Y])+idx_y)*nb_cell_levels[IDXC_X])+idx_x; + } + +public: + explicit p2p_tree(std::array<long int,3> in_nb_cell_levels) + : nb_cell_levels(in_nb_cell_levels){ + } + + CellClass& getCell(const long int idx){ + return data[idx]; + } + + + const CellClass& getCell(const long int idx) const { + const auto& iter = data.find(idx); + if(iter != data.end()){ + return iter->second; + } + return emptyCell; + } + + template <class ShiftType> + int getNeighbors(const long int idx, const CellClass* output[27], long int output_indexes[27], + std::array<ShiftType,3> shift[27], const bool include_target) const{ + int nbNeighbors = 0; + + std::fill_n(output, 27, nullptr); + + const long int idx_x = get_cell_coord_x_from_index(idx); + const long int idx_y = get_cell_coord_y_from_index(idx); + const long int idx_z = get_cell_coord_z_from_index(idx); + + for(long int neigh_x = -1 ; neigh_x <= 1 ; ++neigh_x){ + long int neigh_x_pbc = neigh_x+idx_x; + ShiftType shift_x = 0; + if(neigh_x_pbc < 0){ + neigh_x_pbc += nb_cell_levels[IDXC_X]; + shift_x = 1; + } + else if(nb_cell_levels[IDXC_X] <= neigh_x_pbc){ + neigh_x_pbc -= nb_cell_levels[IDXC_X]; + shift_x = -1; + } + + for(long int neigh_y = -1 ; neigh_y <= 1 ; ++neigh_y){ + long int neigh_y_pbc = neigh_y+idx_y; + ShiftType shift_y = 0; + if(neigh_y_pbc < 0){ + neigh_y_pbc += nb_cell_levels[IDXC_Y]; + shift_y = 1; + } + else if(nb_cell_levels[IDXC_Y] <= neigh_y_pbc){ + neigh_y_pbc -= nb_cell_levels[IDXC_Y]; + shift_y = -1; + } + + for(long int neigh_z = -1 ; neigh_z <= 1 ; ++neigh_z){ + long int neigh_z_pbc = neigh_z+idx_z; + ShiftType shift_z = 0; + if(neigh_z_pbc < 0){ + neigh_z_pbc += nb_cell_levels[IDXC_Z]; + shift_z = 1; + } + else if(nb_cell_levels[IDXC_Z] <= neigh_z_pbc){ + neigh_z_pbc -= nb_cell_levels[IDXC_Z]; + shift_z = -1; + } + + if(include_target || neigh_x_pbc != idx_x || neigh_y_pbc != idx_y || neigh_z_pbc != idx_z){ + const long int idx_neigh = get_cell_idx(neigh_x_pbc, + neigh_y_pbc, + neigh_z_pbc); + const auto& iter = data.find(idx_neigh); + if(iter != data.end()){ + output[nbNeighbors] = &(iter->second); + output_indexes[nbNeighbors] = idx_neigh; + + shift[nbNeighbors][IDXC_X] = shift_x; + shift[nbNeighbors][IDXC_Y] = shift_y; + shift[nbNeighbors][IDXC_Z] = shift_z; + + nbNeighbors += 1; + } + } + } + } + } + + return nbNeighbors; + } + + typename std::unordered_map<long int, CellClass>::iterator begin(){ + return data.begin(); + } + + typename std::unordered_map<long int, CellClass>::iterator end(){ + return data.end(); + } +}; + +#endif diff --git 
a/bfps/cpp/particles/particles_adams_bashforth.hpp b/cpp/particles/particles_adams_bashforth.hpp similarity index 76% rename from bfps/cpp/particles/particles_adams_bashforth.hpp rename to cpp/particles/particles_adams_bashforth.hpp index 2fb61462f7970d823acd6dc3405799e362fa15af..21412e3530408a5980c376453cd6f5199466d830 100644 --- a/bfps/cpp/particles/particles_adams_bashforth.hpp +++ b/cpp/particles/particles_adams_bashforth.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_ADAMS_BASHFORTH_HPP #define PARTICLES_ADAMS_BASHFORTH_HPP @@ -7,11 +32,10 @@ #include "scope_timer.hpp" #include "particles_utils.hpp" -template <class partsize_t, class real_number, int size_particle_positions = 3, int size_particle_rhs = 3> -class particles_adams_bashforth { - static_assert(size_particle_positions == size_particle_rhs, - "Not having the same dimension for positions and rhs looks like a bug," - "otherwise comment this assertion."); +template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs> +class particles_adams_bashforth{ + static_assert(size_particle_positions == size_particle_rhs, "This class is designed for the same number of values in positions and rhs"); + public: static const int Max_steps = 6; diff --git a/bfps/cpp/particles/particles_distr_mpi.hpp b/cpp/particles/particles_distr_mpi.hpp similarity index 88% rename from bfps/cpp/particles/particles_distr_mpi.hpp rename to cpp/particles/particles_distr_mpi.hpp index 485595181f69b9fe1cf204b06df550a9ca74215d..43d61ca407af23e3cf3c3979d678af08cd7b5ff8 100644 --- a/bfps/cpp/particles/particles_distr_mpi.hpp +++ b/cpp/particles/particles_distr_mpi.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
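One recurring change in particles_distr_mpi.hpp below replaces bare unique_ptr release() calls with delete[] descriptor.toCompute.release(); and the like, so exchange buffers are actually freed as soon as their contents have been consumed instead of leaking the detached array. A tiny self-contained illustration of that pattern and of the equivalent reset() form (the buffer here is a placeholder, not one of the patch's buffers):

#include <memory>

int main()
{
    std::unique_ptr<double[]> buffer(new double[16]);

    // pattern used in the patch: detach the array and delete it explicitly;
    // the unique_ptr is left empty (nullptr)
    delete[] buffer.release();

    // equivalent, more conventional form: reset() deletes the owned array
    buffer.reset(new double[16]);
    buffer.reset();

    return 0;
}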
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_DISTR_MPI_HPP #define PARTICLES_DISTR_MPI_HPP @@ -17,7 +42,7 @@ template <class partsize_t, class real_number> class particles_distr_mpi { protected: - static const int MaxNbRhs = 100; + static const int MaxNbRhs = 10; enum MpiTag{ TAG_LOW_UP_NB_PARTICLES, @@ -127,7 +152,7 @@ public: assert(partition_interval_size_per_proc[idx_proc_involved] != 0); } - assert(int(field_grid_dim[IDX_Z]) == partition_interval_offset_per_proc[nb_processes_involved]); + assert(int(field_grid_dim[IDXC_Z]) == partition_interval_offset_per_proc[nb_processes_involved]); } virtual ~particles_distr_mpi(){} @@ -136,12 +161,12 @@ public: template <class computer_class, class field_class, int size_particle_positions, int size_particle_rhs> void compute_distr(computer_class& in_computer, - field_class& in_field, + const field_class& in_field, const partsize_t current_my_nb_particles_per_partition[], const real_number particles_positions[], real_number particles_current_rhs[], const int interpolation_size){ - TIMEZONE("compute_distr"); + TIMEZONE("particle_distr_mpi::compute_distr"); // Some processes might not be involved if(nb_processes_involved <= my_rank){ @@ -235,6 +260,7 @@ public: } const int nbProcToRecvUpper = int(neigDescriptors.size())-nbProcToRecvLower; const int nbProcToRecv = nbProcToRecvUpper + nbProcToRecvLower; + variable_used_only_in_assert(nbProcToRecv); assert(int(neigDescriptors.size()) == nbProcToRecv); for(int idxDescr = 0 ; idxDescr < int(neigDescriptors.size()) ; ++idxDescr){ @@ -383,7 +409,7 @@ public: in_computer.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive); if(more_than_one_thread == false){ - in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, descriptor.toCompute.get(), descriptor.results.get(), NbParticlesToReceive); + in_computer.template apply_computation<field_class, size_particle_positions, size_particle_rhs>(in_field, descriptor.toCompute.get(), descriptor.results.get(), NbParticlesToReceive); } else{ TIMEZONE_OMP_INIT_PRETASK(timeZoneTaskKey) @@ -396,7 +422,7 @@ public: TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey) { TIMEZONE_OMP_TASK("in_computer.apply_computation", timeZoneTaskKey); - in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, &ptr_descriptor->toCompute[idxPart*size_particle_positions], + in_computer.template apply_computation<field_class, size_particle_positions, size_particle_rhs>(in_field, &ptr_descriptor->toCompute[idxPart*size_particle_positions], &ptr_descriptor->results[idxPart*size_particle_rhs], sizeToDo); } } @@ -417,7 +443,7 @@ public: if(releasedAction.first == RELEASE_BUFFER_PARTICLES){ NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; assert(descriptor.toCompute != nullptr); - descriptor.toCompute.release(); + delete[] descriptor.toCompute.release(); } ////////////////////////////////////////////////////////////////////// /// Merge @@ -429,14 +455,14 @@ public: TIMEZONE("reduce"); assert(descriptor.toRecvAndMerge != nullptr); in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[0], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); - descriptor.toRecvAndMerge.release(); + delete[] descriptor.toRecvAndMerge.release(); } else { TIMEZONE("reduce"); assert(descriptor.toRecvAndMerge != nullptr); 
in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[(current_offset_particles_for_partition[current_partition_size]-descriptor.nbParticlesToSend)*size_particle_rhs], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); - descriptor.toRecvAndMerge.release(); + delete[] descriptor.toRecvAndMerge.release(); } } } @@ -456,7 +482,7 @@ public: #pragma omp task default(shared) firstprivate(idxPart, sizeToDo) priority(0) TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey) { TIMEZONE_OMP_TASK("in_computer.apply_computation", timeZoneTaskKey); - in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, &particles_positions[idxPart*size_particle_positions], + in_computer.template apply_computation<field_class, size_particle_positions, size_particle_rhs>(in_field, &particles_positions[idxPart*size_particle_positions], &particles_current_rhs[idxPart*size_particle_rhs], sizeToDo); } @@ -474,14 +500,14 @@ public: TIMEZONE("reduce_later"); assert(descriptor.toRecvAndMerge != nullptr); in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[0], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); - descriptor.toRecvAndMerge.release(); + delete[] descriptor.toRecvAndMerge.release(); } else { TIMEZONE("reduce_later"); assert(descriptor.toRecvAndMerge != nullptr); in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[(current_offset_particles_for_partition[current_partition_size]-descriptor.nbParticlesToSend)*size_particle_rhs], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); - descriptor.toRecvAndMerge.release(); + delete[] descriptor.toRecvAndMerge.release(); } } } @@ -492,7 +518,7 @@ public: TIMEZONE("compute-my_compute"); // Compute my particles if(myTotalNbParticles){ - in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, particles_positions, particles_current_rhs, myTotalNbParticles); + in_computer.template apply_computation<field_class, size_particle_positions, size_particle_rhs>(in_field, particles_positions, particles_current_rhs, myTotalNbParticles); } } @@ -517,6 +543,20 @@ public: return; } + {// TODO remove + partsize_t partOffset = 0; + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + for(partsize_t idx = 0 ; idx < current_my_nb_particles_per_partition[idxPartition] ; ++idx){ + const int partition_level = in_computer.pbc_field_layer((*inout_positions_particles)[(idx+partOffset)*size_particle_positions+IDXC_Z], IDXC_Z); + variable_used_only_in_assert(partition_level); + assert(partition_level == current_partition_interval.first + idxPartition + || partition_level == (current_partition_interval.first + idxPartition-1+int(field_grid_dim[IDXC_Z]))%int(field_grid_dim[IDXC_Z]) + || partition_level == (current_partition_interval.first + idxPartition+1)%int(field_grid_dim[IDXC_Z])); + } + partOffset += current_my_nb_particles_per_partition[idxPartition]; + } + } + current_offset_particles_for_partition[0] = 0; partsize_t myTotalNbParticles = 0; for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ @@ -528,16 +568,17 @@ public: // Find particles outside my interval const partsize_t nbOutLower = particles_utils::partition_extra<partsize_t, size_particle_positions>(&(*inout_positions_particles)[0], current_my_nb_particles_per_partition[0], [&](const real_number val[]){ - const int partition_level = in_computer.pbc_field_layer(val[IDX_Z], IDX_Z); + const int 
partition_level = in_computer.pbc_field_layer(val[IDXC_Z], IDXC_Z); assert(partition_level == current_partition_interval.first - || partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]) - || partition_level == (current_partition_interval.first+1)%int(field_grid_dim[IDX_Z])); - const bool isLower = partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]); + || partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDXC_Z]))%int(field_grid_dim[IDXC_Z]) + || partition_level == (current_partition_interval.first+1)%int(field_grid_dim[IDXC_Z])); + const bool isLower = partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDXC_Z]))%int(field_grid_dim[IDXC_Z]); return isLower; }, [&](const partsize_t idx1, const partsize_t idx2){ for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ - std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); + std::swap((*inout_index_particles)[size_particle_index*idx1+idx_val], + (*inout_index_particles)[size_particle_index*idx2+idx_val]); } for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ @@ -553,16 +594,17 @@ public: &(*inout_positions_particles)[(current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow)*size_particle_positions], myTotalNbParticles - (current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow), [&](const real_number val[]){ - const int partition_level = in_computer.pbc_field_layer(val[IDX_Z], IDX_Z); + const int partition_level = in_computer.pbc_field_layer(val[IDXC_Z], IDXC_Z); assert(partition_level == (current_partition_interval.second-1) - || partition_level == ((current_partition_interval.second-1)-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]) - || partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDX_Z])); - const bool isUpper = (partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDX_Z])); + || partition_level == ((current_partition_interval.second-1)-1+int(field_grid_dim[IDXC_Z]))%int(field_grid_dim[IDXC_Z]) + || partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDXC_Z])); + const bool isUpper = (partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDXC_Z])); return !isUpper; }, [&](const partsize_t idx1, const partsize_t idx2){ for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ - std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); + std::swap((*inout_index_particles)[size_particle_index*idx1+idx_val], + (*inout_index_particles)[size_particle_index*idx2+idx_val]); } for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ @@ -607,10 +649,11 @@ public: assert(nbOutLower*size_particle_positions < std::numeric_limits<int>::max()); AssertMpi(MPI_Isend(&(*inout_positions_particles)[0], int(nbOutLower*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - assert(nbOutLower < std::numeric_limits<int>::max()); - AssertMpi(MPI_Isend(&(*inout_index_particles)[0], int(nbOutLower), particles_utils::GetMpiType(partsize_t()), + assert(nbOutLower*size_particle_index < std::numeric_limits<int>::max()); + 
AssertMpi(MPI_Isend(&(*inout_index_particles)[0], int(nbOutLower*size_particle_index), particles_utils::GetMpiType(partsize_t()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); @@ -643,14 +686,14 @@ public: AssertMpi(MPI_Isend(&(*inout_positions_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_positions], int(nbOutUpper*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - assert(nbOutUpper < std::numeric_limits<int>::max()); - AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)], int(nbOutUpper), + assert(nbOutUpper*size_particle_index < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_index], int(nbOutUpper*size_particle_index), particles_utils::GetMpiType(partsize_t()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); - for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); @@ -684,11 +727,12 @@ public: (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); - newParticlesLowIndexes.reset(new partsize_t[nbNewFromLow]); + newParticlesLowIndexes.reset(new partsize_t[nbNewFromLow*size_particle_index]); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - assert(nbNewFromLow < std::numeric_limits<int>::max()); - AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], int(nbNewFromLow), particles_utils::GetMpiType(partsize_t()), + assert(nbNewFromLow*size_particle_index < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], int(nbNewFromLow*size_particle_index), + particles_utils::GetMpiType(partsize_t()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); @@ -713,11 +757,12 @@ public: AssertMpi(MPI_Irecv(&newParticlesUp[0], int(nbNewFromUp*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); - newParticlesUpIndexes.reset(new partsize_t[nbNewFromUp]); + newParticlesUpIndexes.reset(new partsize_t[nbNewFromUp*size_particle_index]); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - assert(nbNewFromUp < std::numeric_limits<int>::max()); - AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], int(nbNewFromUp), particles_utils::GetMpiType(partsize_t()), + assert(nbNewFromUp*size_particle_index < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], int(nbNewFromUp*size_particle_index), + particles_utils::GetMpiType(partsize_t()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); @@ -750,7 +795,7 @@ public: const partsize_t myTotalNewNbParticles = nbOldParticlesInside + nbNewFromLow + nbNewFromUp; std::unique_ptr<real_number[]> newArray(new real_number[myTotalNewNbParticles*size_particle_positions]); - std::unique_ptr<partsize_t[]> newArrayIndexes(new partsize_t[myTotalNewNbParticles]); + 
std::unique_ptr<partsize_t[]> newArrayIndexes(new partsize_t[myTotalNewNbParticles*size_particle_index]); std::vector<std::unique_ptr<real_number[]>> newArrayRhs(in_nb_rhs); for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ newArrayRhs[idx_rhs].reset(new real_number[myTotalNewNbParticles*size_particle_rhs]); @@ -760,7 +805,7 @@ public: if(nbNewFromLow){ const particles_utils::fixed_copy fcp(0, 0, nbNewFromLow); fcp.copy(newArray, newParticlesLow, size_particle_positions); - fcp.copy(newArrayIndexes, newParticlesLowIndexes); + fcp.copy(newArrayIndexes, newParticlesLowIndexes, size_particle_index); for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ fcp.copy(newArrayRhs[idx_rhs], newParticlesLowRhs[idx_rhs], size_particle_rhs); } @@ -770,7 +815,7 @@ public: { const particles_utils::fixed_copy fcp(nbNewFromLow, nbOutLower, nbOldParticlesInside); fcp.copy(newArray, (*inout_positions_particles), size_particle_positions); - fcp.copy(newArrayIndexes, (*inout_index_particles)); + fcp.copy(newArrayIndexes, (*inout_index_particles), size_particle_index); for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ fcp.copy(newArrayRhs[idx_rhs], inout_rhs_particles[idx_rhs], size_particle_rhs); } @@ -780,7 +825,7 @@ public: if(nbNewFromUp){ const particles_utils::fixed_copy fcp(nbNewFromLow+nbOldParticlesInside, 0, nbNewFromUp); fcp.copy(newArray, newParticlesUp, size_particle_positions); - fcp.copy(newArrayIndexes, newParticlesUpIndexes); + fcp.copy(newArrayIndexes, newParticlesUpIndexes, size_particle_index); for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ fcp.copy(newArrayRhs[idx_rhs], newParticlesUpRhs[idx_rhs], size_particle_rhs); } @@ -802,13 +847,14 @@ public: myTotalNbParticles,current_partition_size, current_my_nb_particles_per_partition, current_offset_particles_for_partition.get(), [&](const real_number& z_pos){ - const int partition_level = in_computer.pbc_field_layer(z_pos, IDX_Z); + const int partition_level = in_computer.pbc_field_layer(z_pos, IDXC_Z); assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second); return partition_level - current_partition_interval.first; }, [&](const partsize_t idx1, const partsize_t idx2){ for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ - std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); + std::swap((*inout_index_particles)[size_particle_index*idx1 + idx_val], + (*inout_index_particles)[size_particle_index*idx2 + idx_val]); } for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ @@ -824,7 +870,7 @@ public: assert(current_my_nb_particles_per_partition[idxPartition] == current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); for(partsize_t idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ - assert(in_computer.pbc_field_layer((*inout_positions_particles)[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition); + assert(in_computer.pbc_field_layer((*inout_positions_particles)[idx*size_particle_positions+IDXC_Z], IDXC_Z)-current_partition_interval.first == idxPartition); } } } diff --git a/bfps/cpp/particles/particles_field_computer.hpp b/cpp/particles/particles_field_computer.hpp similarity index 68% rename from bfps/cpp/particles/particles_field_computer.hpp rename to cpp/particles/particles_field_computer.hpp index 
f68f2fc02b4ee40aa9583385c0bd18195b92b6dc..f6494ecd0b937b02038fb7eb8a498ee9f29212fd 100644 --- a/bfps/cpp/particles/particles_field_computer.hpp +++ b/cpp/particles/particles_field_computer.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_FIELD_COMPUTER_HPP #define PARTICLES_FIELD_COMPUTER_HPP @@ -12,6 +37,16 @@ template <class partsize_t, class interpolator_class, int interp_neighbours> class particles_field_computer { + // TODO but not critical, add in field: + // static const int nb_components = ncomp(fc); + // and use it as field_class::nb_components + // but failed up to now.... + template <typename rnumber, + field_backend be, + field_components fc> + static constexpr int nbcomp(const field<rnumber, be, fc>& /*field*/){ + return ncomp(fc); + } const std::array<int,3> field_grid_dim; const std::pair<int,int> current_partition_interval; @@ -34,9 +69,9 @@ public: : field_grid_dim({{int(in_field_grid_dim[0]),int(in_field_grid_dim[1]),int(in_field_grid_dim[2])}}), current_partition_interval(in_current_partitions), interpolator(in_interpolator), spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset), box_step_width(in_box_step_width){ - deriv[IDX_X] = 0; - deriv[IDX_Y] = 0; - deriv[IDX_Z] = 0; + deriv[IDXC_X] = 0; + deriv[IDXC_Y] = 0; + deriv[IDXC_Z] = 0; } //////////////////////////////////////////////////////////////////////// @@ -62,33 +97,35 @@ public: return pos_in_cell; } - template <class field_class, int size_particle_rhs> + template <class field_class, int size_particle_positions, int size_particle_rhs> void apply_computation(const field_class& field, const real_number particles_positions[], real_number particles_current_rhs[], const partsize_t nb_particles) const { + constexpr int nb_components_in_field = nbcomp(field); + static_assert(nb_components_in_field <= size_particle_rhs, "Cannot store all the component in the given array"); TIMEZONE("particles_field_computer::apply_computation"); - //DEBUG_MSG("just entered particles_field_computer::apply_computation\n"); + for(partsize_t idxPart = 0 ; idxPart < nb_particles ; ++idxPart){ - const real_number reltv_x = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_X], IDX_X); - const real_number reltv_y = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_Y], IDX_Y); - const real_number reltv_z = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_Z], IDX_Z); + const real_number reltv_x = get_norm_pos_in_cell(particles_positions[idxPart*size_particle_positions+IDXC_X], IDXC_X); + const real_number reltv_y = 
get_norm_pos_in_cell(particles_positions[idxPart*size_particle_positions+IDXC_Y], IDXC_Y); + const real_number reltv_z = get_norm_pos_in_cell(particles_positions[idxPart*size_particle_positions+IDXC_Z], IDXC_Z); typename interpolator_class::real_number bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2]; - interpolator.compute_beta(deriv[IDX_X], reltv_x, bx); - interpolator.compute_beta(deriv[IDX_Y], reltv_y, by); - interpolator.compute_beta(deriv[IDX_Z], reltv_z, bz); + interpolator.compute_beta(deriv[IDXC_X], reltv_x, bx); + interpolator.compute_beta(deriv[IDXC_Y], reltv_y, by); + interpolator.compute_beta(deriv[IDXC_Z], reltv_z, bz); - const int partGridIdx_x = pbc_field_layer(particles_positions[idxPart*3+IDX_X], IDX_X); - const int partGridIdx_y = pbc_field_layer(particles_positions[idxPart*3+IDX_Y], IDX_Y); - const int partGridIdx_z = pbc_field_layer(particles_positions[idxPart*3+IDX_Z], IDX_Z); + const int partGridIdx_x = pbc_field_layer(particles_positions[idxPart*size_particle_positions+IDXC_X], IDXC_X); + const int partGridIdx_y = pbc_field_layer(particles_positions[idxPart*size_particle_positions+IDXC_Y], IDXC_Y); + const int partGridIdx_z = pbc_field_layer(particles_positions[idxPart*size_particle_positions+IDXC_Z], IDXC_Z); - assert(0 <= partGridIdx_x && partGridIdx_x < int(field_grid_dim[IDX_X])); - assert(0 <= partGridIdx_y && partGridIdx_y < int(field_grid_dim[IDX_Y])); - assert(0 <= partGridIdx_z && partGridIdx_z < int(field_grid_dim[IDX_Z])); + assert(0 <= partGridIdx_x && partGridIdx_x < int(field_grid_dim[IDXC_X])); + assert(0 <= partGridIdx_y && partGridIdx_y < int(field_grid_dim[IDXC_Y])); + assert(0 <= partGridIdx_z && partGridIdx_z < int(field_grid_dim[IDXC_Z])); const int interp_limit_mx = partGridIdx_x-interp_neighbours; const int interp_limit_x = partGridIdx_x+interp_neighbours+1; @@ -101,8 +138,8 @@ public: int nb_z_intervals; if((partGridIdx_z-interp_neighbours) < 0){ - assert(partGridIdx_z+interp_neighbours+1 < int(field_grid_dim[IDX_Z])); - interp_limit_mz[0] = std::max(current_partition_interval.first, partGridIdx_z-interp_neighbours+int(field_grid_dim[IDX_Z])); + assert(partGridIdx_z+interp_neighbours+1 < int(field_grid_dim[IDXC_Z])); + interp_limit_mz[0] = std::max(current_partition_interval.first, partGridIdx_z-interp_neighbours+int(field_grid_dim[IDXC_Z])); interp_limit_z[0] = current_partition_interval.second-1; interp_limit_mz[1] = std::max(0, current_partition_interval.first); @@ -110,12 +147,12 @@ public: nb_z_intervals = 2; } - else if(int(field_grid_dim[IDX_Z]) <= (partGridIdx_z+interp_neighbours+1)){ + else if(int(field_grid_dim[IDXC_Z]) <= (partGridIdx_z+interp_neighbours+1)){ interp_limit_mz[0] = std::max(current_partition_interval.first, partGridIdx_z-interp_neighbours); - interp_limit_z[0] = std::min(int(field_grid_dim[IDX_Z])-1,current_partition_interval.second-1); + interp_limit_z[0] = std::min(int(field_grid_dim[IDXC_Z])-1,current_partition_interval.second-1); interp_limit_mz[1] = std::max(0, current_partition_interval.first); - interp_limit_z[1] = std::min(partGridIdx_z+interp_neighbours+1-int(field_grid_dim[IDX_Z]), current_partition_interval.second-1); + interp_limit_z[1] = std::min(partGridIdx_z+interp_neighbours+1-int(field_grid_dim[IDXC_Z]), current_partition_interval.second-1); nb_z_intervals = 2; } @@ -127,26 +164,27 @@ public: for(int idx_inter = 0 ; idx_inter < nb_z_intervals ; ++idx_inter){ for(int idx_z = interp_limit_mz[idx_inter] ; idx_z <= interp_limit_z[idx_inter] ; ++idx_z ){ - const int 
idx_z_pbc = (idx_z + field_grid_dim[IDX_Z])%field_grid_dim[IDX_Z]; + const int idx_z_pbc = (idx_z + field_grid_dim[IDXC_Z])%field_grid_dim[IDXC_Z]; assert(current_partition_interval.first <= idx_z_pbc && idx_z_pbc < current_partition_interval.second); - assert(((idx_z+field_grid_dim[IDX_Z]-interp_limit_mz_bz)%field_grid_dim[IDX_Z]) < interp_neighbours*2+2); + assert(((idx_z+field_grid_dim[IDXC_Z]-interp_limit_mz_bz)%field_grid_dim[IDXC_Z]) < interp_neighbours*2+2); for(int idx_x = interp_limit_mx ; idx_x <= interp_limit_x ; ++idx_x ){ - const int idx_x_pbc = (idx_x + field_grid_dim[IDX_X])%field_grid_dim[IDX_X]; + const int idx_x_pbc = (idx_x + field_grid_dim[IDXC_X])%field_grid_dim[IDXC_X]; assert(idx_x-interp_limit_mx < interp_neighbours*2+2); for(int idx_y = interp_limit_my ; idx_y <= interp_limit_y ; ++idx_y ){ - const int idx_y_pbc = (idx_y + field_grid_dim[IDX_Y])%field_grid_dim[IDX_Y]; + const int idx_y_pbc = (idx_y + field_grid_dim[IDXC_Y])%field_grid_dim[IDXC_Y]; assert(idx_y-interp_limit_my < interp_neighbours*2+2); - const real_number coef = (bz[((idx_z+field_grid_dim[IDX_Z]-interp_limit_mz_bz)%field_grid_dim[IDX_Z])] + const real_number coef = (bz[((idx_z+field_grid_dim[IDXC_Z]-interp_limit_mz_bz)%field_grid_dim[IDXC_Z])] * by[idx_y-interp_limit_my] * bx[idx_x-interp_limit_mx]); const ptrdiff_t tindex = field.get_rindex_from_global(idx_x_pbc, idx_y_pbc, idx_z_pbc); // getValue does not necessary return real_number - for(int idx_rhs_val = 0 ; idx_rhs_val < size_particle_rhs ; ++idx_rhs_val){ + // size_particle_rhs is just for the leading dimension of the array + for(int idx_rhs_val = 0 ; idx_rhs_val < nb_components_in_field ; ++idx_rhs_val){ particles_current_rhs[idxPart*size_particle_rhs+idx_rhs_val] += real_number(field.rval(tindex,idx_rhs_val))*coef; } } diff --git a/bfps/cpp/particles/particles_generic_interp.hpp b/cpp/particles/particles_generic_interp.hpp similarity index 82% rename from bfps/cpp/particles/particles_generic_interp.hpp rename to cpp/particles/particles_generic_interp.hpp index 98d0363d4fcfae8c05b6ceabef620e17c1263eee..da48641ca543dd853c24d675c1fea8b96f9da449 100644 --- a/bfps/cpp/particles/particles_generic_interp.hpp +++ b/cpp/particles/particles_generic_interp.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_GENERIC_INTERP_HPP #define PARTICLES_GENERIC_INTERP_HPP diff --git a/cpp/particles/particles_inner_computer.cpp b/cpp/particles/particles_inner_computer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3a841bee50f2849ef981cb5c585ee448570ae2ca --- /dev/null +++ b/cpp/particles/particles_inner_computer.cpp @@ -0,0 +1,193 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#include "base.hpp" +#include "particles_utils.hpp" +#include "particles_inner_computer.hpp" + +#include <cmath> + +template <class real_number, class partsize_t> +template <int size_particle_positions, int size_particle_rhs> +void particles_inner_computer<real_number, partsize_t>::compute_interaction( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[]) const{ + static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position"); + static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs"); + + #pragma omp parallel for + for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + // Add attr × V0 to the field interpolation + rhs_part[idx_part*size_particle_rhs + IDXC_X] += pos_part[idx_part*size_particle_positions + 3+IDXC_X]*v0; + rhs_part[idx_part*size_particle_rhs + IDXC_Y] += pos_part[idx_part*size_particle_positions + 3+IDXC_Y]*v0; + rhs_part[idx_part*size_particle_rhs + IDXC_Z] += pos_part[idx_part*size_particle_positions + 3+IDXC_Z]*v0; + } +} + + // for given orientation and right-hand-side, recompute right-hand-side such + // that it is perpendicular to the current orientation. + // this is the job of the Lagrange multiplier terms, hence the + // "add_Lagrange_multipliers" name of the method. 
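A minimal standalone sketch of the projection performed by the specialization below, assuming a plain 3-component std::array layout rather than the interleaved bfps particle arrays (the helper name is illustrative only): the orientation part of the right-hand side r is replaced by r - (r . p) p for the unit orientation p, so the remainder is perpendicular to p.

#include <array>
#include <cassert>
#include <cmath>

// Subtract from rhs its component along the unit vector p,
// leaving only the part perpendicular to the current orientation.
static void project_out_parallel_part(const std::array<double, 3>& p,
                                      std::array<double, 3>& rhs)
{
    // p is expected to be (approximately) a unit vector.
    assert(std::abs(std::sqrt(p[0]*p[0] + p[1]*p[1] + p[2]*p[2]) - 1.0) < 0.01);
    const double projection = p[0]*rhs[0] + p[1]*rhs[1] + p[2]*rhs[2];
    for (int i = 0; i < 3; ++i)
        rhs[i] -= p[i]*projection;
}

With p = (0, 0, 1) and rhs = (1, 2, 3), the call leaves rhs = (1, 2, 0); p . rhs then vanishes up to round-off, which is what the commented-out DEBUG check in the method below verifies.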
+template <> +template <> +void particles_inner_computer<double, long long>::add_Lagrange_multipliers<6,6>( + const long long nb_particles, + const double pos_part[], + double rhs_part[]) const{ + + #pragma omp parallel for + for(long long idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + const long long idx0 = idx_part*6 + 3; + const long long idx1 = idx_part*6 + 3; + // check that orientation is unit vector: + double orientation_size = sqrt( + pos_part[idx0+IDXC_X]*pos_part[idx0+IDXC_X] + + pos_part[idx0+IDXC_Y]*pos_part[idx0+IDXC_Y] + + pos_part[idx0+IDXC_Z]*pos_part[idx0+IDXC_Z]); + variable_used_only_in_assert(orientation_size); + assert(orientation_size > 0.99); + assert(orientation_size < 1.01); + // I call "rotation" to be the right hand side of the orientation part of the ODE + // project rotation on orientation: + double projection = ( + pos_part[idx0+IDXC_X]*rhs_part[idx1+IDXC_X] + + pos_part[idx0+IDXC_Y]*rhs_part[idx1+IDXC_Y] + + pos_part[idx0+IDXC_Z]*rhs_part[idx1+IDXC_Z]); + + // now remove parallel bit. + rhs_part[idx1+IDXC_X] -= pos_part[idx0+IDXC_X]*projection; + rhs_part[idx1+IDXC_Y] -= pos_part[idx0+IDXC_Y]*projection; + rhs_part[idx1+IDXC_Z] -= pos_part[idx0+IDXC_Z]*projection; + + // DEBUG + // sanity check, for debugging purposes + // compute dot product between orientation and orientation change + //double dotproduct = ( + // rhs_part[idx1 + IDXC_X]*pos_part[idx0 + IDXC_X] + + // rhs_part[idx1 + IDXC_Y]*pos_part[idx0 + IDXC_Y] + + // rhs_part[idx1 + IDXC_Z]*pos_part[idx0 + IDXC_Z]); + //if (dotproduct > 0.1) + //{ + // DEBUG_MSG("dotproduct = %g, projection = %g\n" + // "pos_part[%d] = %g, pos_part[%d] = %g, pos_part[%d] = %g\n" + // "rhs_part[%d] = %g, rhs_part[%d] = %g, rhs_part[%d] = %g\n", + // dotproduct, + // projection, + // IDXC_X, pos_part[idx0 + IDXC_X], + // IDXC_Y, pos_part[idx0 + IDXC_Y], + // IDXC_Z, pos_part[idx0 + IDXC_Z], + // IDXC_X, rhs_part[idx1 + IDXC_X], + // IDXC_Y, rhs_part[idx1 + IDXC_Y], + // IDXC_Z, rhs_part[idx1 + IDXC_Z]); + // assert(false); + //} + //assert(dotproduct <= 0.1); + } + } + +template <> +template <> +void particles_inner_computer<double, long long>::compute_interaction_with_extra<6,6,3>( + const long long nb_particles, + const double pos_part[], + double rhs_part[], + const double rhs_part_extra[]) const{ + // call plain compute_interaction first + compute_interaction<6, 6>(nb_particles, pos_part, rhs_part); + + // now add vorticity term + #pragma omp parallel for + for(long long idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + // Cross product vorticity/orientation + rhs_part[idx_part*6 + 3+IDXC_X] += 0.5*(rhs_part_extra[idx_part*3 + IDXC_Y]*pos_part[idx_part*6 + 3+IDXC_Z] - + rhs_part_extra[idx_part*3 + IDXC_Z]*pos_part[idx_part*6 + 3+IDXC_Y]); + rhs_part[idx_part*6 + 3+IDXC_Y] += 0.5*(rhs_part_extra[idx_part*3 + IDXC_Z]*pos_part[idx_part*6 + 3+IDXC_X] - + rhs_part_extra[idx_part*3 + IDXC_X]*pos_part[idx_part*6 + 3+IDXC_Z]); + rhs_part[idx_part*6 + 3+IDXC_Z] += 0.5*(rhs_part_extra[idx_part*3 + IDXC_X]*pos_part[idx_part*6 + 3+IDXC_Y] - + rhs_part_extra[idx_part*3 + IDXC_Y]*pos_part[idx_part*6 + 3+IDXC_X]); + } +} + +template <> //Work here +template <> +void particles_inner_computer<double, long long>::compute_interaction_with_extra<6,6,9>( + const long long nb_particles, + const double pos_part[], + double rhs_part[], + const double rhs_part_extra[]) const{ + // call plain compute_interaction first + compute_interaction<6, 6>(nb_particles, pos_part, rhs_part); + const double ll2 = lambda*lambda; + + // now 
add vorticity term + #pragma omp parallel for + for(long long idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + long long idx_part6 = idx_part*6 + 3; + long long idx_part9 = idx_part*9; + rhs_part[idx_part6+IDXC_X] += ( + pos_part[idx_part6+IDXC_Z]*(ll2*rhs_part_extra[idx_part9 + IDXC_DZ_X]-rhs_part_extra[idx_part9 + IDXC_DX_Z]) + + pos_part[idx_part6+IDXC_Y]*(ll2*rhs_part_extra[idx_part9 + IDXC_DY_X]-rhs_part_extra[idx_part9 + IDXC_DX_Y]) + + pos_part[idx_part6+IDXC_X]*(ll2-1)*rhs_part_extra[idx_part9 + IDXC_DX_X]) / (ll2+1); + rhs_part[idx_part6+IDXC_Y] += ( + pos_part[idx_part6+IDXC_X]*(ll2*rhs_part_extra[idx_part9 + IDXC_DX_Y]-rhs_part_extra[idx_part9 + IDXC_DY_X]) + + pos_part[idx_part6+IDXC_Z]*(ll2*rhs_part_extra[idx_part9 + IDXC_DZ_Y]-rhs_part_extra[idx_part9 + IDXC_DY_Z]) + + pos_part[idx_part6+IDXC_Y]*(ll2-1)*rhs_part_extra[idx_part9 + IDXC_DY_Y]) / (ll2+1); + rhs_part[idx_part6+IDXC_Z] += ( + pos_part[idx_part6+IDXC_Y]*(ll2*rhs_part_extra[idx_part9 + IDXC_DY_Z]-rhs_part_extra[idx_part9 + IDXC_DZ_Y]) + + pos_part[idx_part6+IDXC_X]*(ll2*rhs_part_extra[idx_part9 + IDXC_DX_Z]-rhs_part_extra[idx_part9 + IDXC_DZ_X]) + + pos_part[idx_part6+IDXC_Z]*(ll2-1)*rhs_part_extra[idx_part9 + IDXC_DZ_Z]) / (ll2+1); + } +} + + +// meant to be called AFTER executing the time-stepping operation. +// once the particles have been moved, ensure that the orientation is a unit vector. +template <> +template <> +void particles_inner_computer<double, long long>::enforce_unit_orientation<6>( + const long long nb_particles, + double pos_part[]) const{ + #pragma omp parallel for + for(long long idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + const long long idx0 = idx_part*6 + 3; + // compute orientation size: + double orientation_size = sqrt( + pos_part[idx0+IDXC_X]*pos_part[idx0+IDXC_X] + + pos_part[idx0+IDXC_Y]*pos_part[idx0+IDXC_Y] + + pos_part[idx0+IDXC_Z]*pos_part[idx0+IDXC_Z]); + // now renormalize + pos_part[idx0 + IDXC_X] /= orientation_size; + pos_part[idx0 + IDXC_Y] /= orientation_size; + pos_part[idx0 + IDXC_Z] /= orientation_size; + } +} + +template +void particles_inner_computer<double, long long>::compute_interaction<6, 6>( + const long long nb_particles, + const double pos_part[], + double rhs_part[]) const; + diff --git a/cpp/particles/particles_inner_computer.hpp b/cpp/particles/particles_inner_computer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7f30ad6829e5cfa0ac40bd59db7a9a09cbe8ac6f --- /dev/null +++ b/cpp/particles/particles_inner_computer.hpp @@ -0,0 +1,109 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef PARTICLES_INNER_COMPUTER_HPP +#define PARTICLES_INNER_COMPUTER_HPP + +#include <cstring> +#include <cassert> +#include <iostream> + +template <class real_number, class partsize_t> +class particles_inner_computer{ + bool isActive; + const real_number v0; + const real_number lambda; + const real_number lambda1; + const real_number lambda2; + const real_number lambda3; + +public: + explicit particles_inner_computer(const real_number inV0): + isActive(true), + v0(inV0), + lambda(0), + lambda1(0), + lambda2(0), + lambda3(0) + {} + explicit particles_inner_computer(const real_number inV0, const real_number inLambda): + isActive(true), + v0(inV0), + lambda(inLambda), + lambda1(0), + lambda2(0), + lambda3(0) + {} + explicit particles_inner_computer( + const real_number inV0, + const real_number inLambda1, + const real_number inLambda2, + const real_number inLambda3): + isActive(true), + v0(inV0), + lambda(0), + lambda1(inLambda1), + lambda2(inLambda2), + lambda3(inLambda3) + {} + + template <int size_particle_positions, int size_particle_rhs> + void compute_interaction( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[]) const; + // for given orientation and right-hand-side, recompute right-hand-side such + // that it is perpendicular to the current orientation. + // this is the job of the Lagrange multiplier terms, hence the + // "add_Lagrange_multipliers" name of the method. + template <int size_particle_positions, int size_particle_rhs> + void add_Lagrange_multipliers( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[]) const; + template <int size_particle_positions, int size_particle_rhs, int size_particle_rhs_extra> + void compute_interaction_with_extra( + const partsize_t nb_particles, + const real_number pos_part[], + real_number rhs_part[], + const real_number rhs_part_extra[]) const; + // meant to be called AFTER executing the time-stepping operation. + // once the particles have been moved, ensure that the orientation is a unit vector. + template <int size_particle_positions> + void enforce_unit_orientation( + const partsize_t nb_particles, + real_number pos_part[]) const; + + bool isEnable() const { + return isActive; + } + + void setEnable(const bool inIsActive) { + isActive = inIsActive; + } +}; + +#endif + diff --git a/cpp/particles/particles_inner_computer_empty.hpp b/cpp/particles/particles_inner_computer_empty.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a90d3aa1b9f5ca5e9e2085173c6c55a25809b469 --- /dev/null +++ b/cpp/particles/particles_inner_computer_empty.hpp @@ -0,0 +1,57 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. 
* +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef PARTICLES_INNER_COMPUTER_EMPTY_HPP +#define PARTICLES_INNER_COMPUTER_EMPTY_HPP + +#include <cstring> +#include <cassert> + +template <class real_number, class partsize_t> +class particles_inner_computer_empty{ +public: + template <int size_particle_positions, int size_particle_rhs> + void compute_interaction(const partsize_t /*nb_particles*/, real_number /*pos_part*/[], real_number /*rhs_part*/[]) const{ + } + + template <int size_particle_positions> + void enforce_unit_orientation(const partsize_t /*nb_particles*/, real_number /*pos_part*/[]) const{ + } + + template <int size_particle_positions, int size_particle_rhs> + void add_Lagrange_multipliers(const partsize_t /*nb_particles*/, real_number /*pos_part*/[], real_number /*rhs_part*/[]) const{ + } + + template <int size_particle_positions, int size_particle_rhs, int size_particle_rhs_extra> + void compute_interaction_with_extra(const partsize_t /*nb_particles*/, real_number /*pos_part*/[], real_number /*rhs_part*/[], + const real_number /*rhs_part_extra*/[]) const{ + } + + constexpr static bool isEnable() { + return false; + } +}; + +#endif diff --git a/bfps/cpp/particles/particles_input_hdf5.hpp b/cpp/particles/particles_input_hdf5.hpp similarity index 62% rename from bfps/cpp/particles/particles_input_hdf5.hpp rename to cpp/particles/particles_input_hdf5.hpp index 32cfec05ad854cd7f3ffd88d771418d0552237d8..3f895be3613030fca0a0fce1a786bb6fc541fe9c 100644 --- a/bfps/cpp/particles/particles_input_hdf5.hpp +++ b/cpp/particles/particles_input_hdf5.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_INPUT_HDF5_HPP #define PARTICLES_INPUT_HDF5_HPP @@ -14,8 +39,6 @@ #include "scope_timer.hpp" -// why is "size_particle_rhs" a template parameter? -// I think it's safe to assume this will always be 3. 
template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs> class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_number> { const std::string filename; @@ -24,16 +47,19 @@ class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_nu int my_rank; int nb_processes; - hsize_t nb_total_particles; + hsize_t total_number_of_particles; hsize_t nb_rhs; partsize_t nb_particles_for_me; + std::vector<hsize_t> particle_file_layout; // to hold the shape of initial condition array std::unique_ptr<real_number[]> my_particles_positions; std::unique_ptr<partsize_t[]> my_particles_indexes; std::vector<std::unique_ptr<real_number[]>> my_particles_rhs; - static std::vector<real_number> BuildLimitsAllProcesses(MPI_Comm mpi_comm, - const real_number my_spatial_low_limit, const real_number my_spatial_up_limit){ + static std::vector<real_number> BuildLimitsAllProcesses( + MPI_Comm mpi_comm, + const real_number my_spatial_low_limit, + const real_number my_spatial_up_limit){ int my_rank; int nb_processes; @@ -43,8 +69,15 @@ class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_nu std::vector<real_number> spatial_limit_per_proc(nb_processes*2); real_number intervalToSend[2] = {my_spatial_low_limit, my_spatial_up_limit}; - AssertMpi(MPI_Allgather(intervalToSend, 2, particles_utils::GetMpiType(real_number()), - spatial_limit_per_proc.data(), 2, particles_utils::GetMpiType(real_number()), mpi_comm)); + AssertMpi( + MPI_Allgather( + intervalToSend, + 2, + particles_utils::GetMpiType(real_number()), + spatial_limit_per_proc.data(), + 2, + particles_utils::GetMpiType(real_number()), + mpi_comm)); for(int idx_proc = 0; idx_proc < nb_processes-1 ; ++idx_proc){ assert(spatial_limit_per_proc[idx_proc*2] <= spatial_limit_per_proc[idx_proc*2+1]); @@ -58,18 +91,35 @@ class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_nu } public: - particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename, - const std::string& inDatanameState, const std::string& inDatanameRhs, - const real_number my_spatial_low_limit, const real_number my_spatial_up_limit) - : particles_input_hdf5(in_mpi_comm, inFilename, inDatanameState, inDatanameRhs, - BuildLimitsAllProcesses(in_mpi_comm, my_spatial_low_limit, my_spatial_up_limit)){ + particles_input_hdf5( + const MPI_Comm in_mpi_comm, + const std::string& inFilename, + const std::string& inDatanameState, + const std::string& inDatanameRhs, + const real_number my_spatial_low_limit, + const real_number my_spatial_up_limit) + : particles_input_hdf5( + in_mpi_comm, + inFilename, + inDatanameState, + inDatanameRhs, + BuildLimitsAllProcesses( + in_mpi_comm, + my_spatial_low_limit, + my_spatial_up_limit)){ } - particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename, - const std::string& inDatanameState, const std::string& inDatanameRhs, - const std::vector<real_number>& in_spatial_limit_per_proc) + particles_input_hdf5( + const MPI_Comm in_mpi_comm, + const std::string& inFilename, + const std::string& inDatanameState, + const std::string& inDatanameRhs, + const std::vector<real_number>& in_spatial_limit_per_proc) : filename(inFilename), - mpi_comm(in_mpi_comm), my_rank(-1), nb_processes(-1), nb_total_particles(0), + mpi_comm(in_mpi_comm), + my_rank(-1), + nb_processes(-1), + total_number_of_particles(0), nb_particles_for_me(0){ TIMEZONE("particles_input_hdf5"); @@ -81,6 +131,7 @@ public: assert(plist_id_par >= 0); { int retTest = 
H5Pset_fapl_mpio(plist_id_par, mpi_comm, MPI_INFO_NULL); + variable_used_only_in_assert(retTest); assert(retTest >= 0); } @@ -100,13 +151,17 @@ public: std::vector<hsize_t> state_dim_array(space_dim); int hdfret = H5Sget_simple_extent_dims(dspace, &state_dim_array[0], NULL); + variable_used_only_in_assert(hdfret); assert(hdfret >= 0); // Last value is the position dim of the particles assert(state_dim_array.back() == size_particle_positions); - nb_total_particles = 1; + // compute total number of particles, store initial condition array shape + total_number_of_particles = 1; + particle_file_layout.resize(state_dim_array.size()-1); for (size_t idx_dim = 0; idx_dim < state_dim_array.size()-1; ++idx_dim){ - nb_total_particles *= state_dim_array[idx_dim]; + total_number_of_particles *= state_dim_array[idx_dim]; + particle_file_layout[idx_dim] = state_dim_array[idx_dim]; } hdfret = H5Sclose(dspace); @@ -128,6 +183,7 @@ public: // Chichi comment: wouldn't &rhs_dim_array.front() be safer? int hdfret = H5Sget_simple_extent_dims(dspace, &rhs_dim_array[0], NULL); + variable_used_only_in_assert(hdfret); assert(hdfret >= 0); assert(rhs_dim_array.back() == size_particle_rhs); // Chichi comment: this assertion will fail in general @@ -140,30 +196,36 @@ public: assert(hdfret >= 0); } - particles_utils::IntervalSplitter<hsize_t> load_splitter(nb_total_particles, nb_processes, my_rank); + particles_utils::IntervalSplitter<hsize_t> load_splitter(total_number_of_particles, nb_processes, my_rank); static_assert(std::is_same<real_number, double>::value || std::is_same<real_number, float>::value, "real_number must be double or float"); const hid_t type_id = (sizeof(real_number) == 8?H5T_NATIVE_DOUBLE:H5T_NATIVE_FLOAT); /// Load the data - std::unique_ptr<real_number[]> split_particles_positions(new real_number[load_splitter.getMySize()*size_particle_positions]); + std::unique_ptr<real_number[]> split_particles_positions; + if(load_splitter.getMySize()){ + split_particles_positions.reset(new real_number[load_splitter.getMySize()*size_particle_positions]); + } + { TIMEZONE("state-read"); hid_t dset = H5Dopen(particle_file, inDatanameState.c_str(), H5P_DEFAULT); assert(dset >= 0); - hid_t rspace = H5Dget_space(dset); + hsize_t file_space_dims[2] = {total_number_of_particles, size_particle_positions}; + hid_t rspace = H5Screate_simple(2, file_space_dims, NULL); assert(rspace >= 0); hsize_t offset[2] = {load_splitter.getMyOffset(), 0}; - hsize_t mem_dims[2] = {load_splitter.getMySize(), 3}; + hsize_t mem_dims[2] = {load_splitter.getMySize(), size_particle_positions}; hid_t mspace = H5Screate_simple(2, &mem_dims[0], NULL); assert(mspace >= 0); int rethdf = H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, mem_dims, NULL); + variable_used_only_in_assert(rethdf); assert(rethdf >= 0); rethdf = H5Dread(dset, type_id, mspace, rspace, H5P_DEFAULT, split_particles_positions.get()); assert(rethdf >= 0); @@ -178,12 +240,14 @@ public: TIMEZONE("rhs-read"); hid_t dset = H5Dopen(particle_file, inDatanameRhs.c_str(), H5P_DEFAULT); assert(dset >= 0); + hsize_t file_space_dims[3] = {nb_rhs, total_number_of_particles, size_particle_rhs}; + hid_t rspace = H5Screate_simple(3, file_space_dims, NULL); + assert(rspace >= 0); for(hsize_t idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ - hid_t rspace = H5Dget_space(dset); - assert(rspace >= 0); - - split_particles_rhs[idx_rhs].reset(new real_number[load_splitter.getMySize()*size_particle_rhs]); + if(load_splitter.getMySize()){ + split_particles_rhs[idx_rhs].reset(new 
real_number[load_splitter.getMySize()*size_particle_rhs]); + } hsize_t offset[3] = {idx_rhs, load_splitter.getMyOffset(), 0}; hsize_t mem_dims[3] = {1, load_splitter.getMySize(), size_particle_rhs}; @@ -193,21 +257,26 @@ public: int rethdf = H5Sselect_hyperslab( rspace, H5S_SELECT_SET, offset, NULL, mem_dims, NULL); + variable_used_only_in_assert(rethdf); assert(rethdf >= 0); rethdf = H5Dread(dset, type_id, mspace, rspace, H5P_DEFAULT, split_particles_rhs[idx_rhs].get()); assert(rethdf >= 0); rethdf = H5Sclose(mspace); assert(rethdf >= 0); - - rethdf = H5Sclose(rspace); - assert(rethdf >= 0); } - int rethdf = H5Dclose(dset); + + int rethdf = H5Sclose(rspace); + assert(rethdf >= 0); + rethdf = H5Dclose(dset); + variable_used_only_in_assert(rethdf); assert(rethdf >= 0); } - std::unique_ptr<partsize_t[]> split_particles_indexes(new partsize_t[load_splitter.getMySize()]); + std::unique_ptr<partsize_t[]> split_particles_indexes; + if(load_splitter.getMySize()){ + split_particles_indexes.reset(new partsize_t[load_splitter.getMySize()]); + } for(partsize_t idx_part = 0 ; idx_part < partsize_t(load_splitter.getMySize()) ; ++idx_part){ split_particles_indexes[idx_part] = idx_part + partsize_t(load_splitter.getMyOffset()); } @@ -227,7 +296,7 @@ public: &split_particles_positions[previousOffset*size_particle_positions], partsize_t(load_splitter.getMySize())-previousOffset, [&](const real_number val[]){ - const real_number shiftPos = val[IDX_Z]-spatial_box_offset; + const real_number shiftPos = val[IDXC_Z]-spatial_box_offset; const real_number nbRepeat = floor(shiftPos/spatial_box_width); const real_number posInBox = shiftPos - (spatial_box_width*nbRepeat); return posInBox < limitPartitionShifted; @@ -254,17 +323,23 @@ public: // nb_particles_per_processes cannot be used after due to move nb_particles_for_me = exchanger.getTotalToRecv(); - my_particles_positions.reset(new real_number[exchanger.getTotalToRecv()*size_particle_positions]); + if(nb_particles_for_me){ + my_particles_positions.reset(new real_number[exchanger.getTotalToRecv()*size_particle_positions]); + } exchanger.alltoallv<real_number>(split_particles_positions.get(), my_particles_positions.get(), size_particle_positions); - split_particles_positions.release(); + delete[] split_particles_positions.release(); - my_particles_indexes.reset(new partsize_t[exchanger.getTotalToRecv()]); + if(nb_particles_for_me){ + my_particles_indexes.reset(new partsize_t[exchanger.getTotalToRecv()]); + } exchanger.alltoallv<partsize_t>(split_particles_indexes.get(), my_particles_indexes.get()); - split_particles_indexes.release(); + delete[] split_particles_indexes.release(); my_particles_rhs.resize(nb_rhs); for(int idx_rhs = 0 ; idx_rhs < int(nb_rhs) ; ++idx_rhs){ - my_particles_rhs[idx_rhs].reset(new real_number[exchanger.getTotalToRecv()*size_particle_rhs]); + if(nb_particles_for_me){ + my_particles_rhs[idx_rhs].reset(new real_number[exchanger.getTotalToRecv()*size_particle_rhs]); + } exchanger.alltoallv<real_number>(split_particles_rhs[idx_rhs].get(), my_particles_rhs[idx_rhs].get(), size_particle_rhs); } } @@ -272,6 +347,7 @@ public: { TIMEZONE("close"); int hdfret = H5Fclose(particle_file); + variable_used_only_in_assert(hdfret); assert(hdfret >= 0); hdfret = H5Pclose(plist_id_par); assert(hdfret >= 0); @@ -282,7 +358,7 @@ public: } partsize_t getTotalNbParticles() final{ - return partsize_t(nb_total_particles); + return partsize_t(total_number_of_particles); } partsize_t getLocalNbParticles() final{ @@ -294,7 +370,7 @@ public: } 
std::unique_ptr<real_number[]> getMyParticles() final { - assert(my_particles_positions != nullptr); + assert(my_particles_positions != nullptr || nb_particles_for_me == 0); return std::move(my_particles_positions); } @@ -304,9 +380,13 @@ public: } std::unique_ptr<partsize_t[]> getMyParticlesIndexes() final { - assert(my_particles_indexes != nullptr); + assert(my_particles_indexes != nullptr || nb_particles_for_me == 0); return std::move(my_particles_indexes); } + + std::vector<hsize_t> getParticleFileLayout(){ + return std::vector<hsize_t>(this->particle_file_layout); + } }; #endif diff --git a/bfps/cpp/particles/particles_output_hdf5.hpp b/cpp/particles/particles_output_hdf5.hpp similarity index 71% rename from bfps/cpp/particles/particles_output_hdf5.hpp rename to cpp/particles/particles_output_hdf5.hpp index bc0a03690293668203dd78978680fdea03ab3a28..6be651799f1bf98e3215cc3b0988b77975706b19 100644 --- a/bfps/cpp/particles/particles_output_hdf5.hpp +++ b/cpp/particles/particles_output_hdf5.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_OUTPUT_HDF5_HPP #define PARTICLES_OUTPUT_HDF5_HPP @@ -10,21 +35,19 @@ template <class partsize_t, class real_number, - int size_particle_positions, - int size_particle_rhs> + int size_particle_positions> class particles_output_hdf5 : public abstract_particles_output<partsize_t, real_number, - size_particle_positions, - size_particle_rhs>{ + size_particle_positions>{ using Parent = abstract_particles_output<partsize_t, real_number, - size_particle_positions, - size_particle_rhs>; + size_particle_positions>; - const std::string particle_species_name; + std::string particle_species_name; hid_t file_id; const partsize_t total_nb_particles; + std::vector<hsize_t> particle_file_layout; // to hold the shape of initial condition array hid_t dset_id_state; hid_t dset_id_rhs; @@ -39,8 +62,7 @@ public: const bool in_use_collective_io = false) : abstract_particles_output<partsize_t, real_number, - size_particle_positions, - size_particle_rhs>( + size_particle_positions>( in_mpi_com, inTotalNbParticles, in_nb_rhs), @@ -63,6 +85,7 @@ public: plist_id_par, Parent::getComWriter(), MPI_INFO_NULL); + variable_used_only_in_assert(retTest); assert(retTest >= 0); // Parallel HDF5 write @@ -90,11 +113,18 @@ public: ~particles_output_hdf5(){} + void update_particle_species_name( + const std::string new_name) + { + this->particle_species_name.assign(new_name); + } + int close_file(void){ if(Parent::isInvolved()){ TIMEZONE("particles_output_hdf5::close_file"); int rethdf = H5Gclose(dset_id_state); + variable_used_only_in_assert(rethdf); assert(rethdf >= 0); rethdf = H5Gclose(dset_id_rhs); @@ -177,7 +207,8 @@ public: const real_number* particles_positions, const std::unique_ptr<real_number[]>* particles_rhs, const partsize_t nb_particles, - const partsize_t particles_idx_offset) final{ + const partsize_t particles_idx_offset, + const int size_particle_rhs) final{ assert(Parent::isInvolved()); TIMEZONE("particles_output_hdf5::write"); @@ -194,16 +225,14 @@ public: assert(plist_id >= 0); { int rethdf = H5Pset_dxpl_mpio(plist_id, use_collective_io ? 
H5FD_MPIO_COLLECTIVE : H5FD_MPIO_INDEPENDENT); + variable_used_only_in_assert(rethdf); assert(rethdf >= 0); } { - assert(total_nb_particles >= 0); - assert(size_particle_positions >= 0); - const hsize_t datacount[2] = { - hsize_t(total_nb_particles), - hsize_t(size_particle_positions)}; - hid_t dataspace = H5Screate_simple(2, datacount, NULL); + std::vector<hsize_t> datacount = std::vector<hsize_t>(this->particle_file_layout); + datacount.push_back(size_particle_positions); + hid_t dataspace = H5Screate_simple(datacount.size(), &datacount.front(), NULL); assert(dataspace >= 0); hid_t dataset_id = H5Dcreate( dset_id_state, @@ -222,7 +251,12 @@ public: hid_t memspace = H5Screate_simple(2, count, NULL); assert(memspace >= 0); - hid_t filespace = H5Dget_space(dataset_id); + assert(total_nb_particles >= 0); + assert(size_particle_positions >= 0); + const hsize_t file_count[2] = {hsize_t(total_nb_particles), size_particle_positions}; + hid_t filespace = H5Screate_simple(2, file_count, NULL); + assert(filespace >= 0); + int rethdf = H5Sselect_hyperslab( filespace, H5S_SELECT_SET, @@ -230,6 +264,7 @@ public: NULL, count, NULL); + variable_used_only_in_assert(rethdf); assert(rethdf >= 0); herr_t status = H5Dwrite( @@ -239,6 +274,7 @@ public: filespace, plist_id, particles_positions); + variable_used_only_in_assert(status); assert(status >= 0); rethdf = H5Sclose(memspace); assert(rethdf >= 0); @@ -249,10 +285,10 @@ public: } { assert(size_particle_rhs >= 0); - const hsize_t datacount[3] = {hsize_t(Parent::getNbRhs()), - hsize_t(total_nb_particles), - hsize_t(size_particle_rhs)}; - hid_t dataspace = H5Screate_simple(3, datacount, NULL); + std::vector<hsize_t> datacount = std::vector<hsize_t>(this->particle_file_layout); + datacount.insert(datacount.begin(), hsize_t(Parent::getNbRhs())); + datacount.push_back(size_particle_positions); + hid_t dataspace = H5Screate_simple(datacount.size(), &datacount.front(), NULL); assert(dataspace >= 0); hid_t dataset_id = H5Dcreate( dset_id_rhs, @@ -277,8 +313,12 @@ public: hid_t memspace = H5Screate_simple(3, count, NULL); assert(memspace >= 0); - hid_t filespace = H5Dget_space(dataset_id); + assert(total_nb_particles >= 0); + assert(size_particle_positions >= 0); + const hsize_t file_count[3] = {hsize_t(Parent::getNbRhs()), hsize_t(total_nb_particles), size_particle_positions}; + hid_t filespace = H5Screate_simple(3, file_count, NULL); assert(filespace >= 0); + int rethdf = H5Sselect_hyperslab( filespace, H5S_SELECT_SET, @@ -286,6 +326,7 @@ public: NULL, count, NULL); + variable_used_only_in_assert(rethdf); assert(rethdf >= 0); herr_t status = H5Dwrite( @@ -295,6 +336,7 @@ public: filespace, plist_id, particles_rhs[idx_rhs].get()); + variable_used_only_in_assert(status); assert(status >= 0); rethdf = H5Sclose(filespace); assert(rethdf >= 0); @@ -302,14 +344,27 @@ public: assert(rethdf >= 0); } int rethdf = H5Dclose(dataset_id); + variable_used_only_in_assert(rethdf); assert(rethdf >= 0); } { int rethdf = H5Pclose(plist_id); + variable_used_only_in_assert(rethdf); assert(rethdf >= 0); } } + + int setParticleFileLayout(std::vector<hsize_t> input_layout){ + this->particle_file_layout.resize(input_layout.size()); + for (unsigned int i=0; i<this->particle_file_layout.size(); i++) + this->particle_file_layout[i] = input_layout[i]; + return EXIT_SUCCESS; + } + + std::vector<hsize_t> getParticleFileLayout(void){ + return std::vector<hsize_t>(this->particle_file_layout); + } }; #endif//PARTICLES_OUTPUT_HDF5_HPP diff --git a/bfps/cpp/particles/particles_output_mpiio.hpp 
b/cpp/particles/particles_output_mpiio.hpp similarity index 68% rename from bfps/cpp/particles/particles_output_mpiio.hpp rename to cpp/particles/particles_output_mpiio.hpp index 77dae6ca2f9441948ccf04f8a72e4a53d249894b..b1c17898c3c2941e0ed161e40113a0d13c99b524 100644 --- a/bfps/cpp/particles/particles_output_mpiio.hpp +++ b/cpp/particles/particles_output_mpiio.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_OUTPUT_MPIIO #define PARTICLES_OUTPUT_MPIIO @@ -11,8 +36,8 @@ #include "particles_utils.hpp" template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs> -class particles_output_mpiio : public abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>{ - using Parent = abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>; +class particles_output_mpiio : public abstract_particles_output<partsize_t, real_number, size_particle_positions>{ + using Parent = abstract_particles_output<partsize_t, real_number, size_particle_positions>; const std::string filename; const int nb_step_prealloc; @@ -24,7 +49,7 @@ class particles_output_mpiio : public abstract_particles_output<partsize_t, real public: particles_output_mpiio(MPI_Comm in_mpi_com, const std::string in_filename, const partsize_t inTotalNbParticles, const int in_nb_rhs, const int in_nb_step_prealloc = -1) - : abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>(in_mpi_com, inTotalNbParticles, in_nb_rhs), + : abstract_particles_output<partsize_t, real_number, size_particle_positions>(in_mpi_com, inTotalNbParticles, in_nb_rhs), filename(in_filename), nb_step_prealloc(in_nb_step_prealloc), current_step_in_file(0){ if(Parent::isInvolved()){ { diff --git a/cpp/particles/particles_output_sampling_hdf5.hpp b/cpp/particles/particles_output_sampling_hdf5.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ea3c94dcec25572dd324c643d9cc80468ba51680 --- /dev/null +++ b/cpp/particles/particles_output_sampling_hdf5.hpp @@ -0,0 +1,293 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. 
* +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef PARTICLES_OUTPUT_SAMPLING_HDF5_HPP +#define PARTICLES_OUTPUT_SAMPLING_HDF5_HPP + +#include "abstract_particles_output.hpp" + +#include <hdf5.h> + +template <class partsize_t, + class real_number, + int size_particle_positions> +class particles_output_sampling_hdf5 : public abstract_particles_output< + partsize_t, + real_number, + size_particle_positions>{ + using Parent = abstract_particles_output<partsize_t, + real_number, + size_particle_positions>; + + hid_t file_id, pgroup_id; + + std::string dataset_name; + std::vector<hsize_t> particle_file_layout; // to hold the shape of initial condition array + const bool use_collective_io; + +public: + static bool DatasetExistsCol(MPI_Comm in_mpi_com, + const std::string& in_filename, + const std::string& in_groupname, + const std::string& in_dataset_name){ + int my_rank; + AssertMpi(MPI_Comm_rank(in_mpi_com, &my_rank)); + + int dataset_exists = -1; + + if(my_rank == 0){ + hid_t file_id = H5Fopen( + in_filename.c_str(), + H5F_ACC_RDWR | H5F_ACC_DEBUG, + H5P_DEFAULT); + assert(file_id >= 0); + + dataset_exists = H5Lexists( + file_id, + (in_groupname + "/" + in_dataset_name).c_str(), + H5P_DEFAULT); + + int retTest = H5Fclose(file_id); + assert(retTest >= 0); + } + + AssertMpi(MPI_Bcast( &dataset_exists, 1, MPI_INT, 0, in_mpi_com )); + return dataset_exists; + } + + particles_output_sampling_hdf5( + MPI_Comm in_mpi_com, + const partsize_t inTotalNbParticles, + const std::string& in_filename, + const std::string& in_groupname, + const std::string& in_dataset_name, + const bool in_use_collective_io = false) + : Parent(in_mpi_com, inTotalNbParticles, 1), + dataset_name(in_dataset_name), + use_collective_io(in_use_collective_io){ + if(Parent::isInvolved()){ + // prepare parallel MPI access property list + hid_t plist_id_par = H5Pcreate(H5P_FILE_ACCESS); + assert(plist_id_par >= 0); + int retTest = H5Pset_fapl_mpio( + plist_id_par, + Parent::getComWriter(), + MPI_INFO_NULL); + variable_used_only_in_assert(retTest); + assert(retTest >= 0); + + // open file for parallel HDF5 access + file_id = H5Fopen( + in_filename.c_str(), + H5F_ACC_RDWR | H5F_ACC_DEBUG, + plist_id_par); + assert(file_id >= 0); + retTest = H5Pclose(plist_id_par); + assert(retTest >= 0); + + // open group + pgroup_id = H5Gopen( + file_id, + in_groupname.c_str(), + H5P_DEFAULT); + assert(pgroup_id >= 0); + } + } + + ~particles_output_sampling_hdf5(){ + if(Parent::isInvolved()){ + // close group + int retTest = H5Gclose(pgroup_id); + variable_used_only_in_assert(retTest); + assert(retTest >= 0); + // close file + retTest = H5Fclose(file_id); + assert(retTest >= 0); + } + } + + int switch_to_group( + const std::string &in_groupname) + { + if(Parent::isInvolved()){ + // close old group + int retTest = H5Gclose(pgroup_id); + variable_used_only_in_assert(retTest); + assert(retTest >= 0); + + // open new group + pgroup_id = H5Gopen( + file_id, + in_groupname.c_str(), + H5P_DEFAULT); + assert(pgroup_id >= 0); + } + return EXIT_SUCCESS; + } + + template <int 
size_particle_rhs> + int save_dataset( + const std::string& in_groupname, + const std::string& in_dataset_name, + const real_number input_particles_positions[], + const std::unique_ptr<real_number[]> input_particles_rhs[], + const partsize_t index_particles[], + const partsize_t nb_particles, + const int idx_time_step) + { + // update group + int retTest = this->switch_to_group( + in_groupname); + variable_used_only_in_assert(retTest); + assert(retTest == EXIT_SUCCESS); + // update dataset name + dataset_name = in_dataset_name + "/" + std::to_string(idx_time_step); + int dataset_exists; + if (this->getMyRank() == 0) + dataset_exists = H5Lexists( + pgroup_id, + dataset_name.c_str(), + H5P_DEFAULT); + AssertMpi(MPI_Bcast(&dataset_exists, 1, MPI_INT, 0, this->getCom())); + if (dataset_exists == 0) + this->template save<size_particle_rhs>( + input_particles_positions, + input_particles_rhs, + index_particles, + nb_particles, + idx_time_step); + return EXIT_SUCCESS; + } + + void write( + const int /*idx_time_step*/, + const real_number* /*particles_positions*/, + const std::unique_ptr<real_number[]>* particles_rhs, + const partsize_t nb_particles, + const partsize_t particles_idx_offset, + const int size_particle_rhs) final{ + assert(Parent::isInvolved()); + + TIMEZONE("particles_output_hdf5::write"); + + assert(particles_idx_offset < Parent::getTotalNbParticles() || + (particles_idx_offset == Parent::getTotalNbParticles() && + nb_particles == 0)); + assert(particles_idx_offset+nb_particles <= Parent::getTotalNbParticles()); + + static_assert(std::is_same<real_number, double>::value || + std::is_same<real_number, float>::value, + "real_number must be double or float"); + const hid_t type_id = (sizeof(real_number) == 8 ? + H5T_NATIVE_DOUBLE : + H5T_NATIVE_FLOAT); + + hid_t plist_id = H5Pcreate(H5P_DATASET_XFER); + assert(plist_id >= 0); + { + int rethdf = H5Pset_dxpl_mpio( + plist_id, + (use_collective_io ? 
+ H5FD_MPIO_COLLECTIVE : + H5FD_MPIO_INDEPENDENT)); + variable_used_only_in_assert(rethdf); + assert(rethdf >= 0); + } + { + assert(size_particle_rhs >= 0); + std::vector<hsize_t> datacount = std::vector<hsize_t>(this->particle_file_layout); + datacount.push_back(size_particle_positions); + hid_t dataspace = H5Screate_simple(datacount.size(), &datacount.front(), NULL); + assert(dataspace >= 0); + + hid_t dataset_id = H5Dcreate( pgroup_id, + dataset_name.c_str(), + type_id, + dataspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + assert(dataset_id >= 0); + + assert(particles_idx_offset >= 0); + const hsize_t count[2] = { + hsize_t(nb_particles), + hsize_t(size_particle_rhs)}; + const hsize_t offset[2] = { + hsize_t(particles_idx_offset), + 0}; + hid_t memspace = H5Screate_simple(2, count, NULL); + assert(memspace >= 0); + + const hsize_t file_count[2] = {hsize_t(Parent::getTotalNbParticles()), hsize_t(size_particle_rhs)}; + hid_t filespace = H5Screate_simple(2, file_count, NULL); + assert(filespace >= 0); + int rethdf = H5Sselect_hyperslab( + filespace, + H5S_SELECT_SET, + offset, + NULL, + count, + NULL); + variable_used_only_in_assert(rethdf); + assert(rethdf >= 0); + + herr_t status = H5Dwrite( + dataset_id, + type_id, + memspace, + filespace, + plist_id, + particles_rhs[0].get()); + variable_used_only_in_assert(status); + assert(status >= 0); + rethdf = H5Sclose(filespace); + assert(rethdf >= 0); + rethdf = H5Sclose(memspace); + assert(rethdf >= 0); + rethdf = H5Dclose(dataset_id); + assert(rethdf >= 0); + } + + { + int rethdf = H5Pclose(plist_id); + variable_used_only_in_assert(rethdf); + assert(rethdf >= 0); + } + } + + int setParticleFileLayout(std::vector<hsize_t> input_layout){ + this->particle_file_layout.resize(input_layout.size()); + for (unsigned int i=0; i<this->particle_file_layout.size(); i++) + this->particle_file_layout[i] = input_layout[i]; + return EXIT_SUCCESS; + } + + std::vector<hsize_t> getParticleFileLayout(void){ + return std::vector<hsize_t>(this->particle_file_layout); + } +}; + +#endif diff --git a/cpp/particles/particles_sampling.hpp b/cpp/particles/particles_sampling.hpp new file mode 100644 index 0000000000000000000000000000000000000000..672c080aea1b59e250109f94fd5fee388e199755 --- /dev/null +++ b/cpp/particles/particles_sampling.hpp @@ -0,0 +1,109 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef PARTICLES_SAMPLING_HPP +#define PARTICLES_SAMPLING_HPP + +#include <memory> +#include <string> + +#include "abstract_particles_system.hpp" +#include "particles_output_sampling_hdf5.hpp" + +#include "field.hpp" +#include "kspace.hpp" + + +template <class partsize_t, class particles_rnumber, class rnumber, field_backend be, field_components fc> +void sample_from_particles_system(const field<rnumber, be, fc>& in_field, // a pointer to a field<rnumber, FFTW, fc> + std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>& ps, // a pointer to an particles_system<double> + const std::string& filename, + const std::string& parent_groupname, + const std::string& fname){ + const std::string datasetname = fname + std::string("/") + std::to_string(ps->get_step_idx()); + const int size_particle_rhs = ncomp(fc); + + // Stop here if already exists + if(particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3>::DatasetExistsCol(MPI_COMM_WORLD, + filename, + parent_groupname, + datasetname)){ + return; + } + + const partsize_t nb_particles = ps->getLocalNbParticles(); + std::unique_ptr<particles_rnumber[]> sample_rhs(new particles_rnumber[size_particle_rhs*nb_particles]); + std::fill_n(sample_rhs.get(), size_particle_rhs*nb_particles, 0); + + ps->sample_compute_field(in_field, sample_rhs.get()); + + + + particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3> outputclass(MPI_COMM_WORLD, + ps->getGlobalNbParticles(), + filename, + parent_groupname, + datasetname); + outputclass.template save<size_particle_rhs>(ps->getParticlesState(), + &sample_rhs, + ps->getParticlesIndexes(), + ps->getLocalNbParticles(), + ps->get_step_idx()); +} + +template <class partsize_t, class particles_rnumber> +void sample_particles_system_position( + std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>& ps, // a pointer to an particles_system<double> + const std::string& filename, + const std::string& parent_groupname, + const std::string& fname){ + const std::string datasetname = fname + std::string("/") + std::to_string(ps->get_step_idx()); + + // Stop here if already exists + if(particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3>::DatasetExistsCol(MPI_COMM_WORLD, + filename, + parent_groupname, + datasetname)){ + return; + } + + const partsize_t nb_particles = ps->getLocalNbParticles(); + std::unique_ptr<particles_rnumber[]> sample_rhs(new particles_rnumber[3*nb_particles]); + std::copy(ps->getParticlesState(), ps->getParticlesState() + 3*nb_particles, sample_rhs.get()); + + particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3> outputclass(MPI_COMM_WORLD, + ps->getGlobalNbParticles(), + filename, + parent_groupname, + datasetname); + outputclass.template save<3>(ps->getParticlesState(), + &sample_rhs, + ps->getParticlesIndexes(), + ps->getLocalNbParticles(), + ps->get_step_idx()); +} + +#endif//PARTICLES_SAMPLING_HPP + diff --git a/bfps/cpp/particles/particles_system.hpp b/cpp/particles/particles_system.hpp similarity index 55% rename from bfps/cpp/particles/particles_system.hpp rename to cpp/particles/particles_system.hpp index 02767a8b433ecb8365f4a0577d1c0d6508c2bed1..a05175ca52c4c4b669f29f893913b3d7fcf6c484 100644 --- a/bfps/cpp/particles/particles_system.hpp +++ b/cpp/particles/particles_system.hpp @@ -1,3 +1,28 @@ 
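
The output classes introduced above (particles_output_hdf5, particles_output_sampling_hdf5) and the helpers in particles_sampling.hpp all funnel into the same parallel-HDF5 write pattern: every involved rank builds the full file dataspace, selects only its own hyperslab (offset = its particle index offset, count = its local particle count), and writes through an MPI-IO dataset-transfer property list. The stand-alone sketch below shows that pattern in isolation; the file name "particles_demo.h5", the dataset name "state" and the 4-particles-per-rank layout are illustrative only, and it assumes an HDF5 build with MPI support.

    #include <mpi.h>
    #include <hdf5.h>
    #include <vector>
    #include <cassert>

    int main(int argc, char *argv[])
    {
        MPI_Init(&argc, &argv);
        int rank, nprocs;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

        const hsize_t nb_local = 4;                        // particles owned by this rank (illustrative)
        const hsize_t nb_total = hsize_t(nprocs) * nb_local;
        const hsize_t ncomp    = 3;                        // x, y, z per particle
        std::vector<double> state(nb_local * ncomp, double(rank));

        // file access property list for MPI-IO, as in the constructors above
        hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
        H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);
        hid_t file_id = H5Fcreate("particles_demo.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl);
        assert(file_id >= 0);
        H5Pclose(fapl);

        // file dataspace covers all particles, memory dataspace only the local ones
        const hsize_t file_dims[2] = {nb_total, ncomp};
        const hsize_t mem_dims[2]  = {nb_local, ncomp};
        hid_t filespace = H5Screate_simple(2, file_dims, NULL);
        hid_t memspace  = H5Screate_simple(2, mem_dims, NULL);
        hid_t dset = H5Dcreate(file_id, "state", H5T_NATIVE_DOUBLE, filespace,
                               H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

        // each rank selects the hyperslab starting at its own particle offset
        const hsize_t offset[2] = {hsize_t(rank) * nb_local, 0};
        const hsize_t count[2]  = {nb_local, ncomp};
        H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL);

        // collective data transfer, i.e. the use_collective_io == true branch above
        hid_t dxpl = H5Pcreate(H5P_DATASET_XFER);
        H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
        herr_t status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, memspace, filespace, dxpl, state.data());
        assert(status >= 0);

        H5Pclose(dxpl);
        H5Dclose(dset);
        H5Sclose(memspace);
        H5Sclose(filespace);
        H5Fclose(file_id);
        MPI_Finalize();
        return 0;
    }
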
+/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_SYSTEM_HPP #define PARTICLES_SYSTEM_HPP @@ -12,9 +37,13 @@ #include "particles_adams_bashforth.hpp" #include "scope_timer.hpp" +#include "p2p_distr_mpi.hpp" + template <class partsize_t, class real_number, class field_rnumber, class field_class, class interpolator_class, int interp_neighbours, - int size_particle_rhs> + int size_particle_positions, int size_particle_rhs, class p2p_computer_class, class particles_inner_computer_class> class particles_system : public abstract_particles_system<partsize_t, real_number> { + static_assert(size_particle_positions >= 3, "There should be at least the positions X,Y,Z in the state"); + MPI_Comm mpi_com; const std::pair<int,int> current_partition_interval; @@ -24,12 +53,12 @@ class particles_system : public abstract_particles_system<partsize_t, real_numbe particles_distr_mpi<partsize_t, real_number> particles_distr; - particles_adams_bashforth<partsize_t, real_number, 3, size_particle_rhs> positions_updater; + particles_adams_bashforth<partsize_t, real_number, size_particle_positions, size_particle_rhs> positions_updater; using computer_class = particles_field_computer<partsize_t, real_number, interpolator_class, interp_neighbours>; computer_class computer; - field_class default_field; + const field_class& default_field; std::unique_ptr<partsize_t[]> current_my_nb_particles_per_partition; std::unique_ptr<partsize_t[]> current_offset_particles_for_partition; @@ -44,9 +73,14 @@ class particles_system : public abstract_particles_system<partsize_t, real_numbe partsize_t my_nb_particles; const partsize_t total_nb_particles; std::vector<std::unique_ptr<real_number[]>> my_particles_rhs; + std::vector<hsize_t> particle_file_layout; int step_idx; + p2p_distr_mpi<partsize_t, real_number> distr_p2p; + p2p_computer_class computer_p2p; + particles_inner_computer_class computer_particules_inner; + public: particles_system(const std::array<size_t,3>& field_grid_dim, const std::array<real_number,3>& in_spatial_box_width, const std::array<real_number,3>& in_spatial_box_offset, @@ -57,9 +91,12 @@ public: const field_class& in_field, MPI_Comm in_mpi_com, const partsize_t in_total_nb_particles, + const real_number in_cutoff, + p2p_computer_class in_computer_p2p, + particles_inner_computer_class in_computer_particules_inner, const int in_current_iteration = 1) : mpi_com(in_mpi_com), - current_partition_interval({in_local_field_offset[IDX_Z], in_local_field_offset[IDX_Z] + in_local_field_dims[IDX_Z]}), + current_partition_interval({in_local_field_offset[IDXC_Z], 
in_local_field_offset[IDXC_Z] + in_local_field_dims[IDXC_Z]}), partition_interval_size(current_partition_interval.second - current_partition_interval.first), interpolator(), particles_distr(in_mpi_com, current_partition_interval,field_grid_dim), @@ -69,7 +106,9 @@ public: default_field(in_field), spatial_box_width(in_spatial_box_width), spatial_partition_width(in_spatial_partition_width), my_spatial_low_limit(in_my_spatial_low_limit), my_spatial_up_limit(in_my_spatial_up_limit), - my_nb_particles(0), total_nb_particles(in_total_nb_particles), step_idx(in_current_iteration){ + my_nb_particles(0), total_nb_particles(in_total_nb_particles), step_idx(in_current_iteration), + distr_p2p(in_mpi_com, current_partition_interval,field_grid_dim, spatial_box_width, in_spatial_box_offset, in_cutoff), + computer_p2p(std::move(in_computer_p2p)), computer_particules_inner(std::move(in_computer_particules_inner)){ current_my_nb_particles_per_partition.reset(new partsize_t[partition_interval_size]); current_offset_particles_for_partition.reset(new partsize_t[partition_interval_size+1]); @@ -87,15 +126,16 @@ public: my_nb_particles = particles_input.getLocalNbParticles(); for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me - const int partition_level = computer.pbc_field_layer(my_particles_positions[idx_part*3+IDX_Z], IDX_Z); + const int partition_level = computer.pbc_field_layer(my_particles_positions[idx_part*size_particle_positions+IDXC_Z], IDXC_Z); + variable_used_only_in_assert(partition_level); assert(partition_level >= current_partition_interval.first); assert(partition_level < current_partition_interval.second); } - particles_utils::partition_extra_z<partsize_t, 3>(&my_particles_positions[0], my_nb_particles, partition_interval_size, + particles_utils::partition_extra_z<partsize_t, size_particle_positions>(&my_particles_positions[0], my_nb_particles, partition_interval_size, current_my_nb_particles_per_partition.get(), current_offset_particles_for_partition.get(), [&](const real_number& z_pos){ - const int partition_level = computer.pbc_field_layer(z_pos, IDX_Z); + const int partition_level = computer.pbc_field_layer(z_pos, IDXC_Z); assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second); return partition_level - current_partition_interval.first; }, @@ -114,16 +154,15 @@ public: assert(current_my_nb_particles_per_partition[idxPartition] == current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); for(partsize_t idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ - assert(computer.pbc_field_layer(my_particles_positions[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition); + assert(computer.pbc_field_layer(my_particles_positions[idx*size_particle_positions+IDXC_Z], IDXC_Z)-current_partition_interval.first == idxPartition); } } } } - void compute() final { TIMEZONE("particles_system::compute"); - particles_distr.template compute_distr<computer_class, field_class, 3, size_particle_rhs>( + particles_distr.template compute_distr<computer_class, field_class, size_particle_positions, size_particle_rhs>( computer, default_field, current_my_nb_particles_per_partition.get(), my_particles_positions.get(), @@ -131,11 +170,64 @@ public: interp_neighbours); } + void compute_p2p() final { + // TODO P2P + if(computer_p2p.isEnable() == true){ + 
TIMEZONE("particles_system::compute_p2p"); + distr_p2p.template compute_distr<p2p_computer_class, size_particle_positions, size_particle_rhs>( + computer_p2p, current_my_nb_particles_per_partition.get(), + my_particles_positions.get(), my_particles_rhs.front().get(), + my_particles_positions_indexes.get()); + } + } + + void compute_particles_inner() final { + if(computer_particules_inner.isEnable() == true){ + TIMEZONE("particles_system::compute_particles_inner"); + computer_particules_inner.template compute_interaction<size_particle_positions, size_particle_rhs>( + my_nb_particles, my_particles_positions.get(), my_particles_rhs.front().get()); + } + } + + void add_Lagrange_multipliers() final { + if(computer_particules_inner.isEnable() == true){ + TIMEZONE("particles_system::add_Lagrange_multipliers"); + computer_particules_inner.template add_Lagrange_multipliers<size_particle_positions, size_particle_rhs>( + my_nb_particles, my_particles_positions.get(), my_particles_rhs.front().get()); + } + } + + void enforce_unit_orientation() final { + if(computer_particules_inner.isEnable() == true){ + TIMEZONE("particles_system::enforce_unit_orientation"); + computer_particules_inner.template enforce_unit_orientation<size_particle_positions>( + my_nb_particles, my_particles_positions.get()); + } + } + + void compute_sphere_particles_inner(const real_number particle_extra_field[]) final { + if(computer_particules_inner.isEnable() == true){ + TIMEZONE("particles_system::compute_sphere_particles_inner"); + computer_particules_inner.template compute_interaction_with_extra<size_particle_positions, size_particle_rhs, 3>( + my_nb_particles, my_particles_positions.get(), my_particles_rhs.front().get(), + particle_extra_field); + } + } + + void compute_ellipsoid_particles_inner(const real_number particle_extra_field[]) final { + if(computer_particules_inner.isEnable() == true){ + TIMEZONE("particles_system::compute_ellipsoid_particles_inner"); + computer_particules_inner.template compute_interaction_with_extra<size_particle_positions, size_particle_rhs, 9>( + my_nb_particles, my_particles_positions.get(), my_particles_rhs.front().get(), + particle_extra_field); + } + } + template <class sample_field_class, int sample_size_particle_rhs> void sample_compute(const sample_field_class& sample_field, real_number sample_rhs[]) { TIMEZONE("particles_system::compute"); - particles_distr.template compute_distr<computer_class, sample_field_class, 3, sample_size_particle_rhs>( + particles_distr.template compute_distr<computer_class, sample_field_class, size_particle_positions, sample_size_particle_rhs>( computer, sample_field, current_my_nb_particles_per_partition.get(), my_particles_positions.get(), @@ -179,7 +271,7 @@ public: void redistribute() final { TIMEZONE("particles_system::redistribute"); - particles_distr.template redistribute<computer_class, 3, size_particle_rhs, 1>( + particles_distr.template redistribute<computer_class, size_particle_positions, size_particle_rhs, 1>( computer, current_my_nb_particles_per_partition.get(), &my_nb_particles, @@ -210,16 +302,61 @@ public: void completeLoop(const real_number dt) final { TIMEZONE("particles_system::completeLoop"); compute(); + compute_p2p(); + compute_particles_inner(); move(dt); + enforce_unit_orientation(); redistribute(); inc_step_idx(); shift_rhs_vectors(); } - const real_number* getParticlesPositions() const final { + void completeLoopWithVorticity( + const real_number dt, + const real_number particle_extra_field[]) final { + 
TIMEZONE("particles_system::completeLoopWithVorticity"); + compute(); + compute_p2p(); + compute_sphere_particles_inner(particle_extra_field); + move(dt); + enforce_unit_orientation(); + redistribute(); + inc_step_idx(); + shift_rhs_vectors(); + } + + void completeLoopWithVelocityGradient( + const real_number dt, + const real_number particle_extra_field[]) final { + TIMEZONE("particles_system::completeLoopWithVelocityGradient"); + compute(); + compute_p2p(); + compute_ellipsoid_particles_inner(particle_extra_field); + move(dt); + enforce_unit_orientation(); + redistribute(); + inc_step_idx(); + shift_rhs_vectors(); + } + + const real_number* getParticlesState() const final { return my_particles_positions.get(); } + std::unique_ptr<real_number[]> extractParticlesState(const int firstState, const int lastState) const final { + const int nbStates = std::max(0,(std::min(lastState,size_particle_positions)-firstState)); + + std::unique_ptr<real_number[]> stateExtract(new real_number[my_nb_particles*nbStates]); + + for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ + for(int idxState = 0 ; idxState < nbStates ; ++idxState){ + stateExtract[idx_part*nbStates + idxState] = my_particles_positions[idx_part*size_particle_positions + idxState+firstState]; + } + } + + return stateExtract; + } + const std::unique_ptr<real_number[]>* getParticlesRhs() const final { return my_particles_rhs.data(); } @@ -240,11 +377,22 @@ public: return int(my_particles_rhs.size()); } + int setParticleFileLayout(std::vector<hsize_t> input_layout) final{ + this->particle_file_layout.resize(input_layout.size()); + for (unsigned int i=0; i<this->particle_file_layout.size(); i++) + this->particle_file_layout[i] = input_layout[i]; + return EXIT_SUCCESS; + } + + std::vector<hsize_t> getParticleFileLayout(void) final{ + return std::vector<hsize_t>(this->particle_file_layout); + } + void checkNan() const { // TODO remove for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me - assert(std::isnan(my_particles_positions[idx_part*3+IDX_X]) == false); - assert(std::isnan(my_particles_positions[idx_part*3+IDX_Y]) == false); - assert(std::isnan(my_particles_positions[idx_part*3+IDX_Z]) == false); + assert(std::isnan(my_particles_positions[idx_part*size_particle_positions+IDXC_X]) == false); + assert(std::isnan(my_particles_positions[idx_part*size_particle_positions+IDXC_Y]) == false); + assert(std::isnan(my_particles_positions[idx_part*size_particle_positions+IDXC_Z]) == false); for(int idx_rhs = 0 ; idx_rhs < my_particles_rhs.size() ; ++idx_rhs){ for(int idx_rhs_val = 0 ; idx_rhs_val < size_particle_rhs ; ++idx_rhs_val){ diff --git a/bfps/cpp/particles/particles_system_builder.hpp b/cpp/particles/particles_system_builder.hpp similarity index 57% rename from bfps/cpp/particles/particles_system_builder.hpp rename to cpp/particles/particles_system_builder.hpp index 7a2d49c07c3a6de21fb93d83b338609be858f0dc..6a6f4a26f3f0f08703b1a1ed8857f2445b641ac7 100644 --- a/bfps/cpp/particles/particles_system_builder.hpp +++ b/cpp/particles/particles_system_builder.hpp @@ -1,12 +1,40 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_SYSTEM_BUILDER_HPP #define PARTICLES_SYSTEM_BUILDER_HPP #include <string> +#include <cmath> #include "abstract_particles_system.hpp" #include "particles_system.hpp" #include "particles_input_hdf5.hpp" #include "particles_generic_interp.hpp" +#include "p2p_computer_empty.hpp" +#include "particles_inner_computer_empty.hpp" #include "field.hpp" #include "kspace.hpp" @@ -108,7 +136,8 @@ inline RetType evaluate(IterType1 value1, IterType2 value2, Args... args){ /// ////////////////////////////////////////////////////////////////////////////// -template <class partsize_t, class field_rnumber, field_backend be, field_components fc, class particles_rnumber> +template <class partsize_t, class field_rnumber, field_backend be, field_components fc, class particles_rnumber, class p2p_computer_class, + class particles_inner_computer_class, int size_particle_positions, int size_particle_rhs> struct particles_system_build_container { template <const int interpolation_size, const int spline_mode> static std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> instanciate( @@ -119,25 +148,28 @@ struct particles_system_build_container { const std::string& fname_input, // particles input filename const std::string& inDatanameState, const std::string& inDatanameRhs, // input dataset names MPI_Comm mpi_comm, - const int in_current_iteration){ + const int in_current_iteration, + p2p_computer_class p2p_computer, + particles_inner_computer_class inner_computer, + const particles_rnumber cutoff = std::numeric_limits<particles_rnumber>::max()){ // The size of the field grid (global size) all_size seems std::array<size_t,3> field_grid_dim; - field_grid_dim[IDX_X] = fs_field->rlayout->sizes[FIELD_IDX_X];// nx - field_grid_dim[IDX_Y] = fs_field->rlayout->sizes[FIELD_IDX_Y];// nx - field_grid_dim[IDX_Z] = fs_field->rlayout->sizes[FIELD_IDX_Z];// nz + field_grid_dim[IDXC_X] = fs_field->rlayout->sizes[IDXV_X];// nx + field_grid_dim[IDXC_Y] = fs_field->rlayout->sizes[IDXV_Y];// nx + field_grid_dim[IDXC_Z] = fs_field->rlayout->sizes[IDXV_Z];// nz // The size of the local field grid (the field nodes that belong to current process) std::array<size_t,3> local_field_dims; - local_field_dims[IDX_X] = fs_field->rlayout->subsizes[FIELD_IDX_X]; - local_field_dims[IDX_Y] = fs_field->rlayout->subsizes[FIELD_IDX_Y]; - local_field_dims[IDX_Z] = fs_field->rlayout->subsizes[FIELD_IDX_Z]; + local_field_dims[IDXC_X] = fs_field->rlayout->subsizes[IDXV_X]; + local_field_dims[IDXC_Y] = fs_field->rlayout->subsizes[IDXV_Y]; + local_field_dims[IDXC_Z] = fs_field->rlayout->subsizes[IDXV_Z]; // The offset of the local field grid std::array<size_t,3> local_field_offset; - local_field_offset[IDX_X] = fs_field->rlayout->starts[FIELD_IDX_X]; - 
local_field_offset[IDX_Y] = fs_field->rlayout->starts[FIELD_IDX_Y]; - local_field_offset[IDX_Z] = fs_field->rlayout->starts[FIELD_IDX_Z]; + local_field_offset[IDXC_X] = fs_field->rlayout->starts[IDXV_X]; + local_field_offset[IDXC_Y] = fs_field->rlayout->starts[IDXV_Y]; + local_field_offset[IDXC_Z] = fs_field->rlayout->starts[IDXV_Z]; // Retreive split from fftw to know processes that have no work @@ -145,57 +177,60 @@ struct particles_system_build_container { AssertMpi(MPI_Comm_rank(mpi_comm, &my_rank)); AssertMpi(MPI_Comm_size(mpi_comm, &nb_processes)); - const int split_step = (int(field_grid_dim[IDX_Z])+nb_processes-1)/nb_processes; - const int nb_processes_involved = (int(field_grid_dim[IDX_Z])+split_step-1)/split_step; + const int split_step = (int(field_grid_dim[IDXC_Z])+nb_processes-1)/nb_processes; + const int nb_processes_involved = (int(field_grid_dim[IDXC_Z])+split_step-1)/split_step; - assert((my_rank < nb_processes_involved && local_field_dims[IDX_Z] != 0) - || (nb_processes_involved <= my_rank && local_field_dims[IDX_Z] == 0)); - assert(nb_processes_involved <= int(field_grid_dim[IDX_Z])); + assert((my_rank < nb_processes_involved && local_field_dims[IDXC_Z] != 0) + || (nb_processes_involved <= my_rank && local_field_dims[IDXC_Z] == 0)); + assert(nb_processes_involved <= int(field_grid_dim[IDXC_Z])); // Make the idle processes starting from the limit (and not 0 as set by fftw) if(nb_processes_involved <= my_rank){ - local_field_offset[IDX_Z] = field_grid_dim[IDX_Z]; + local_field_offset[IDXC_Z] = field_grid_dim[IDXC_Z]; } // Ensure that 1D partitioning is used { - assert(local_field_offset[IDX_X] == 0); - assert(local_field_offset[IDX_Y] == 0); - assert(local_field_dims[IDX_X] == field_grid_dim[IDX_X]); - assert(local_field_dims[IDX_Y] == field_grid_dim[IDX_Y]); - - assert(my_rank >= nb_processes_involved || ((my_rank == 0 && local_field_offset[IDX_Z] == 0) - || (my_rank != 0 && local_field_offset[IDX_Z] != 0))); - assert(my_rank >= nb_processes_involved || ((my_rank == nb_processes_involved-1 && local_field_offset[IDX_Z]+local_field_dims[IDX_Z] == field_grid_dim[IDX_Z]) - || (my_rank != nb_processes_involved-1 && local_field_offset[IDX_Z]+local_field_dims[IDX_Z] != field_grid_dim[IDX_Z]))); + assert(local_field_offset[IDXC_X] == 0); + assert(local_field_offset[IDXC_Y] == 0); + assert(local_field_dims[IDXC_X] == field_grid_dim[IDXC_X]); + assert(local_field_dims[IDXC_Y] == field_grid_dim[IDXC_Y]); + + assert(my_rank >= nb_processes_involved || ((my_rank == 0 && local_field_offset[IDXC_Z] == 0) + || (my_rank != 0 && local_field_offset[IDXC_Z] != 0))); + assert(my_rank >= nb_processes_involved || ((my_rank == nb_processes_involved-1 && local_field_offset[IDXC_Z]+local_field_dims[IDXC_Z] == field_grid_dim[IDXC_Z]) + || (my_rank != nb_processes_involved-1 && local_field_offset[IDXC_Z]+local_field_dims[IDXC_Z] != field_grid_dim[IDXC_Z]))); } // The spatial box size (all particles should be included inside) std::array<particles_rnumber,3> spatial_box_width; - spatial_box_width[IDX_X] = 4 * acos(0) / (fs_kk->dkx); - spatial_box_width[IDX_Y] = 4 * acos(0) / (fs_kk->dky); - spatial_box_width[IDX_Z] = 4 * acos(0) / (fs_kk->dkz); + spatial_box_width[IDXC_X] = 4 * acos(0) / (fs_kk->dkx); + spatial_box_width[IDXC_Y] = 4 * acos(0) / (fs_kk->dky); + spatial_box_width[IDXC_Z] = 4 * acos(0) / (fs_kk->dkz); // Box is in the corner std::array<particles_rnumber,3> spatial_box_offset; - spatial_box_offset[IDX_X] = 0; - spatial_box_offset[IDX_Y] = 0; - spatial_box_offset[IDX_Z] = 0; + 
spatial_box_offset[IDXC_X] = 0; + spatial_box_offset[IDXC_Y] = 0; + spatial_box_offset[IDXC_Z] = 0; // The distance between two field nodes in z std::array<particles_rnumber,3> spatial_partition_width; - spatial_partition_width[IDX_X] = spatial_box_width[IDX_X]/particles_rnumber(field_grid_dim[IDX_X]); - spatial_partition_width[IDX_Y] = spatial_box_width[IDX_Y]/particles_rnumber(field_grid_dim[IDX_Y]); - spatial_partition_width[IDX_Z] = spatial_box_width[IDX_Z]/particles_rnumber(field_grid_dim[IDX_Z]); + spatial_partition_width[IDXC_X] = spatial_box_width[IDXC_X]/particles_rnumber(field_grid_dim[IDXC_X]); + spatial_partition_width[IDXC_Y] = spatial_box_width[IDXC_Y]/particles_rnumber(field_grid_dim[IDXC_Y]); + spatial_partition_width[IDXC_Z] = spatial_box_width[IDXC_Z]/particles_rnumber(field_grid_dim[IDXC_Z]); // The spatial interval of the current process - const particles_rnumber my_spatial_low_limit_z = particles_rnumber(local_field_offset[IDX_Z])*spatial_partition_width[IDX_Z]; - const particles_rnumber my_spatial_up_limit_z = particles_rnumber(local_field_offset[IDX_Z]+local_field_dims[IDX_Z])*spatial_partition_width[IDX_Z]; + const particles_rnumber my_spatial_low_limit_z = particles_rnumber(local_field_offset[IDXC_Z])*spatial_partition_width[IDXC_Z]; + const particles_rnumber my_spatial_up_limit_z = particles_rnumber(local_field_offset[IDXC_Z]+local_field_dims[IDXC_Z])*spatial_partition_width[IDXC_Z]; // Create the particles system using particles_system_type = particles_system<partsize_t, particles_rnumber, field_rnumber, field<field_rnumber, be, fc>, particles_generic_interp<particles_rnumber, interpolation_size,spline_mode>, - interpolation_size, ncomp(fc)>; + interpolation_size, + size_particle_positions, size_particle_rhs, + p2p_computer_class, + particles_inner_computer_class>; particles_system_type* part_sys = new particles_system_type(field_grid_dim, spatial_box_width, spatial_box_offset, @@ -207,10 +242,14 @@ struct particles_system_build_container { (*fs_field), mpi_comm, nparticles, + cutoff, + p2p_computer, + inner_computer, in_current_iteration); + // TODO P2P load particle data too // Load particles from hdf5 - particles_input_hdf5<partsize_t, particles_rnumber, 3,3> generator(mpi_comm, fname_input, + particles_input_hdf5<partsize_t, particles_rnumber, size_particle_positions, size_particle_rhs> generator(mpi_comm, fname_input, inDatanameState, inDatanameRhs, my_spatial_low_limit_z, my_spatial_up_limit_z); // Ensure parameters match the input file @@ -229,6 +268,9 @@ struct particles_system_build_container { assert(part_sys->getNbRhs() == nsteps); + // store particle file layout + part_sys->setParticleFileLayout(generator.getParticleFileLayout()); + // Return the created particles system return std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>(part_sys); } @@ -250,10 +292,44 @@ inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>, int, 1, 11, 1, // interpolation_size int, 0, 3, 1, // spline_mode - particles_system_build_container<partsize_t, field_rnumber,be,fc,particles_rnumber>>( + particles_system_build_container<partsize_t, field_rnumber,be,fc,particles_rnumber, + p2p_computer_empty<particles_rnumber,partsize_t>, + particles_inner_computer_empty<particles_rnumber,partsize_t>, + 3,3>>( + interpolation_size, // template iterator 1 + spline_mode, // template iterator 2 + fs_field,fs_kk, nsteps, 
nparticles, fname_input, inDatanameState, inDatanameRhs, mpi_comm, in_current_iteration, + p2p_computer_empty<particles_rnumber,partsize_t>(), particles_inner_computer_empty<particles_rnumber,partsize_t>()); +} + +template <class partsize_t, class field_rnumber, field_backend be, field_components fc, + class p2p_computer_class, class particles_inner_computer_class, + class particles_rnumber = double> +inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> particles_system_builder_with_p2p( + const field<field_rnumber, be, fc>* fs_field, // (field object) + const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz) + const int nsteps, // to check coherency between parameters and hdf input file (nb rhs) + const partsize_t nparticles, // to check coherency between parameters and hdf input file + const std::string& fname_input, // particles input filename + const std::string& inDatanameState, const std::string& inDatanameRhs, // input dataset names + const int interpolation_size, + const int spline_mode, + MPI_Comm mpi_comm, + const int in_current_iteration, + p2p_computer_class p2p_computer, + particles_inner_computer_class inner_computer, + const particles_rnumber cutoff){ + return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>, + int, 1, 11, 1, // interpolation_size + int, 0, 3, 1, // spline_mode + particles_system_build_container<partsize_t, field_rnumber,be,fc,particles_rnumber, + p2p_computer_class, + particles_inner_computer_class, + 6,6>>( interpolation_size, // template iterator 1 spline_mode, // template iterator 2 - fs_field,fs_kk, nsteps, nparticles, fname_input, inDatanameState, inDatanameRhs, mpi_comm, in_current_iteration); + fs_field,fs_kk, nsteps, nparticles, fname_input, inDatanameState, inDatanameRhs, mpi_comm, in_current_iteration, + std::move(p2p_computer), std::move(inner_computer), cutoff); } diff --git a/bfps/cpp/particles/particles_utils.hpp b/cpp/particles/particles_utils.hpp similarity index 82% rename from bfps/cpp/particles/particles_utils.hpp rename to cpp/particles/particles_utils.hpp index 146dc4399477b72c30329edff587d35d7b44d69d..f1e0c790cd9c02ffb714bb555455662134346ee4 100644 --- a/bfps/cpp/particles/particles_utils.hpp +++ b/cpp/particles/particles_utils.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef PARTICLES_UTILS_HPP #define PARTICLES_UTILS_HPP @@ -19,16 +44,28 @@ #define AssertMpi(X) if(MPI_SUCCESS != (X)) { printf("MPI Error at line %d\n",__LINE__); fflush(stdout) ; throw std::runtime_error("Stop from from mpi erro"); } #endif -enum IDXS_3D { - IDX_X = 0, - IDX_Y = 1, - IDX_Z = 2 +enum IDX_COMPONENT_3D { + IDXC_X = 0, + IDXC_Y = 1, + IDXC_Z = 2 +}; + +enum IDX_COMPONENT_DEL_3D { + IDXC_DX_X = 0, + IDXC_DX_Y = 1, + IDXC_DX_Z = 2, + IDXC_DY_X = 3, + IDXC_DY_Y = 4, + IDXC_DY_Z = 5, + IDXC_DZ_X = 6, + IDXC_DZ_Y = 7, + IDXC_DZ_Z = 8, }; -enum FIELD_IDXS_3D { - FIELD_IDX_X = 2, - FIELD_IDX_Y = 1, - FIELD_IDX_Z = 0 +enum IDX_VARIABLE_3D { + IDXV_X = 2, + IDXV_Y = 1, + IDXV_Z = 0 }; namespace particles_utils { @@ -123,7 +160,7 @@ inline void partition_extra_z(real_number* array, const partsize_t size, const i if(nb_partitions == 2){ const partsize_t size_current = partition_extra<partsize_t, nb_values>(array, size, [&](const real_number inval[]){ - return partitions_levels(inval[IDX_Z]) == 0; + return partitions_levels(inval[IDXC_Z]) == 0; }, pdcswap); partitions_size[0] = size_current; partitions_size[1] = size-size_current; @@ -152,7 +189,7 @@ inline void partition_extra_z(real_number* array, const partsize_t size, const i const partsize_t size_current = partition_extra<partsize_t, nb_values>(&array[partitions_offset[current_part.first]*nb_values], size_unpart, [&](const real_number inval[]){ - return partitions_levels(inval[IDX_Z]) <= idx_middle; + return partitions_levels(inval[IDXC_Z]) <= idx_middle; }, pdcswap, partitions_offset[current_part.first]); partitions_offset[idx_middle+1] = size_current + partitions_offset[current_part.first]; diff --git a/bfps/cpp/scope_timer.cpp b/cpp/scope_timer.cpp similarity index 100% rename from bfps/cpp/scope_timer.cpp rename to cpp/scope_timer.cpp diff --git a/bfps/cpp/scope_timer.hpp b/cpp/scope_timer.hpp similarity index 99% rename from bfps/cpp/scope_timer.hpp rename to cpp/scope_timer.hpp index 2c48e2eda06ded74e668825181f0444eef22f647..890f522c415d7a102a0fff25c5292502cbcb459c 100644 --- a/bfps/cpp/scope_timer.hpp +++ b/cpp/scope_timer.hpp @@ -791,7 +791,8 @@ extern EventManager global_timer_manager; #define TIMEZONE(NAME) \ ScopeEvent TIMEZONE_Core_Pre_Merge(____TIMEZONE_AUTO_ID, __LINE__)( \ - NAME, global_timer_manager, ScopeEventUniqueKey); + NAME, global_timer_manager, ScopeEventUniqueKey); \ + DEBUG_MSG((NAME + std::string("\n")).c_str()); #define TIMEZONE_MULTI_REF(NAME) \ ScopeEvent TIMEZONE_Core_Pre_Merge(____TIMEZONE_AUTO_ID, __LINE__)( \ NAME, global_timer_manager, ScopeEventMultiRefKey); diff --git a/bfps/cpp/shared_array.hpp b/cpp/shared_array.hpp similarity index 62% rename from bfps/cpp/shared_array.hpp rename to cpp/shared_array.hpp index 1951e2f9838ccf37367d859206453d3db91e8e19..0245dc5df81e5bd1511b57583b9a4a86745a5d2c 100644 --- a/bfps/cpp/shared_array.hpp +++ b/cpp/shared_array.hpp @@ -1,3 +1,28 @@ +/****************************************************************************** +* * +* Copyright 2016 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. 
* +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + #ifndef SHAREDARRAY_HPP #define SHAREDARRAY_HPP diff --git a/cpp/spline.hpp b/cpp/spline.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ef990088566ec10f0bbf10937980705ffeb570dc --- /dev/null +++ b/cpp/spline.hpp @@ -0,0 +1,40 @@ +/****************************************************************************** +* * +* Copyright 2017 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. 
If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + + +#ifndef SPLINE_HPP +#define SPLINE_HPP + +#include "spline_n1.hpp" +#include "spline_n2.hpp" +#include "spline_n3.hpp" +#include "spline_n4.hpp" +#include "spline_n5.hpp" +#include "spline_n6.hpp" +#include "spline_n7.hpp" +#include "spline_n8.hpp" +#include "spline_n9.hpp" +#include "spline_n10.hpp" + +#endif diff --git a/bfps/cpp/spline_n1.cpp b/cpp/spline_n1.cpp similarity index 100% rename from bfps/cpp/spline_n1.cpp rename to cpp/spline_n1.cpp diff --git a/bfps/cpp/spline_n1.hpp b/cpp/spline_n1.hpp similarity index 100% rename from bfps/cpp/spline_n1.hpp rename to cpp/spline_n1.hpp diff --git a/bfps/cpp/spline_n10.cpp b/cpp/spline_n10.cpp similarity index 100% rename from bfps/cpp/spline_n10.cpp rename to cpp/spline_n10.cpp diff --git a/bfps/cpp/spline_n10.hpp b/cpp/spline_n10.hpp similarity index 100% rename from bfps/cpp/spline_n10.hpp rename to cpp/spline_n10.hpp diff --git a/bfps/cpp/spline_n2.cpp b/cpp/spline_n2.cpp similarity index 100% rename from bfps/cpp/spline_n2.cpp rename to cpp/spline_n2.cpp diff --git a/bfps/cpp/spline_n2.hpp b/cpp/spline_n2.hpp similarity index 100% rename from bfps/cpp/spline_n2.hpp rename to cpp/spline_n2.hpp diff --git a/bfps/cpp/spline_n3.cpp b/cpp/spline_n3.cpp similarity index 100% rename from bfps/cpp/spline_n3.cpp rename to cpp/spline_n3.cpp diff --git a/bfps/cpp/spline_n3.hpp b/cpp/spline_n3.hpp similarity index 100% rename from bfps/cpp/spline_n3.hpp rename to cpp/spline_n3.hpp diff --git a/bfps/cpp/spline_n4.cpp b/cpp/spline_n4.cpp similarity index 100% rename from bfps/cpp/spline_n4.cpp rename to cpp/spline_n4.cpp diff --git a/bfps/cpp/spline_n4.hpp b/cpp/spline_n4.hpp similarity index 100% rename from bfps/cpp/spline_n4.hpp rename to cpp/spline_n4.hpp diff --git a/bfps/cpp/spline_n5.cpp b/cpp/spline_n5.cpp similarity index 100% rename from bfps/cpp/spline_n5.cpp rename to cpp/spline_n5.cpp diff --git a/bfps/cpp/spline_n5.hpp b/cpp/spline_n5.hpp similarity index 100% rename from bfps/cpp/spline_n5.hpp rename to cpp/spline_n5.hpp diff --git a/bfps/cpp/spline_n6.cpp b/cpp/spline_n6.cpp similarity index 100% rename from bfps/cpp/spline_n6.cpp rename to cpp/spline_n6.cpp diff --git a/bfps/cpp/spline_n6.hpp b/cpp/spline_n6.hpp similarity index 100% rename from bfps/cpp/spline_n6.hpp rename to cpp/spline_n6.hpp diff --git a/bfps/cpp/spline_n7.cpp b/cpp/spline_n7.cpp similarity index 100% rename from bfps/cpp/spline_n7.cpp rename to cpp/spline_n7.cpp diff --git a/bfps/cpp/spline_n7.hpp b/cpp/spline_n7.hpp similarity index 100% rename from bfps/cpp/spline_n7.hpp rename to cpp/spline_n7.hpp diff --git a/bfps/cpp/spline_n8.cpp b/cpp/spline_n8.cpp similarity index 100% rename from bfps/cpp/spline_n8.cpp rename to cpp/spline_n8.cpp diff --git a/bfps/cpp/spline_n8.hpp b/cpp/spline_n8.hpp similarity index 100% rename from bfps/cpp/spline_n8.hpp rename to cpp/spline_n8.hpp diff --git a/bfps/cpp/spline_n9.cpp b/cpp/spline_n9.cpp similarity index 100% rename from bfps/cpp/spline_n9.cpp rename to cpp/spline_n9.cpp diff --git a/bfps/cpp/spline_n9.hpp b/cpp/spline_n9.hpp similarity index 100% rename from bfps/cpp/spline_n9.hpp rename to cpp/spline_n9.hpp diff --git a/bfps/cpp/vorticity_equation.cpp b/cpp/vorticity_equation.cpp similarity index 74% rename from bfps/cpp/vorticity_equation.cpp rename to cpp/vorticity_equation.cpp index 
737db2c47e89624065f3d29a1657575bac5ea786..ead9345af5a2f0555e7fa6e2b6ee45cecd9f3624 100644 --- a/bfps/cpp/vorticity_equation.cpp +++ b/cpp/vorticity_equation.cpp @@ -26,12 +26,14 @@ #define NDEBUG +#include <limits> #include <cassert> #include <cmath> #include <cstring> #include "fftw_tools.hpp" #include "vorticity_equation.hpp" #include "scope_timer.hpp" +#include "shared_array.hpp" @@ -151,6 +153,7 @@ vorticity_equation<rnumber, be>::vorticity_equation( this->nu = 0.1; this->fmode = 1; this->famplitude = 1.0; + this->friction_coefficient = 1.0; this->fk0 = 2.0; this->fk1 = 4.0; } @@ -188,13 +191,6 @@ void vorticity_equation<rnumber, be>::compute_vorticity() this->cvorticity->cval(cindex,1,1) = (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0)); this->cvorticity->cval(cindex,2,0) = -(this->kk->kx[xindex]*this->u->cval(cindex,1,1) - this->kk->ky[yindex]*this->u->cval(cindex,0,1)); this->cvorticity->cval(cindex,2,1) = (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0)); - //ptrdiff_t tindex = 3*cindex; - //this->cvorticity->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]); - //this->cvorticity->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]); - //this->cvorticity->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]); - //this->cvorticity->get_cdata()[tindex+0][1] = (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]); - //this->cvorticity->get_cdata()[tindex+1][1] = (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]); - //this->cvorticity->get_cdata()[tindex+2][1] = (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]); } else std::fill_n((rnumber*)(this->cvorticity->get_cdata()+3*cindex), 6, 0.0); @@ -223,13 +219,6 @@ void vorticity_equation<rnumber, be>::compute_velocity(field<rnumber, be, THREE> this->u->cval(cindex,1,1) = (this->kk->kz[zindex]*vorticity->cval(cindex,0,0) - this->kk->kx[xindex]*vorticity->cval(cindex,2,0)) / k2; this->u->cval(cindex,2,0) = -(this->kk->kx[xindex]*vorticity->cval(cindex,1,1) - this->kk->ky[yindex]*vorticity->cval(cindex,0,1)) / k2; this->u->cval(cindex,2,1) = (this->kk->kx[xindex]*vorticity->cval(cindex,1,0) - this->kk->ky[yindex]*vorticity->cval(cindex,0,0)) / k2; - //ptrdiff_t tindex = 3*cindex; - //this->u->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][1]) / k2; - //this->u->get_cdata()[tindex+0][1] = (this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][0]) / k2; - //this->u->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][1]) / k2; - //this->u->get_cdata()[tindex+1][1] = (this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][0]) / k2; - //this->u->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][1]) / k2; - 
//this->u->get_cdata()[tindex+2][1] = (this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][0]) / k2; } else std::fill_n((rnumber*)(this->u->get_cdata()+3*cindex), 6, 0.0); @@ -238,49 +227,231 @@ void vorticity_equation<rnumber, be>::compute_velocity(field<rnumber, be, THREE> this->u->symmetrize(); } +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::add_Kolmogorov_forcing( + field<rnumber, be, THREE> *dst, + int fmode, + double famplitude) +{ + TIMEZONE("vorticity_equation::add_Kolmogorov_forcing"); + ptrdiff_t cindex; + if (dst->clayout->myrank == dst->clayout->rank[0][fmode]) + { + cindex = dst->get_cindex(0, (fmode - dst->clayout->starts[0]), 0); + dst->cval(cindex,2, 0) -= famplitude/2; + } + if (dst->clayout->myrank == dst->clayout->rank[0][dst->clayout->sizes[0] - fmode]) + { + cindex = dst->get_cindex(0, (dst->clayout->sizes[0] - fmode - dst->clayout->starts[0]), 0); + dst->cval(cindex, 2, 0) -= famplitude/2; + } +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::add_field_band( + field<rnumber, be, THREE> *dst, + field<rnumber, be, THREE> *src, + double k0, double k1, + double prefactor) +{ + TIMEZONE("vorticity_equation::add_field_band"); + this->kk->CLOOP( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex){ + double knorm = sqrt(this->kk->kx[xindex]*this->kk->kx[xindex] + + this->kk->ky[yindex]*this->kk->ky[yindex] + + this->kk->kz[zindex]*this->kk->kz[zindex]); + if ((k0 <= knorm) && + (k1 >= knorm)) + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + dst->cval(cindex,c,i) += prefactor*src->cval(cindex,c,i); + } + ); +} + template <class rnumber, field_backend be> void vorticity_equation<rnumber, be>::add_forcing( field<rnumber, be, THREE> *dst, - field<rnumber, be, THREE> *vort_field, - rnumber factor) + field<rnumber, be, THREE> *vort_field) { TIMEZONE("vorticity_equation::add_forcing"); - if (strcmp(this->forcing_type, "none") == 0) - return; if (strcmp(this->forcing_type, "Kolmogorov") == 0) { - ptrdiff_t cindex; - if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->fmode]) - { - cindex = ((this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2]; - dst->cval(cindex,2, 0) -= this->famplitude*factor/2; - //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2; - } - if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->cvorticity->clayout->sizes[0] - this->fmode]) - { - cindex = ((this->cvorticity->clayout->sizes[0] - this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2]; - dst->cval(cindex, 2, 0) -= this->famplitude*factor/2; - //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2; - } + this->add_Kolmogorov_forcing(dst, this->fmode, this->famplitude); + return; + } + if (strcmp(this->forcing_type, "2Kolmogorov") == 0) + { + // 2 Kolmogorov forces + // first one wavenumber fk0, amplitude 1 - A + double amplitude = 1 - this->famplitude; + int fmode = int(this->fk0 / this->kk->dky); + this->add_Kolmogorov_forcing(dst, fmode, amplitude); + // second one wavenumber fk1, amplitude A + amplitude = this->famplitude * pow(int(this->fk1) / double(int(this->fk0)), 3); + fmode = int(this->fk1 / this->kk->dky); + this->add_Kolmogorov_forcing(dst, fmode, amplitude); + return; + } + if 
(strcmp(this->forcing_type, "Kolmogorov_and_drag") == 0) + { + this->add_Kolmogorov_forcing(dst, this->fmode, this->famplitude); + this->add_field_band( + dst, vort_field, + this->fk0, this->fk1, + -this->friction_coefficient); + return; + } + if (strcmp(this->forcing_type, "Kolmogorov_and_compensated_drag") == 0) + { + double amplitude = this->famplitude * ( + 1 + this->friction_coefficient / sqrt(this->fmode * this->famplitude)); + this->add_Kolmogorov_forcing(dst, this->fmode, amplitude); + this->add_field_band( + dst, vort_field, + this->fk0, this->fk1, + -this->friction_coefficient); return; } if (strcmp(this->forcing_type, "linear") == 0) { - this->kk->CLOOP( + this->add_field_band( + dst, vort_field, + this->fk0, this->fk1, + this->famplitude); + return; + } + if ((strcmp(this->forcing_type, "fixed_energy_injection_rate") == 0) || + (strcmp(this->forcing_type, "fixed_energy_injection_rate_and_drag") == 0)) + { + // first, compute energy in shell + shared_array<double> local_energy_in_shell(1); + double energy_in_shell = 0; + this->kk->CLOOP_K2_NXMODES( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2, + int nxmodes){ + double knorm = sqrt(k2); + if ((k2 > 0) && + (this->fk0 <= knorm) && + (this->fk1 >= knorm)) + *local_energy_in_shell.getMine() += nxmodes*( + vort_field->cval(cindex, 0, 0)*vort_field->cval(cindex, 0, 0) + vort_field->cval(cindex, 0, 1)*vort_field->cval(cindex, 0, 1) + + vort_field->cval(cindex, 1, 0)*vort_field->cval(cindex, 1, 0) + vort_field->cval(cindex, 1, 1)*vort_field->cval(cindex, 1, 1) + + vort_field->cval(cindex, 2, 0)*vort_field->cval(cindex, 2, 0) + vort_field->cval(cindex, 2, 1)*vort_field->cval(cindex, 2, 1) + ) / k2; + } + ); + local_energy_in_shell.mergeParallel(); + MPI_Allreduce( + local_energy_in_shell.getMasterData(), + &energy_in_shell, + 1, + MPI_DOUBLE, + MPI_SUM, + vort_field->comm); + // we should divide by 2, if we wanted energy; + // but then we would need to multiply the amplitude by 2 anyway, + // because what we really care about is force dotted into velocity, + // without the division by 2. 
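The sum accumulated in the loop above is, per mode, nxmodes * |omega_k|^2 / k^2, and for an incompressible field |omega_k|^2 / k^2 = |u_k|^2, so the total is twice the kinetic energy in the band (conjugate modes are folded in through nxmodes). Dividing the injection rate by this sum and using the result as the add_field_band prefactor makes the force dotted into the velocity equal to injection_rate, which is why the factor 1/2 is left out on purpose. A minimal numpy sketch of the same bookkeeping, assuming the vorticity spectrum is a complex array of shape (ny, nz, nx//2 + 1, 3) in the transposed rFFT layout; the helper name and the layout are illustrative only, not part of the library:

.. code:: python

    import numpy as np

    def band_prefactor(omega_hat, kx, ky, kz, fk0, fk1, injection_rate):
        # omega_hat: complex, shape (ny, nz, nx//2 + 1, 3), transposed rFFT layout
        knorm = np.sqrt(ky[:, None, None]**2 + kz[None, :, None]**2 + kx[None, None, :]**2)
        k2 = np.where(knorm > 0, knorm**2, 1.0)
        # planes with kx > 0 stand for a conjugate pair, so they count twice
        nxmodes = np.where(kx[None, None, :] > 0, 2.0, 1.0)
        in_band = (knorm > 0) & (knorm >= fk0) & (knorm <= fk1)
        # sum of nxmodes * |omega_k|^2 / k^2 = sum of 2 |u_k|^2 over the band
        shell = np.sum(nxmodes * np.sum(np.abs(omega_hat)**2, axis=-1) / k2,
                       where=in_band)
        if shell < 10 * np.finfo(float).eps:
            shell = 1.0   # same guard as the C++ for an (almost) empty band
        return injection_rate / shell

In the C++ just below, this value is exactly what ends up as temp_famplitude and is handed to add_field_band together with fk0 and fk1.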
+ + // now, modify amplitudes + if (energy_in_shell < 10*std::numeric_limits<rnumber>::epsilon()) + energy_in_shell = 1; + double temp_famplitude = this->injection_rate / energy_in_shell; + this->add_field_band( + dst, vort_field, + this->fk0, this->fk1, + temp_famplitude); + // and add drag if desired + if (strcmp(this->forcing_type, "fixed_energy_injection_rate_and_drag") == 0) + this->add_field_band( + dst, vort_field, + this->fmode, this->fmode + (this->fk1 - this->fk0), + -this->friction_coefficient); + return; + } + if (strcmp(this->forcing_type, "fixed_energy") == 0) + return; +} + +template <class rnumber, + field_backend be> +void vorticity_equation<rnumber, be>::impose_forcing( + field<rnumber, be, THREE> *onew, + field<rnumber, be, THREE> *oold) +{ + TIMEZONE("vorticity_equation::impose_forcing"); + if (strcmp(this->forcing_type, "fixed_energy") == 0) + { + // first, compute energy in shell + shared_array<double> local_energy_in_shell(1); + shared_array<double> local_total_energy(1); + double energy_in_shell, total_energy; + this->kk->CLOOP_K2_NXMODES( [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, - ptrdiff_t zindex){ - double knorm = sqrt(this->kk->kx[xindex]*this->kk->kx[xindex] + - this->kk->ky[yindex]*this->kk->ky[yindex] + - this->kk->kz[zindex]*this->kk->kz[zindex]); + ptrdiff_t zindex, + double k2, + int nxmodes){ + if (k2 > 0) + { + double mode_energy = nxmodes*( + onew->cval(cindex, 0, 0)*onew->cval(cindex, 0, 0) + onew->cval(cindex, 0, 1)*onew->cval(cindex, 0, 1) + + onew->cval(cindex, 1, 0)*onew->cval(cindex, 1, 0) + onew->cval(cindex, 1, 1)*onew->cval(cindex, 1, 1) + + onew->cval(cindex, 2, 0)*onew->cval(cindex, 2, 0) + onew->cval(cindex, 2, 1)*onew->cval(cindex, 2, 1) + ) / k2; + *local_total_energy.getMine() += mode_energy; + double knorm = sqrt(k2); + if ((this->fk0 <= knorm) && (this->fk1 >= knorm)) + *local_energy_in_shell.getMine() += mode_energy; + } + } + ); + local_total_energy.mergeParallel(); + local_energy_in_shell.mergeParallel(); + MPI_Allreduce( + local_energy_in_shell.getMasterData(), + &energy_in_shell, + 1, + MPI_DOUBLE, + MPI_SUM, + onew->comm); + MPI_Allreduce( + local_total_energy.getMasterData(), + &total_energy, + 1, + MPI_DOUBLE, + MPI_SUM, + onew->comm); + // divide by 2, because we want energy + total_energy /= 2; + energy_in_shell /= 2; + // now, add forcing term + // see Michael's thesis, page 38 + double temp_famplitude = sqrt((this->energy - total_energy + energy_in_shell) / energy_in_shell); + this->kk->CLOOP_K2( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2){ + double knorm = sqrt(k2); if ((this->fk0 <= knorm) && - (this->fk1 >= knorm)) + (this->fk1 >= knorm)) for (int c=0; c<3; c++) for (int i=0; i<2; i++) - dst->cval(cindex,c,i) += this->famplitude*vort_field->cval(cindex,c,i)*factor; - //dst->get_cdata()[cindex*3+c][i] += this->famplitude*vort_field->get_cdata()[cindex*3+c][i]*factor; + onew->cval(cindex,c,i) *= temp_famplitude; } ); return; @@ -306,16 +477,12 @@ void vorticity_equation<rnumber, be>::omega_nonlin( ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ - //ptrdiff_t tindex = 3*rindex; rnumber tmp[3]; for (int cc=0; cc<3; cc++) tmp[cc] = (this->u->rval(rindex,(cc+1)%3)*this->rvorticity->rval(rindex,(cc+2)%3) - this->u->rval(rindex,(cc+2)%3)*this->rvorticity->rval(rindex,(cc+1)%3)); - //tmp[cc][0] = (this->u->get_rdata()[tindex+(cc+1)%3]*this->rvorticity->get_rdata()[tindex+(cc+2)%3] - - // 
this->u->get_rdata()[tindex+(cc+2)%3]*this->rvorticity->get_rdata()[tindex+(cc+1)%3]); for (int cc=0; cc<3; cc++) this->u->rval(rindex,cc) = tmp[cc] / this->u->npoints; - //this->u->get_rdata()[(3*rindex)+cc] = tmp[cc][0] / this->u->npoints; } ); /* go back to Fourier space */ @@ -337,22 +504,13 @@ void vorticity_equation<rnumber, be>::omega_nonlin( tmp[1][1] = (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0)); tmp[2][1] = (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0)); } - //ptrdiff_t tindex = 3*cindex; - //{ - // tmp[0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]); - // tmp[1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]); - // tmp[2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]); - // tmp[0][1] = (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]); - // tmp[1][1] = (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]); - // tmp[2][1] = (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]); - //} for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) this->u->cval(cindex, cc, i) = tmp[cc][i]; - //this->u->get_cdata()[3*cindex+cc][i] = tmp[cc][i]; } ); - this->add_forcing(this->u, this->v[src], 1.0); + this->add_forcing(this->u, this->v[src]); this->kk->template force_divfree<rnumber>(this->u->get_cdata()); + this->u->symmetrize(); } template <class rnumber, @@ -377,12 +535,10 @@ void vorticity_equation<rnumber, be>::step(double dt) this->v[1]->cval(cindex,cc,i) = ( this->v[0]->cval(cindex,cc,i) + dt*this->u->cval(cindex,cc,i))*factor0; - //this->v[1]->get_cdata()[3*cindex+cc][i] = ( - // this->v[0]->get_cdata()[3*cindex+cc][i] + - // dt*this->u->get_cdata()[3*cindex+cc][i])*factor0; } } ); + this->impose_forcing(this->v[1], this->v[0]); this->omega_nonlin(1); this->kk->CLOOP_K2( @@ -401,15 +557,14 @@ void vorticity_equation<rnumber, be>::step(double dt) 3*this->v[0]->cval(cindex,cc,i)*factor0 + ( this->v[1]->cval(cindex,cc,i) + dt*this->u->cval(cindex,cc,i))*factor1)*0.25; - //this->v[2]->get_cdata()[3*cindex+cc][i] = ( - // 3*this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 + - // (this->v[1]->get_cdata()[3*cindex+cc][i] + - // dt*this->u->get_cdata()[3*cindex+cc][i])*factor1)*0.25; } } ); + this->impose_forcing(this->v[2], this->v[0]); this->omega_nonlin(2); + // store old vorticity + *this->v[1] = *this->v[0]; this->kk->CLOOP_K2( [&](ptrdiff_t cindex, ptrdiff_t xindex, @@ -425,13 +580,10 @@ void vorticity_equation<rnumber, be>::step(double dt) this->v[0]->cval(cindex,cc,i)*factor0 + 2*(this->v[2]->cval(cindex,cc,i) + dt*this->u->cval(cindex,cc,i)))*factor0/3; - //this->v[3]->get_cdata()[3*cindex+cc][i] = ( - // this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 + - // 2*(this->v[2]->get_cdata()[3*cindex+cc][i] + - // dt*this->u->get_cdata()[3*cindex+cc][i]))*factor0/3; } } ); + this->impose_forcing(this->v[0], this->v[1]); this->kk->template force_divfree<rnumber>(this->cvorticity->get_cdata()); this->cvorticity->symmetrize(); @@ -456,7 +608,6 @@ void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> * //ptrdiff_t tindex = 3*rindex; for (int cc=0; cc<3; cc++) 
this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,cc); - //this->v[1]->get_rdata()[tindex+cc] = this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+cc]; } ); //this->clean_up_real_space(this->rv[1], 3); @@ -493,7 +644,6 @@ void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> * //ptrdiff_t tindex = 3*rindex; for (int cc=0; cc<3; cc++) this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,(cc+1)%3); - //this->v[1]->get_rdata()[tindex+cc] = this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+(cc+1)%3]; } ); //this->clean_up_real_space(this->rv[1], 3); @@ -529,14 +679,20 @@ void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> * template <class rnumber, field_backend be> void vorticity_equation<rnumber, be>::compute_Lagrangian_acceleration( - field<rnumber, be, THREE> *acceleration) + field<rnumber, be, THREE> *acceleration, + field<rnumber, be, ONE> *pressure) { - field<rnumber, be, ONE> *pressure = new field<rnumber, be, ONE>( + bool own_pressure = false; + if (pressure == NULL) + { + pressure = new field<rnumber, be, ONE>( this->cvelocity->rlayout->sizes[2], this->cvelocity->rlayout->sizes[1], this->cvelocity->rlayout->sizes[0], this->cvelocity->rlayout->comm, this->cvelocity->fftw_plan_rigor); + own_pressure = true; + } this->compute_velocity(this->cvorticity); this->cvelocity->ift(); this->compute_pressure(pressure); @@ -574,7 +730,8 @@ void vorticity_equation<rnumber, be>::compute_Lagrangian_acceleration( acceleration->get_cdata()[tindex+2][1] -= this->kk->kz[zindex]*pressure->get_cdata()[cindex][0]; } }); - delete pressure; + if (own_pressure) + delete pressure; } template <class rnumber, @@ -626,7 +783,6 @@ void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration( for (int cc=0; cc<3; cc++) this->v[1]->rval(rindex,cc) = \ this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,cc) / this->cvelocity->npoints; - //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+cc] / this->cvelocity->npoints; } ); this->v[1]->dft(); @@ -666,7 +822,6 @@ void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration( for (int cc=0; cc<3; cc++) this->v[1]->rval(rindex,cc) = \ this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,(cc+1)%3) / this->cvelocity->npoints; - //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+(cc+1)%3] / this->cvelocity->npoints; } ); this->v[1]->dft(); diff --git a/bfps/cpp/vorticity_equation.hpp b/cpp/vorticity_equation.hpp similarity index 72% rename from bfps/cpp/vorticity_equation.hpp rename to cpp/vorticity_equation.hpp index e8bd1d843f730d39439bc99703956dc623ca4e42..cbff223e2d8119b37ef30b4e6b739aa64eff7039 100644 --- a/bfps/cpp/vorticity_equation.hpp +++ b/cpp/vorticity_equation.hpp @@ -28,7 +28,6 @@ #include <iostream> #include "field.hpp" -#include "field_descriptor.hpp" #ifndef VORTICITY_EQUATION @@ -67,9 +66,12 @@ class vorticity_equation /* physical parameters */ double nu; - int fmode; // for Kolmogorov flow - double famplitude; // both for Kflow and band forcing - double fk0, fk1; // for band forcing + int fmode; // for Kolmogorov flow + double famplitude; // both for Kflow and band forcing + double fk0, fk1; // for band forcing + double injection_rate; // for fixed energy injection rate + double energy; // for fixed energy + double friction_coefficient; // for Kolmogorov_and_drag char 
forcing_type[128]; /* constructor, destructor */ @@ -88,9 +90,36 @@ class vorticity_equation void omega_nonlin(int src); void step(double dt); void impose_zero_modes(void); + + /** \brief Method that computes force and adds it to the right hand side of the NS equations. + * + * If the force has an explicit expression, as for instance in the case of Kolmogorov forcing, + * the term should be added to the nonlinear term for the purposes of time-stepping, since + * otherwise a custom time-stepping scheme would need to be implemented for each forcing type. + * + */ void add_forcing(field<rnumber, be, THREE> *dst, - field<rnumber, be, THREE> *src_vorticity, - rnumber factor); + field<rnumber, be, THREE> *src_vorticity); + + void add_Kolmogorov_forcing(field<rnumber, be, THREE> *dst, + int fmode, + double famplitude); + void add_field_band( + field<rnumber, be, THREE> *dst, + field<rnumber, be, THREE> *src, + double k0, double k1, + double prefactor); + + /** \brief Method that imposes action of forcing on new vorticity field. + * + * If the force is implicit, in the sense that kinetic energy must be + * preserved or something similar, then the action must be imposed + * after the non-linear term has been added. + * + */ + void impose_forcing( + field<rnumber, be, THREE> *omega_new, + field<rnumber, be, THREE> *omega_old); void compute_vorticity(void); void compute_velocity(field<rnumber, be, THREE> *vorticity); @@ -124,13 +153,16 @@ class vorticity_equation this->kk->template low_pass<rnumber, THREE>(this->cvorticity->get_cdata(), this->kk->kM); this->kk->template force_divfree<rnumber>(this->cvorticity->get_cdata()); #endif + this->cvorticity->symmetrize(); } } /* statistics and general postprocessing */ void compute_pressure(field<rnumber, be, ONE> *pressure); void compute_Eulerian_acceleration(field<rnumber, be, THREE> *acceleration); - void compute_Lagrangian_acceleration(field<rnumber, be, THREE> *acceleration); + void compute_Lagrangian_acceleration( + field<rnumber, be, THREE> *acceleration, + field<rnumber, be, ONE> *pressure = NULL); }; #endif//VORTICITY_EQUATION diff --git a/documentation/_static/overview.rst b/documentation/_static/overview.rst index afe7a753666e6ea5911ce1266d0803aa25ea5c45..58af5653cab860961c71d057d92a21c9b99e6ddc 100644 --- a/documentation/_static/overview.rst +++ b/documentation/_static/overview.rst @@ -184,16 +184,17 @@ available, called ``bfps``, that you can execute. Just executing it will run a small test DNS on a real space grid of size :math:`32 \times 32 \times 32`, in the current folder, with the simulation name ``test``. -So, open a console, and type ``bfps NavierStokes``: +So, open a console, and type ``bfps DNS NSVE``: .. code:: bash # depending on how curious you are, you may have a look at the # options first: bfps --help - bfps NavierStokes --help + bfps DNS --help + bfps DNS NSVE --help # or you may just run it: - bfps NavierStokes + bfps DNS NSVE The simulation itself should not take more than a few seconds, since this is just a :math:`32^3` simulation run for 8 iterations. @@ -205,9 +206,9 @@ the following: .. 
code:: python import numpy as np - from bfps import NavierStokes + from bfps import DNS - c = NavierStokes( + c = DNS( work_dir = '/location/of/simulation/data', simname = 'simulation_name_goes_here') c.compute_statistics() @@ -223,7 +224,7 @@ the following: data_file['iteration'].value*c.parameters['dt'] / c.statistics['Tint'], data_file['iteration'].value*c.parameters['dt'] / c.statistics['tauK'])) -:func:`compute_statistics <bfps.NavierStokes.NavierStokes.compute_statistics>` +:func:`compute_statistics <bfps.DNS.DNS.compute_statistics>` will read the data file generated by the DNS, compute a bunch of basic statistics, for example the Taylor scale Reynolds number :math:`R_\lambda` that we're @@ -233,7 +234,7 @@ What happens is that the DNS will have generated an ``HDF5`` file containing a bunch of specific datasets (spectra, moments of real space representations, etc). The function -:func:`compute_statistics <bfps.NavierStokes.NavierStokes.compute_statistics>` +:func:`compute_statistics <bfps.DNS.DNS.compute_statistics>` performs simple postprocessing that may however be expensive, therefore it also saves some data into a ``<simname>_postprocess.h5`` file, and then it also performs some time averages, yielding the ``statistics`` @@ -242,6 +243,8 @@ dictionary that is used in the above code. Behind the scenes ----------------- +TODO FIXME obsolete documentation + In brief the following takes place: 1. An instance ``c`` of diff --git a/get_version.py b/get_version.py new file mode 100644 index 0000000000000000000000000000000000000000..fe545a6796333774366e99f9a1416b5b1c1bc62f --- /dev/null +++ b/get_version.py @@ -0,0 +1,63 @@ +################################################################################ +# # +# Copyright 2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ + + + +import datetime +import subprocess + +def main(): + # get current time + now = datetime.datetime.now() + # obtain version + try: + git_branch = subprocess.check_output(['git', + 'rev-parse', + '--abbrev-ref', + 'HEAD']).strip().split()[-1].decode() + git_revision = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip() + git_date = datetime.datetime.fromtimestamp(int(subprocess.check_output(['git', 'log', '-1', '--format=%ct']).strip())) + except: + git_revision = '' + git_branch = '' + git_date = now + if git_branch == '': + # there's no git available or something + VERSION = '{0:0>4}{1:0>2}{2:0>2}.{3:0>2}{4:0>2}{5:0>2}'.format( + git_date.year, git_date.month, git_date.day, + git_date.hour, git_date.minute, git_date.second) + else: + VERSION = subprocess.check_output(['git', 'describe', '--tags']).strip().decode().split('-')[0] + if (('develop' in git_branch) or + ('feature' in git_branch) or + ('bugfix' in git_branch)): + VERSION_py = subprocess.check_output( + ['git', 'describe', '--tags', '--dirty']).strip().decode().replace('-g', '+g').replace('-dirty', '.dirty').replace('-', '.post') + else: + VERSION_py = VERSION + print(VERSION) + return VERSION_py + +if __name__ == '__main__': + main() + diff --git a/meta/count_nmodes.py b/meta/count_nmodes.py new file mode 100644 index 0000000000000000000000000000000000000000..19af4ab332067ba72758bbc5244b33c8ea569dc0 --- /dev/null +++ b/meta/count_nmodes.py @@ -0,0 +1,34 @@ +import numpy as np + +def count_expensive(fk0, fk1): + kcomponent = np.arange(-np.floor(fk1)-1, np.floor(fk1)+2, 1).astype(np.float) + ksize = (kcomponent[:, None, None]**2 + + kcomponent[None, :, None]**2 + + kcomponent[None, None, :]**2)**.5 + #print(ksize[0]) + + good_indices = np.where(np.logical_and( + ksize >= fk0, + ksize <= fk1)) + #print(ksize[good_indices]) + #print(good_indices[0].shape) + return np.unique(ksize[good_indices].flatten(), return_counts = True) + +def main(): + for ff in [[1, 2], + [1.4, 2.3], + [1.4, 2.2]]: + modes, counts = count_expensive(ff[0], ff[1]) + nmodes = np.sum(counts) + print(1 / ff[1], ff, nmodes) + modes_str = '' + counts_str = '' + for ii in range(counts.shape[0]): + modes_str += '{0:>5g}\t'.format(modes[ii]) + counts_str += '{0:>5g}\t'.format(counts[ii]) + print(modes_str + '\n' + counts_str + '\n') + return None + +if __name__ == '__main__': + main() + diff --git a/pc_host_info.py b/pc_host_info.py new file mode 100644 index 0000000000000000000000000000000000000000..dec9db6410b54cd8db31c3bca21843be0edd41b1 --- /dev/null +++ b/pc_host_info.py @@ -0,0 +1,51 @@ +################################################################################ +# # +# Copyright 2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. 
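count_nmodes.py simply enumerates the integer wavenumber shells that fall inside a forcing band [fk0, fk1] (for dk = 1), which is handy when choosing fk0 and fk1 for the band forcings in vorticity_equation.cpp. A quick check of the first band used in its main(), run next to meta/count_nmodes.py (the script still uses the old np.float alias, so it needs an older numpy):

.. code:: python

    from count_nmodes import count_expensive

    modes, counts = count_expensive(1, 2)
    # integer-lattice shells |k| = 1, sqrt(2), sqrt(3), 2
    # with multiplicities 6, 12, 8 and 6, i.e. 32 forced modes in total
    print(modes, counts, counts.sum())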
# +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ + + +host_info = {'type' : 'pc'} + + +# info_template = {'type' : info_template_type, +# 'MPI' : info_template_MPI, +# 'environment' : info_template_environment, +# 'deltanprocs' : info_template_deltanprocs +# 'mail_address': info_template_mail_address} + +# info_template_type can be one of: +# 'pc' --- jobs run interactively +# 'cluster' --- cluster with SGE queueing system +# 'SLURM' --- cluster with SLURM queueing system +# 'IBMLoadLeveler --- cluster with IBM Load Leveler queueing system + +# info_template_MPI can be one of: +# 'openmpi' --- it means mpirun takes "x" as the parameter to set an environment variable +# not defined --- use "env" instead of "x" + +# info_template_environment, relevant for clusters, +# is the default queue to which jobs are submitted + +# info_template_deltanprocs, relevant for clusters, +# is the number of cores per node + +# info_template_mail_address, relevant for clusters, +# is the contact e-mail address placed in the job scripts. diff --git a/setup.py b/setup.py index 9bba17014aabf36c685395843b806f650604face..0b70e6d14f96d36da0eafd7e5af30e1e93c4aa49 100644 --- a/setup.py +++ b/setup.py @@ -1,26 +1,25 @@ -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. # -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### +################################################################################ +# # +# Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. 
If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +################################################################################ @@ -34,147 +33,12 @@ import sys import subprocess import pickle - -### compiler configuration -# check if .config/bfps/machine_settings.py file exists, create it if not -homefolder = os.path.expanduser('~') -bfpsfolder = os.path.join(homefolder, '.config', 'bfps') -if not os.path.exists(os.path.join(bfpsfolder, 'machine_settings.py')): - if not os.path.isdir(bfpsfolder): - os.mkdir(bfpsfolder) - shutil.copyfile('./machine_settings_py.py', os.path.join(bfpsfolder, 'machine_settings.py')) -# check if .config/bfps/host_information.py file exists, create it if not -if not os.path.exists(os.path.join(bfpsfolder, 'host_information.py')): - if not os.path.isdir(bfpsfolder): - os.mkdir(bfpsfolder) - open(os.path.join(bfpsfolder, 'host_information.py'), - 'w').write('host_info = {\'type\' : \'none\'}\n') - shutil.copyfile('./machine_settings_py.py', os.path.join(bfpsfolder, 'machine_settings.py')) -sys.path.insert(0, bfpsfolder) -# import stuff required for compilation of static library -from machine_settings import compiler, include_dirs, library_dirs, extra_compile_args, extra_libraries - - ### package versioning -# get current time -now = datetime.datetime.now() -# obtain version -try: - git_branch = subprocess.check_output(['git', - 'rev-parse', - '--abbrev-ref', - 'HEAD']).strip().split()[-1].decode() - git_revision = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip() - git_date = datetime.datetime.fromtimestamp(int(subprocess.check_output(['git', 'log', '-1', '--format=%ct']).strip())) -except: - git_revision = '' - git_branch = '' - git_date = now -if git_branch == '': - # there's no git available or something - VERSION = '{0:0>4}{1:0>2}{2:0>2}.{3:0>2}{4:0>2}{5:0>2}'.format( - git_date.year, git_date.month, git_date.day, - git_date.hour, git_date.minute, git_date.second) -else: - if (('develop' in git_branch) or - ('feature' in git_branch) or - ('bugfix' in git_branch)): - VERSION = subprocess.check_output( - ['git', 'describe', '--tags', '--dirty']).strip().decode().replace('-g', '+g').replace('-dirty', '.dirty').replace('-', '.post') - else: - VERSION = subprocess.check_output(['git', 'describe', '--tags']).strip().decode().split('-')[0] +import get_version +VERSION = get_version.main() print('This is bfps version ' + VERSION) - -### lists of files and MANIFEST.in -src_file_list = ['full_code/joint_acc_vel_stats', - 'full_code/test', - 'full_code/filter_test', - 'hdf5_tools', - 'full_code/get_rfields', - 'full_code/NSVE_field_stats', - 'full_code/native_binary_to_hdf5', - 'full_code/postprocess', - 'full_code/code_base', - 'full_code/direct_numerical_simulation', - 'full_code/NSVE', - 'field_binary_IO', - 'vorticity_equation', - 'field', - 'kspace', - 'field_layout', - 'field_descriptor', - 'rFFTW_distributed_particles', - 'distributed_particles', - 'particles', - 'particles_base', - 'rFFTW_interpolator', - 'interpolator', - 'interpolator_base', - 'fluid_solver', - 'fluid_solver_base', - 'fftw_tools', - 'spline_n1', - 'spline_n2', - 'spline_n3', - 'spline_n4', - 'spline_n5', - 'spline_n6', - 'spline_n7', - 'spline_n8', - 'spline_n9', - 'spline_n10', - 'Lagrange_polys', - 'scope_timer', - 'full_code/NSVEparticles'] - -particle_headers = [ - 'cpp/particles/particles_distr_mpi.hpp', - 'cpp/particles/abstract_particles_input.hpp', - 'cpp/particles/abstract_particles_output.hpp', - 
'cpp/particles/abstract_particles_system.hpp', - 'cpp/particles/alltoall_exchanger.hpp', - 'cpp/particles/particles_adams_bashforth.hpp', - 'cpp/particles/particles_field_computer.hpp', - 'cpp/particles/particles_input_hdf5.hpp', - 'cpp/particles/particles_generic_interp.hpp', - 'cpp/particles/particles_output_hdf5.hpp', - 'cpp/particles/particles_output_mpiio.hpp', - 'cpp/particles/particles_system_builder.hpp', - 'cpp/particles/particles_system.hpp', - 'cpp/particles/particles_utils.hpp', - 'cpp/particles/particles_output_sampling_hdf5.hpp', - 'cpp/particles/particles_sampling.hpp', - 'cpp/particles/env_utils.hpp'] - -full_code_headers = ['cpp/full_code/main_code.hpp', - 'cpp/full_code/codes_with_no_output.hpp', - 'cpp/full_code/NSVE_no_output.hpp', - 'cpp/full_code/NSVEparticles_no_output.hpp'] - -header_list = (['cpp/base.hpp'] + - ['cpp/fftw_interface.hpp'] + - ['cpp/bfps_timer.hpp'] + - ['cpp/omputils.hpp'] + - ['cpp/shared_array.hpp'] + - ['cpp/spline.hpp'] + - ['cpp/' + fname + '.hpp' - for fname in src_file_list] + - particle_headers + - full_code_headers) - -with open('MANIFEST.in', 'w') as manifest_in_file: - for fname in (['bfps/cpp/' + ff + '.cpp' for ff in src_file_list] + - ['bfps/' + ff for ff in header_list]): - manifest_in_file.write('include {0}\n'.format(fname)) - - - -### libraries -libraries = extra_libraries - - import distutils.cmd class CompileLibCommand(distutils.cmd.Command): @@ -182,74 +46,25 @@ class CompileLibCommand(distutils.cmd.Command): user_options = [ ('timing-output=', None, 'Toggle timing output.'), ('fftw-estimate=', None, 'Use FFTW ESTIMATE.'), + ('split-fftw-many=', None, 'Turn on SPLIT_FFTW_MANY.'), ('disable-fftw-omp=', None, 'Turn Off FFTW OpenMP.'), ] def initialize_options(self): self.timing_output = 0 self.fftw_estimate = 0 self.disable_fftw_omp = 0 + self.split_fftw_many = 0 return None def finalize_options(self): self.timing_output = (int(self.timing_output) == 1) + self.split_fftw_many = (int(self.split_fftw_many) == 1) self.fftw_estimate = (int(self.fftw_estimate) == 1) self.disable_fftw_omp = (int(self.disable_fftw_omp) == 1) return None def run(self): - if not os.path.isdir('obj'): - os.makedirs('obj') - need_to_compile = True - if not os.path.isdir('obj/full_code'): - os.makedirs('obj/full_code') - need_to_compile = True - if not os.path.isfile('bfps/libbfps.a'): - need_to_compile = True - else: - ofile = 'bfps/libbfps.a' - libtime = datetime.datetime.fromtimestamp(os.path.getctime(ofile)) - latest = libtime - for fname in header_list: - latest = max(latest, - datetime.datetime.fromtimestamp(os.path.getctime('bfps/' + fname))) - need_to_compile = (latest > libtime) - eca = extra_compile_args - eca += ['-fPIC'] - if self.timing_output: - eca += ['-DUSE_TIMINGOUTPUT'] - if self.fftw_estimate: - eca += ['-DUSE_FFTWESTIMATE'] - if self.disable_fftw_omp: - eca += ['-DNO_FFTWOMP'] - for fname in src_file_list: - ifile = 'bfps/cpp/' + fname + '.cpp' - ofile = 'obj/' + fname + '.o' - if not os.path.exists(ofile): - need_to_compile_file = True - else: - need_to_compile_file = (need_to_compile or - (datetime.datetime.fromtimestamp(os.path.getctime(ofile)) < - datetime.datetime.fromtimestamp(os.path.getctime(ifile)))) - if need_to_compile_file: - command_strings = [compiler, '-c'] - command_strings += ['bfps/cpp/' + fname + '.cpp'] - command_strings += ['-o', 'obj/' + fname + '.o'] - command_strings += eca - command_strings += ['-I' + idir for idir in include_dirs] - command_strings.append('-Ibfps/cpp/') - print(' '.join(command_strings)) - 
subprocess.check_call(command_strings) - command_strings = ['ar', 'rvs', 'bfps/libbfps.a'] - command_strings += ['obj/' + fname + '.o' for fname in src_file_list] - print(' '.join(command_strings)) - subprocess.check_call(command_strings) - ### save compiling information pickle.dump( - {'include_dirs' : include_dirs, - 'library_dirs' : library_dirs, - 'compiler' : compiler, - 'extra_compile_args' : eca, - 'libraries' : libraries, - 'install_date' : now, + {'install_date' : now, 'VERSION' : VERSION, 'git_revision' : git_revision}, open('bfps/install_info.pickle', 'wb'), @@ -262,23 +77,24 @@ setup( name = 'bfps', packages = ['bfps', 'bfps/test'], install_requires = ['numpy>=1.8', 'h5py>=2.2.1'], - cmdclass={'compile_library' : CompileLibCommand}, - package_data = {'bfps': header_list + - ['libbfps.a', - 'install_info.pickle'] + - ['test/B32p1e4_checkpoint_0.h5']}, + package_data = {'bfps': ['test/B32p1e4_checkpoint_0.h5']}, entry_points = { 'console_scripts': [ 'bfps = bfps.__main__:main', 'bfps1 = bfps.__main__:main', - 'bfps.test_NSVEparticles = bfps.test.test_bfps_NSVEparticles:main'], + 'bfps.test_NSVEparticles = bfps.test.test_bfps_NSVEparticles:main', + 'bfps.test_particles = bfps.test.test_particles:main', + 'bfps.test_Parseval = bfps.test.test_Parseval:main', + 'bfps.test_fftw = bfps.test.test_fftw:main'], }, version = VERSION, ######################################################################## # useless stuff folows +# if anyone knows how to open the README when calling this script from +# cmake, please let me know. ######################################################################## description = 'Big Fluid and Particle Simulator', - long_description = open('README.rst', 'r').read(), + #long_description = open('${PROJECT_SOURCE_DIR}/README.rst', 'r').read(), author = AUTHOR, author_email = AUTHOR_EMAIL, license = 'GPL version 3.0') diff --git a/tests/DNS/test_scaling.py b/tests/DNS/test_scaling.py index 1d4b12a5e3eb4aa322d68ba276437d1a641f7eae..3ae1d299ae9ab78dffb4d252d142e13f47adade6 100644 --- a/tests/DNS/test_scaling.py +++ b/tests/DNS/test_scaling.py @@ -12,7 +12,12 @@ def get_DNS_parameters( nprocesses = 1, output_on = False, cores_per_node = 16, - nparticles = int(1e5)): + nparticles = int(1e5), + environment = 'express', + minutes = '29', + no_submit = True, + src_dirname = '/draco/ptmp/clalescu/scaling'): + assert (N in [1024, 2048, 4096]) simname = (DNS_type + '{0:0>4d}'.format(N)) if output_on: simname = DNS_type + simname @@ -25,15 +30,13 @@ def get_DNS_parameters( work_dir = 'nn{0:0>4d}np{1}'.format(nnodes, nprocesses) if not output_on: class_name += '_no_output' - src_simname = 'N{0:0>4d}_kMeta2'.format(N) - src_iteration = -1 - if N == 512: - src_iteration = 3072 + src_simname = 'fb3_N{0:0>4d}_kMeta1.5'.format(N) if N == 1024: - src_iteration = 0x4000 + src_iteration = 32*1024 if N == 2048: - src_iteration = 0x6000 + src_iteration = 20*1024 if N == 4096: + src_simname = 'fb3_N2048x2_kMeta1.5' src_iteration = 0 DNS_parameters = [ class_name, @@ -45,9 +48,8 @@ def get_DNS_parameters( '--niter_todo', '12', '--niter_out', '12', '--niter_stat', '3'] - if src_iteration >= 0: - DNS_parameters += [ - '--src-wd', 'database', + DNS_parameters += [ + '--src-wd', src_dirname, '--src-simname', src_simname, '--src-iteration', '{0}'.format(src_iteration)] if DNS_type != 'A': @@ -63,6 +65,10 @@ def get_DNS_parameters( '--tracers0_neighbours', '{0}'.format(nneighbours), '--tracers0_smoothness', '{0}'.format(smoothness), '--particle-rand-seed', '2'] + if no_submit: + 
DNS_parameters += ['--no-submit'] + DNS_parameters += ['--environment', environment, + '--minutes', '{0}'.format(minutes)] return simname, work_dir, DNS_parameters def main(): @@ -86,27 +92,50 @@ def main(): parser.add_argument( '--nnodes', type = int, + help = 'how many nodes to use', dest = 'nnodes', default = 1) parser.add_argument( '--nprocesses', type = int, + help = 'how many MPI processes to use', dest = 'nprocesses', default = 1) parser.add_argument( '--ncores', type = int, + help = 'how many cores there are per node', dest = 'ncores', - default = 4) + default = 40) parser.add_argument( '--output-on', action = 'store_true', dest = 'output_on') + parser.add_argument( + '--submit', + action = 'store_true', + dest = 'submit') parser.add_argument( '--nparticles', type = int, dest = 'nparticles', default = int(1e5)) + parser.add_argument( + '--environment', + type = str, + dest = 'environment', + default = 'express') + parser.add_argument( + '--minutes', + type = int, + dest = 'minutes', + default = 29, + help = 'If environment supports it, this is the requested wall-clock-limit.') + parser.add_argument( + '--src-wd', + type = str, + dest = 'src_dirname', + default = '/draco/ptmp/clalescu/scaling') opt = parser.parse_args(sys.argv[1:]) simname, work_dir, params = get_DNS_parameters( DNS_type = opt.DNS_setup, @@ -115,7 +144,11 @@ def main(): nprocesses = opt.nprocesses, output_on = opt.output_on, nparticles = opt.nparticles, - cores_per_node = opt.ncores) + cores_per_node = opt.ncores, + no_submit = not opt.submit, + minutes = opt.minutes, + environment = opt.environment, + src_dirname = opt.src_dirname) print(work_dir + '/' + simname) print(' '.join(params)) # these following 2 lines actually launch something diff --git a/tests/base.py b/tests/base.py index 1c06974e836d2a348bf1e4b260f2b018ec3ab7af..542679733757b5213193f3b7f6ad02cda7e0617b 100644 --- a/tests/base.py +++ b/tests/base.py @@ -33,7 +33,6 @@ import numpy as np import matplotlib.pyplot as plt import bfps -from bfps import FluidResize from bfps.tools import particle_finite_diff_test as acceleration_test import argparse @@ -50,6 +49,9 @@ def get_parser(base_class = bfps.NavierStokes, parser.add_argument('-n', type = int, dest = 'n', default = n) + parser.add_argument('--np', + type = int, dest = 'np', + default = ncpu) parser.add_argument('--ncpu', type = int, dest = 'ncpu', default = ncpu) @@ -89,33 +91,13 @@ parser.add_argument( dest = 'kMeta', default = 2.0) -def double(opt): - old_simname = 'N{0:0>3x}'.format(opt.n) - new_simname = 'N{0:0>3x}'.format(opt.n*2) - c = FluidResize(fluid_precision = opt.precision) - c.launch( - args = ['--simname', old_simname + '_double', - '--wd', opt.work_dir, - '--nx', '{0}'.format(opt.n), - '--ny', '{0}'.format(opt.n), - '--nz', '{0}'.format(opt.n), - '--dst_nx', '{0}'.format(2*opt.n), - '--dst_ny', '{0}'.format(2*opt.n), - '--dst_nz', '{0}'.format(2*opt.n), - '--dst_simname', new_simname, - '--src_simname', old_simname, - '--src_iteration', '0', - '--src_wd', './', - '--niter_todo', '0']) - return None - def launch( opt, nu = None, dt = None, tracer_state_file = None, vorticity_field = None, - code_class = bfps.NavierStokes, + code_class = bfps.DNS, particle_class = 'particles', interpolator_class = 'rFFTW_interpolator'): c = code_class( diff --git a/tests/ci-scripts/test.sh b/tests/ci-scripts/test.sh index ddde2489e431412c260752f800640812ead91167..bb6eaa859fa40d8ffa975e693dc6351ebbbd63d5 100644 --- a/tests/ci-scripts/test.sh +++ b/tests/ci-scripts/test.sh @@ -5,41 +5,47 @@ set -x # stops 
when fails set -e -# Init -export destdir=$(pwd)"/ci-installdir" -export pythonbin=/home/ubuntu/anaconda3/bin/python3 -export bfpspythonpath=$destdir/lib/python3.6/site-packages/ -export PYTHONPATH=:$bfpspythonpath$PYTHONPATH -export PATH=$destdir/bin/:/home/ubuntu/hdf5/install/bin/:$PATH -export LD_LIBRARY_PATH=/home/ubuntu/hdf5/install/lib/:/home/ubuntu/fftw/install/lib/ - -echo "destdir = $destdir" -echo "pythonbin = $pythonbin" -echo "bfpspythonpath = $bfpspythonpath" - -# Remove possible previous installation -if [[ -d $destdir ]] ; then - rm -rf $destdir ; -fi - -# Create install path -if [[ ! -d $bfpspythonpath ]] ; then - mkdir -p $bfpspythonpath ; -fi - -# Build -$pythonbin setup.py compile_library --timing-output 1 -# Install -$pythonbin setup.py install --prefix=$destdir - -# Test -ls $destdir -ls $destdir/bin/ - -$pythonbin $destdir/bin/bfps.test_NSVEparticles - -# Clean -if [[ -d $destdir ]] ; then - rm -rf $destdir ; -fi +echo "please check VM before turning tests back on" + +## Init +#export destdir=$(pwd)"/ci-installdir" +#export pythonbin=/home/ubuntu/anaconda3/bin/python3 +#export bfpspythonpath=$destdir/lib/python3.6/site-packages/ +#export PYTHONPATH=:$bfpspythonpath$PYTHONPATH +#export PATH=$destdir/bin/:/home/ubuntu/hdf5/install/bin/:$PATH +#export LD_LIBRARY_PATH=/home/ubuntu/hdf5/install/lib/:/home/ubuntu/fftw/install/lib/ +# +#echo "destdir = $destdir" +#echo "pythonbin = $pythonbin" +#echo "bfpspythonpath = $bfpspythonpath" +# +## Remove possible previous installation +#if [[ -d $destdir ]] ; then +# rm -rf $destdir ; +#fi +# +## Create install path +#if [[ ! -d $bfpspythonpath ]] ; then +# mkdir -p $bfpspythonpath ; +#fi +# +## Build +#$pythonbin setup.py compile_library --timing-output 1 +## Install +#$pythonbin setup.py install --prefix=$destdir +# +## Test +#ls $destdir +#ls $destdir/bin/ +# +#$pythonbin $destdir/bin/bfps.test_fftw +# +#$pythonbin $destdir/bin/bfps.test_Parseval +# +#$pythonbin $destdir/bin/bfps.test_NSVEparticles +# +## Clean +#if [[ -d $destdir ]] ; then +# rm -rf $destdir ; +#fi diff --git a/tests/misc/makefile b/tests/misc/makefile new file mode 100644 index 0000000000000000000000000000000000000000..d44b9f04a10bdcb46fcf88ec9c858832c3c6e4df --- /dev/null +++ b/tests/misc/makefile @@ -0,0 +1,15 @@ +test_fftw: test_fftw.c + mpicc \ + -DFFTW_PLAN_RIGOR=FFTW_ESTIMATE \ + -I/stuff/ext_installs/include \ + -fopenmp \ + test_fftw.c \ + -o test_fftw \ + -L/stuff/ext_installs/lib \ + -lfftw3_mpi \ + -lfftw3 \ + -lfftw3f_mpi \ + -lfftw3f \ + -lfftw3_threads \ + -lfftw3f_threads \ + -lm diff --git a/tests/misc/pow_overflow.cpp b/tests/misc/pow_overflow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..48cc8aaeff5b7cacb9f9175970eeeaf2112d299f --- /dev/null +++ b/tests/misc/pow_overflow.cpp @@ -0,0 +1,30 @@ +#include <cfenv> +#include <cmath> +#include <iostream> +#include <limits> + +int main() +{ + feenableexcept(FE_ALL_EXCEPT); + double p0 = 3.54; + double p4 = 122; + double p5 = 0.836; + double ell = 1.0; + double result = 0.; + double argument = 0.; + + for (int k = 0; k<128; k++) + { + argument = p0*k*ell; + // double exponent = p4*pow(ell, p5); + // //if (exponent*log(argument) <2*std::numeric_limits<double>::min()) + // // result = 0.; + // //else + // //{ + // // double result0 = pow(p0*argument, p4*pow(ell, p5)); + // // result = exp(-0.5*result0); + // //} + } + std::cout << argument << std::endl; + return 0; +} diff --git a/tests/misc/run.sh b/tests/misc/run.sh new file mode 100644 index 
0000000000000000000000000000000000000000..ada649cac9355848e903c19778785aaf28a935fd --- /dev/null +++ b/tests/misc/run.sh @@ -0,0 +1,2 @@ +make +mpirun -np 2 -x OMP_NUM_THREADS=1 test_fftw diff --git a/tests/misc/test_fftw.c b/tests/misc/test_fftw.c new file mode 100644 index 0000000000000000000000000000000000000000..af9fef7b6564bdc4b5b3db0908cda43ec3dd9945 --- /dev/null +++ b/tests/misc/test_fftw.c @@ -0,0 +1,341 @@ +#include <fftw3-mpi.h> +#include <omp.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <math.h> + +#ifndef FFTW_PLAN_RIGOR + +#define FFTW_PLAN_RIGOR FFTW_ESTIMATE + +#endif + +//#define NO_FFTWOMP + +#define NX 36 +#define NY 36 +#define NZ 12 + +const int nx = NX; +const int ny = NY; +const int nz = NZ; +const int npoints = NX*NY*NZ; + +const double dkx = 1.0; +const double dky = 1.0; +const double dkz = 1.0; + +int myrank, nprocs; + +int main( + int argc, + char *argv[]) +{ + //////////////////////////////////// + /* initialize MPI environment */ +#ifdef NO_FFTWOMP + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + fftw_mpi_init(); + fftwf_mpi_init(); + printf("There are %d processes\n", nprocs); +#else + int mpiprovided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &mpiprovided); + assert(mpiprovided >= MPI_THREAD_FUNNELED); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + const int nThreads = omp_get_max_threads(); + printf("Number of threads for the FFTW = %d\n", + nThreads); + if (nThreads > 1){ + fftw_init_threads(); + fftwf_init_threads(); + } + fftw_mpi_init(); + fftwf_mpi_init(); + printf("There are %d processes and %d threads\n", + nprocs, + nThreads); + if (nThreads > 1){ + fftw_plan_with_nthreads(nThreads); + fftwf_plan_with_nthreads(nThreads); + } +#endif + + //////////////////////////////////// + /* do useful work */ + + // declarations + ptrdiff_t nfftw[3]; + ptrdiff_t tmp_local_size; + ptrdiff_t local_n0, local_0_start; + ptrdiff_t local_n1, local_1_start; + ptrdiff_t local_size; + ptrdiff_t ix, iy, iz; + ptrdiff_t jx, jy, jz; + ptrdiff_t rindex, cindex; + int cc; + float *data0, *data; + fftwf_complex *cdata; + double L2norm0, L2norm1, L2norm2, L2normk; + double local_L2norm0, local_L2norm1; + fftwf_plan c2r_plan, r2c_plan; + double *kx, *ky, *kz; + + // get sizes + nfftw[0] = nz; + nfftw[1] = ny; + nfftw[2] = nx; + tmp_local_size = fftwf_mpi_local_size_many_transposed( + 3, nfftw, 3, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, MPI_COMM_WORLD, + &local_n0, &local_0_start, + &local_n1, &local_1_start); + + local_size = local_n1 * nz * nx * 3 * 2; + + // allocate field + data = fftwf_alloc_real( + local_size); + data0 = fftwf_alloc_real( + local_size); + cdata = (fftwf_complex*)(data); + + c2r_plan = fftwf_mpi_plan_many_dft_c2r( + 3, nfftw, 3, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + cdata, + data, + MPI_COMM_WORLD, + FFTW_PLAN_RIGOR | FFTW_MPI_TRANSPOSED_IN); + + r2c_plan = fftwf_mpi_plan_many_dft_r2c( + 3, nfftw, 3, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + data, + cdata, + MPI_COMM_WORLD, + FFTW_PLAN_RIGOR | FFTW_MPI_TRANSPOSED_OUT); + + kx = (double*)malloc(sizeof(double)*(nx/2+1)); + ky = (double*)malloc(sizeof(double)*local_n1); + kz = (double*)malloc(sizeof(double)*nz); + + // generate wavenumbers + for (jy = 0; jy < local_n1; jy++) + { + if (jy + local_1_start <= ny/2) + ky[jy] = dky*(jy + local_1_start); + else + ky[jy] = dky*((jy + local_1_start) - ny); } + for (jz = 0; jz < nz; jz++) + { + if 
(jz <= nz/2) + kz[jz] = dkz*jz; + else + kz[jz] = dkz*(jz - nz); + } + for (jx = 0; jx < nx/2+1; jx++) + { + kx[jx] = dkx*jx; + } + + // fill field with random numbers + // I'm generating cindex the stupid way, but we can also use + // cindex = (jy*nz + jz)*(nx/2+1) + jx + cindex = 0; + for (jy = 0; jy < local_n1; jy++) + for (jz = 0; jz < nz; jz++) + { + for (jx = 0; jx < nx/2+1; jx++) + { + double k2 = (kx[jx]*kx[jx] + + ky[jy]*ky[jy] + + kz[jz]*kz[jz]); + if (jx == 0 && (jy + local_1_start) == 0 && jz == 0) + k2 = dkx*dkx + dky*dky + dkz*dkz; + for (cc = 0; cc<3; cc++) + { + cdata[cindex*3+cc][0] = (drand48()-0.5) / sqrt(k2); + cdata[cindex*3+cc][1] = (drand48()-0.5) / sqrt(k2); + } + cindex++; + } + } + + // go back and forth so that the + // Fourier space representation is properly symmetrized + fftwf_execute(c2r_plan); + fftwf_execute(r2c_plan); + // normalize, compute Fourier space L2 norm + cindex = 0; + local_L2norm0 = 0; + for (jy = 0; jy < local_n1; jy++) + for (jz = 0; jz < nz; jz++) + { + for (cc = 0; cc<3; cc++) + { + cdata[cindex*3+cc][0] /= npoints; + cdata[cindex*3+cc][1] /= npoints; + local_L2norm0 += (cdata[cindex*3+cc][0]*cdata[cindex*3+cc][0] + + cdata[cindex*3+cc][1]*cdata[cindex*3+cc][1]); + } + cindex++; + for (jx = 1; jx < nx/2+1; jx++) + { + for (cc = 0; cc<3; cc++) + { + cdata[cindex*3+cc][0] /= npoints; + cdata[cindex*3+cc][1] /= npoints; + local_L2norm0 += 2*(cdata[cindex*3+cc][0]*cdata[cindex*3+cc][0] + + cdata[cindex*3+cc][1]*cdata[cindex*3+cc][1]); + } + cindex++; + } + } + MPI_Allreduce( + &local_L2norm0, + &L2normk, + 1, + MPI_DOUBLE, + MPI_SUM, + MPI_COMM_WORLD); + L2normk = sqrt(L2normk); + + // go to real space + fftwf_execute(c2r_plan); + + // rindex = (iz*ny + iy)*(nx+2) + ix + rindex = 0; + local_L2norm0 = 0; + for (iz = 0; iz < local_n0; iz++) + for (iy = 0; iy < ny; iy++) + { + for (ix = 0; ix < nx; ix++) + { + for (cc = 0; cc<3; cc++) + { + local_L2norm0 += data[rindex*3+cc]*data[rindex*3+cc]; + } + rindex++; + } + for (ix = nx; ix < nx+2; ix++) + { + rindex++; + } + } + MPI_Allreduce( + &local_L2norm0, + &L2norm1, + 1, + MPI_DOUBLE, + MPI_SUM, + MPI_COMM_WORLD); + L2norm1 = sqrt(L2norm1 / npoints); + + //fftwf_execute(r2c_plan); + + //cindex = 0; + //local_L2norm0 = 0; + //for (jy = 0; jy < local_n1; jy++) + // for (jz = 0; jz < nz; jz++) + // { + // for (cc = 0; cc<3; cc++) + // { + // local_L2norm0 += (cdata[cindex*3+cc][0]*cdata[cindex*3+cc][0] + + // cdata[cindex*3+cc][1]*cdata[cindex*3+cc][1]); + // } + // cindex++; + // // I am not adding the energy from mode nx/2 as a matter of principle. 
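The two norms computed above amount to a Parseval check for the half-complex (r2c) layout: each plane with kx > 0 represents a mode and its complex conjugate and is counted twice, while the kx = 0 plane (and the kx = nx/2 Nyquist plane that the commented-out second pass deliberately skips) is self-conjugate and counted once. The same check sketched with numpy, with arbitrary grid sizes and seed:

.. code:: python

    import numpy as np

    nz, ny, nx = 12, 36, 36
    data = np.random.default_rng(0).standard_normal((nz, ny, nx))
    cdata = np.fft.rfftn(data) / data.size      # normalized half-complex spectrum

    weights = np.full(cdata.shape, 2.0)         # kx > 0: mode plus its conjugate
    weights[..., 0] = 1.0                       # kx = 0 plane is self-conjugate
    if nx % 2 == 0:
        weights[..., -1] = 1.0                  # so is the kx = nx/2 Nyquist plane
    L2normk = np.sqrt(np.sum(weights * np.abs(cdata)**2))
    L2norm1 = np.sqrt(np.mean(data**2))         # real-space L2 norm / sqrt(npoints)
    assert abs(L2normk - L2norm1) / L2normk < 1e-10

The C test makes the same comparison in single precision, so its relative error should only be expected to reach float32 round-off rather than the tolerance used in this sketch.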
+ // for (jx = 1; jx < nx/2+1; jx++) + // { + // for (cc = 0; cc<3; cc++) + // { + // local_L2norm0 += 2*(cdata[cindex*3+cc][0]*cdata[cindex*3+cc][0] + + // cdata[cindex*3+cc][1]*cdata[cindex*3+cc][1]); + // } + // cindex++; + // } + // } + //MPI_Allreduce( + // &local_L2norm0, + // &L2normk, + // 1, + // MPI_DOUBLE, + // MPI_SUM, + // MPI_COMM_WORLD); + //L2normk = sqrt(L2normk) / (nx*ny*nz); + //fftwf_execute(c2r_plan); + + //// normalize + //rindex = 0; + //local_L2norm0 = 0; + //local_L2norm1 = 0; + //for (iz = 0; iz < local_n0; iz++) + // for (iy = 0; iy < ny; iy++) + // { + // for (ix = 0; ix < nx; ix++) + // { + // for (cc = 0; cc<3; cc++) + // { + // data[rindex*3+cc] /= (nx*ny*nz); + // local_L2norm0 += data[rindex*3+cc]*data[rindex*3+cc]; + // local_L2norm1 += ((data0[rindex*3+cc] - data[rindex*3+cc])* + // (data0[rindex*3+cc] - data[rindex*3+cc])); + // } + // rindex++; + // } + // for (ix = nx; ix < nx+2; ix++) + // { + // rindex++; + // } + // } + //MPI_Allreduce( + // &local_L2norm0, + // &L2norm1, + // 1, + // MPI_DOUBLE, + // MPI_SUM, + // MPI_COMM_WORLD); + //MPI_Allreduce( + // &local_L2norm1, + // &L2norm2, + // 1, + // MPI_DOUBLE, + // MPI_SUM, + // MPI_COMM_WORLD); + //L2norm1 = sqrt(L2norm1 / (nx*ny*nz)); + //L2norm2 = sqrt(L2norm2 / (nx*ny*nz)); + + printf("FFTW_PLAN_RIGOR=%d\n", FFTW_PLAN_RIGOR); + printf("L2normk = %g, L2norm1 = %g, relative error = %g\n", + L2normk, L2norm1, fabs(L2normk - L2norm1) / (L2normk)); + + // deallocate + fftwf_destroy_plan(r2c_plan); + fftwf_destroy_plan(c2r_plan); + fftwf_free(data); + fftwf_free(data0); + free(kx); + free(ky); + free(kz); + + //////////////////////////////////// + /* clean up */ + fftwf_mpi_cleanup(); + fftw_mpi_cleanup(); + +#ifndef NO_FFTWOMP + if (nThreads > 1){ + fftw_cleanup_threads(); + fftwf_cleanup_threads(); + } +#endif + + MPI_Finalize(); + return EXIT_SUCCESS; +} + diff --git a/tests/run_all_tests.sh b/tests/run_all_tests.sh new file mode 100644 index 0000000000000000000000000000000000000000..7865a8a3d9a3b7d56194b0dcda2bc24925aaeafd --- /dev/null +++ b/tests/run_all_tests.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -e + +bfps.test_fftw +bfps.test_Parseval +bfps.test_NSVEparticles + +# test postprocessing +bfps PP field_single_to_double --simname dns_nsveparticles --iter0 32 --iter1 32 +bfps PP get_rfields --simname dns_nsveparticles --iter0 0 --iter1 64 +bfps PP joint_acc_vel_stats --simname dns_nsveparticles --iter0 0 --iter1 64 +bfps PP resize --simname dns_nsveparticles --new_nx 96 --new_ny 96 --new_nz 96 --new_simname dns_nsveparticles_resized diff --git a/tests/test_io_03_run.py b/tests/test_io_03_run.py index a789ac66fd99d8e5525ce69b1e861f609d969212..5b4905ba8973299b44a3dd7ef5f9fa07294e7a8b 100644 --- a/tests/test_io_03_run.py +++ b/tests/test_io_03_run.py @@ -35,5 +35,5 @@ if __name__ == '__main__': c.write_src() c.write_par() c.set_host_info(bfps.host_info) - c.run() + c.run(opt.ncpu, 1) diff --git a/tests/test_plain.py b/tests/test_plain.py deleted file mode 100644 index ad30224f869fc724758cc95d8b9e10da7b4ca2d4..0000000000000000000000000000000000000000 --- a/tests/test_plain.py +++ /dev/null @@ -1,156 +0,0 @@ -#! /usr/bin/env python3 -####################################################################### -# # -# Copyright 2015 Max Planck Institute # -# for Dynamics and Self-Organization # -# # -# This file is part of bfps. 
# -# # -# bfps is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published # -# by the Free Software Foundation, either version 3 of the License, # -# or (at your option) any later version. # -# # -# bfps is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with bfps. If not, see <http://www.gnu.org/licenses/> # -# # -# Contact: Cristian.Lalescu@ds.mpg.de # -# # -####################################################################### - - - -#from base import * -import bfps -from bfps.tools import particle_finite_diff_test as acceleration_test -import sys - -import numpy as np -import matplotlib.pyplot as plt - -#parser.add_argument('--multiplejob', -# dest = 'multiplejob', action = 'store_true') -# -#parser.add_argument( -# '--particle-class', -# default = 'particles', -# dest = 'particle_class', -# type = str) -# -#parser.add_argument( -# '--interpolator-class', -# default = 'interpolator', -# dest = 'interpolator_class', -# type = str) - -class NSPlain(bfps.NavierStokes): - def specific_parser_arguments( - self, - parser): - bfps.NavierStokes.specific_parser_arguments(self, parser) - parser.add_argument( - '--particle-class', - default = 'rFFTW_distributed_particles', - dest = 'particle_class', - type = str) - parser.add_argument( - '--interpolator-class', - default = 'rFFTW_interpolator', - dest = 'interpolator_class', - type = str) - parser.add_argument('--neighbours', - type = int, - dest = 'neighbours', - default = 3) - parser.add_argument('--smoothness', - type = int, - dest = 'smoothness', - default = 2) - return None - def launch( - self, - args = [], - **kwargs): - opt = self.prepare_launch(args = args) - self.fill_up_fluid_code() - if type(opt.nparticles) == int: - if opt.nparticles > 0: - self.add_3D_rFFTW_field( - name = 'rFFTW_acc') - self.add_interpolator( - name = 'spline', - neighbours = opt.neighbours, - smoothness = opt.smoothness, - class_name = opt.interpolator_class) - self.add_particles( - kcut = ['fs->kM/2', 'fs->kM/3'], - integration_steps = 3, - interpolator = 'spline', - class_name = opt.particle_class) - self.add_particles( - integration_steps = [2, 3, 4, 6], - interpolator = 'spline', - acc_name = 'rFFTW_acc', - class_name = opt.particle_class) - self.finalize_code() - self.launch_jobs(opt = opt) - return None - -def plain(args): - wd = opt.work_dir - opt.work_dir = wd + '/N{0:0>3x}_1'.format(opt.n) - c0 = launch(opt, dt = 0.2/opt.n, - particle_class = opt.particle_class, - interpolator_class = opt.interpolator_class) - c0.compute_statistics() - print ('Re = {0:.0f}'.format(c0.statistics['Re'])) - print ('Rlambda = {0:.0f}'.format(c0.statistics['Rlambda'])) - print ('Lint = {0:.4e}, etaK = {1:.4e}'.format(c0.statistics['Lint'], c0.statistics['etaK'])) - print ('Tint = {0:.4e}, tauK = {1:.4e}'.format(c0.statistics['Tint'], c0.statistics['tauK'])) - print ('kMetaK = {0:.4e}'.format(c0.statistics['kMeta'])) - for s in range(c0.particle_species): - acceleration_test(c0, species = s, m = 1) - if not opt.multiplejob: - return None - assert(opt.niter_todo % 3 == 0) - opt.work_dir = wd + '/N{0:0>3x}_2'.format(opt.n) - opt.njobs *= 2 - opt.niter_todo = opt.niter_todo//2 - c1 = launch(opt, dt = c0.parameters['dt'], - particle_class = 
opt.particle_class, - interpolator_class = opt.interpolator_class) - c1.compute_statistics() - opt.work_dir = wd + '/N{0:0>3x}_3'.format(opt.n) - opt.njobs = 3*opt.njobs//2 - opt.niter_todo = 2*opt.niter_todo//3 - c2 = launch(opt, dt = c0.parameters['dt'], - particle_class = opt.particle_class, - interpolator_class = opt.interpolator_class) - c2.compute_statistics() - compare_stats(opt, c0, c1) - compare_stats(opt, c0, c2) - return None - -if __name__ == '__main__': - c0 = NSPlain() - c0.launch( - ['-n', '32', - '--ncpu', '4', - '--nparticles', '1000', - '--niter_todo', '48', - '--wd', 'data/single'] + - sys.argv[1:]) - c0.compute_statistics() - print ('Re = {0:.0f}'.format(c0.statistics['Re'])) - print ('Rlambda = {0:.0f}'.format(c0.statistics['Rlambda'])) - print ('Lint = {0:.4e}, etaK = {1:.4e}'.format(c0.statistics['Lint'], c0.statistics['etaK'])) - print ('Tint = {0:.4e}, tauK = {1:.4e}'.format(c0.statistics['Tint'], c0.statistics['tauK'])) - print ('kMetaK = {0:.4e}'.format(c0.statistics['kMeta'])) - for s in range(c0.particle_species): - acceleration_test(c0, species = s, m = 1) - diff --git a/tests/test_vorticity_equation.py b/tests/test_vorticity_equation.py index dfaccb8bf352bdd252e5edf29f6e7d711689f7dc..e492bfa5c75d0f2f2b9989cccef49964b8bc90b4 100644 --- a/tests/test_vorticity_equation.py +++ b/tests/test_vorticity_equation.py @@ -273,12 +273,13 @@ def main(): particle_initial_condition[..., 2] = yvals[None, :, None] particle_initial_condition = particle_initial_condition.reshape(-1, 3) nparticles = nparticles**2 - c = bfps.NavierStokes(simname = 'fluid_solver') + c = bfps.DNS(simname = 'fluid_solver') if run_NS: run_NSVE = True subprocess.call('rm *fluid_solver* NavierStokes*', shell = True) c.launch( - ['-n', '32', + ['NSVE', + '-n', '32', '--simname', 'fluid_solver', '--ncpu', '4', '--niter_todo', '{0}'.format(niterations), @@ -298,9 +299,10 @@ def main(): f = h5py.File('vorticity_equation_checkpoint_0.h5', 'w') f['vorticity/complex/0'] = data f.close() - c = bfps.NSVorticityEquation() + c = bfps.DNS() c.launch( - ['-n', '32', + ['NSVEparticles', + '-n', '32', '--simname', 'vorticity_equation', '--np', '4', '--ntpp', '1',