diff --git a/.clang-format b/.clang-format index 5b53d80d4c201a794b933a02d5772baa83b9d6cb..a1b8305ab8beda3b2e75ae6a1bf3bbf8c859dbc9 100644 --- a/.clang-format +++ b/.clang-format @@ -9,5 +9,6 @@ AlignConsecutiveAssignments: true ColumnLimit: 135 ReflowComments: true SortUsingDeclarations: true +IncludeBlocks: Preserve ... diff --git a/Makefile b/Makefile index aa7f2b23260cea448b9a2a3e6368d18f7735c915..1fb6676a0ff96aa3f03660ca7a9cdfc5ba5e089a 100644 --- a/Makefile +++ b/Makefile @@ -94,7 +94,7 @@ $(info EXEC: $(EXEC)) $(info ) -PYTHON = /usr/bin/python +PYTHON = python RESULT := $(shell CONFIG=$(CONFIG) PYTHON=$(PYTHON) BUILD_DIR=$(BUILD_DIR) SRC_DIR=$(SRC_DIR) CURDIR=$(CURDIR) make -f buildsystem/Makefile.config) $(info $(RESULT)) @@ -440,6 +440,12 @@ GSL_LIBS += -lgsl -lgslcblas HDF5_LIBS += -lhdf5 -lz MATH_LIBS = -lm +ifneq ($(SYSTYPE),"Darwin") +ifeq (ALLOCATE_SHARED_MEMORY_VIA_POSIX,$(findstring ALLOCATE_SHARED_MEMORY_VIA_POSIX,$(CONFIGVARS))) +SHMEM_LIBS = -lrt +endif +endif + MAKEFILES = $(MAKEFILE_LIST) buildsystem/Makefile.config ########################## @@ -448,7 +454,7 @@ MAKEFILES = $(MAKEFILE_LIST) buildsystem/Makefile.config CFLAGS = $(OPTIMIZE) $(OPT) $(HDF5_INCL) $(GSL_INCL) $(FFTW_INCL) $(HWLOC_INCL) $(VTUNE_INCL) $(MAPS_INCL) -I$(BUILD_DIR) -I$(SRC_DIR) -LIBS = $(MATH_LIBS) $(HDF5_LIBS) $(GSL_LIBS) $(FFTW_LIBS) $(HWLOC_LIBS) $(VTUNE_LIBS) $(TEST_LIBS) $(MAPS_LIBS) +LIBS = $(MATH_LIBS) $(HDF5_LIBS) $(GSL_LIBS) $(FFTW_LIBS) $(HWLOC_LIBS) $(VTUNE_LIBS) $(TEST_LIBS) $(MAPS_LIBS) $(SHMEM_LIBS) SUBDIRS := $(addprefix $(BUILD_DIR)/,$(SUBDIRS)) @@ -501,6 +507,8 @@ $(BUILD_DIR)/compile_time_info.o: $(BUILD_DIR)/compile_time_info.cc $(MAKEFILES) $(BUILD_DIR)/compile_time_info_hdf5.o: $(BUILD_DIR)/compile_time_info_hdf5.cc $(MAKEFILES) $(CPP) $(CFLAGS) -c $< -o $@ +$(BUILD_DIR)/version.o: $(BUILD_DIR)/version.cc $(MAKEFILES) + $(CPP) $(CFLAGS) -c $< -o $@ check: $(CONFIG_CHECK) diff --git a/Template-Config.sh b/Template-Config.sh index 
a6d557c80cfc1b12c9ba968df6c524bb0ac97744..6141e83226932b39f41efef61fe9db2c0156c3bd 100644 --- a/Template-Config.sh +++ b/Template-Config.sh @@ -167,8 +167,10 @@ INITIAL_CONDITIONS_CONTAIN_ENTROPY #LIGHTCONE_PARTICLES # produces particle lightcones #LIGHTCONE_MASSMAPS # produces mass shells on the lightcone #LIGHTCONE_PARTICLES_GROUPS # computes groups for particles buffered on the lightcone +#LIGHTCONE_PARTICLES_SKIP_SAVING # prevents particle data from being saved along with the found groups on the lightcone #LIGHTCONE_OUTPUT_ACCELERATIONS # stores accelerations for particles on lightcone #LIGHTCONE_IMAGE_COMP_HSML_VELDISP # option for computing densities and smoothing length for lightcones in postprocessing +#LIGHTCONE_MULTIPLE_ORIGINS # switch this on if you want to be able to define lightcone origins different from (0,0,0) #REARRANGE_OPTION # special option to reorder lightcone data in mergertree order @@ -202,7 +204,9 @@ INITIAL_CONDITIONS_CONTAIN_ENTROPY #USE_MPIALLTOALLV_IN_DOMAINDECOMP # replaces hypercube communication in domain particle exchance with a single MPI_Allgatherv (can be less stable) #MPI_HYPERCUBE_ALLGATHERV # if your MPI-library uses too much internal storage for MPI_Allgatherv, this uses a hypercube as a work-around #MPI_MESSAGE_SIZELIMIT_IN_MB=200 # limit the message size of very large MPI transfers +#MPI_HYPERCUBE_ALLTOALL # use a robust hypercube for MPI_Alltoall instead of the native algorithm of the MPI library #ISEND_IRECV_IN_DOMAIN # uses asynchronous communication instead of synchronous communication in hypercube pattern (can be less stable) +#ALLOCATE_SHARED_MEMORY_VIA_POSIX # if this is set, use POSIX directly to allocate shared memory instead of MPI-3 calls #---------------------------------------- Testing and Debugging options diff --git a/buildsystem/check.py b/buildsystem/check.py index 1aaf4188c0774cab1d27885cabef69355381984c..eef684577dd7fd3281e0ed520b41c173fa3f6af8 100644 --- a/buildsystem/check.py +++ b/buildsystem/check.py @@ 
-49,10 +49,21 @@ def parseIf(string, defines, fin): def filter_code(fin): defines = set() + first_encountered = False + line = fin.readline() while line != "": s = line.lstrip() - #print s + + if s.startswith("#include"): + if first_encountered == False: + m = re.search("gadgetconfig",s) + if m is not None: + first_encountered = True + else: + print("First header file included ('%s') is not gadgetconfig.h -- please change this\n"%s.rstrip()) + exit(1) + if s.startswith("#if "): parseIf(s[4:],defines,fin) elif s.startswith("#elseif "): @@ -288,7 +299,7 @@ def check_parameters(fin, fout, fdoc): write(used,fout) exit(0) - + if __name__ == "__main__": if len(sys.argv) < 3: exit(1) @@ -333,5 +344,4 @@ if __name__ == "__main__": doc = open(sys.argv[4],'r') check_parameters(fin, fout, doc) - - + diff --git a/buildsystem/config.py b/buildsystem/config.py index 642e2b57410694985c1989661e8b3d51e618472f..0f291e7449dc02ad26402e44b47567b55baeb413 100644 --- a/buildsystem/config.py +++ b/buildsystem/config.py @@ -32,11 +32,12 @@ def out1(options, fname): def out2(options, fname): f = open(fname, "w") - str = """#include <mpi.h> + str = """ +#include \"gadgetconfig.h\" +#include <mpi.h> #include <stdio.h> #include <stdlib.h> #include <math.h> -#include \"gadgetconfig.h\" #include \"data/dtypes.h\" #include \"data/allvars.h\" #include \"main/main.h\" @@ -61,14 +62,15 @@ printf( def out3(options, fname): f = open(fname, "w") - str = """#include <mpi.h> + str = """ +#include \"gadgetconfig.h\" +#include <mpi.h> #include <stdio.h> #include <hdf5.h> #include <stdlib.h> #include <string.h> #include <errno.h> #include <math.h> -#include \"gadgetconfig.h\" #include \"data/constants.h\" #include \"data/dtypes.h\" #include \"data/macros.h\" diff --git a/data/TREECOOL b/data/TREECOOL new file mode 100755 index 0000000000000000000000000000000000000000..730a1f8b997e59448606759bf4e4f219903669c1 --- /dev/null +++ b/data/TREECOOL @@ -0,0 +1,171 @@ + 0.000 3.03516e-14 1.37296e-14 3.04873e-16 
1.74434e-25 1.76233e-25 1.00198e-26 + 0.005 3.20557e-14 1.47386e-14 3.14717e-16 1.85463e-25 1.87090e-25 1.03701e-26 + 0.010 3.37379e-14 1.57232e-14 3.24518e-16 1.96306e-25 1.97710e-25 1.07195e-26 + 0.015 3.54076e-14 1.66914e-14 3.34310e-16 2.07032e-25 2.08173e-25 1.10691e-26 + 0.020 3.70746e-14 1.76519e-14 3.44133e-16 2.17717e-25 2.18565e-25 1.14202e-26 + 0.025 3.87497e-14 1.86137e-14 3.54027e-16 2.28440e-25 2.28979e-25 1.17740e-26 + 0.030 4.04442e-14 1.95867e-14 3.64036e-16 2.39288e-25 2.39514e-25 1.21319e-26 + 0.035 4.21704e-14 2.05814e-14 3.74207e-16 2.50352e-25 2.50277e-25 1.24953e-26 + 0.040 4.39415e-14 2.16092e-14 3.84589e-16 2.61733e-25 2.61381e-25 1.28660e-26 + 0.045 4.57713e-14 2.26820e-14 3.95237e-16 2.73534e-25 2.72948e-25 1.32455e-26 + 0.050 4.76748e-14 2.38127e-14 4.06206e-16 2.85868e-25 2.85108e-25 1.36356e-26 + 0.055 4.96649e-14 2.50126e-14 4.17547e-16 2.98833e-25 2.97975e-25 1.40379e-26 + 0.060 5.17433e-14 2.62842e-14 4.29269e-16 3.12445e-25 3.11577e-25 1.44526e-26 + 0.065 5.39090e-14 2.76279e-14 4.41369e-16 3.26695e-25 3.25918e-25 1.48795e-26 + 0.070 5.61603e-14 2.90436e-14 4.53847e-16 3.41576e-25 3.41001e-25 1.53185e-26 + 0.075 5.84958e-14 3.05314e-14 4.66697e-16 3.57078e-25 3.56831e-25 1.57694e-26 + 0.080 6.09147e-14 3.20921e-14 4.79923e-16 3.73198e-25 3.73418e-25 1.62321e-26 + 0.085 6.34210e-14 3.37301e-14 4.93541e-16 3.89966e-25 3.90806e-25 1.67070e-26 + 0.090 6.60202e-14 3.54508e-14 5.07577e-16 4.07420e-25 4.09053e-25 1.71947e-26 + 0.095 6.87178e-14 3.72597e-14 5.22055e-16 4.25603e-25 4.28216e-25 1.76960e-26 + 0.100 7.15197e-14 3.91629e-14 5.37001e-16 4.44557e-25 4.48359e-25 1.82115e-26 + 0.105 7.44316e-14 4.11660e-14 5.52443e-16 4.64323e-25 4.69541e-25 1.87419e-26 + 0.110 7.74567e-14 4.32728e-14 5.68396e-16 4.84924e-25 4.91808e-25 1.92875e-26 + 0.115 8.05977e-14 4.54866e-14 5.84877e-16 5.06376e-25 5.15205e-25 1.98486e-26 + 0.120 8.38574e-14 4.78108e-14 6.01902e-16 5.28699e-25 5.39777e-25 2.04256e-26 + 0.125 8.72386e-14 5.02490e-14 6.19488e-16 
5.51911e-25 5.65571e-25 2.10187e-26 + 0.130 9.07449e-14 5.28052e-14 6.37654e-16 5.76035e-25 5.92640e-25 2.16283e-26 + 0.135 9.43816e-14 5.54857e-14 6.56428e-16 6.01112e-25 6.21054e-25 2.22550e-26 + 0.140 9.81554e-14 5.82973e-14 6.75838e-16 6.27188e-25 6.50889e-25 2.28995e-26 + 0.145 1.02073e-13 6.12473e-14 6.95918e-16 6.54310e-25 6.82228e-25 2.35627e-26 + 0.150 1.06141e-13 6.43433e-14 7.16699e-16 6.82530e-25 7.15153e-25 2.42451e-26 + 0.155 1.10366e-13 6.75924e-14 7.38214e-16 7.11896e-25 7.49749e-25 2.49477e-26 + 0.160 1.14754e-13 7.09995e-14 7.60494e-16 7.42442e-25 7.86090e-25 2.56710e-26 + 0.165 1.19310e-13 7.45687e-14 7.83569e-16 7.74196e-25 8.24246e-25 2.64158e-26 + 0.170 1.24039e-13 7.83043e-14 8.07471e-16 8.07192e-25 8.64293e-25 2.71828e-26 + 0.175 1.28947e-13 8.22108e-14 8.32231e-16 8.41460e-25 9.06308e-25 2.79726e-26 + 0.180 1.34040e-13 8.62930e-14 8.57885e-16 8.77036e-25 9.50374e-25 2.87861e-26 + 0.185 1.39325e-13 9.05574e-14 8.84475e-16 9.13966e-25 9.96583e-25 2.96241e-26 + 0.190 1.44808e-13 9.50112e-14 9.12045e-16 9.52302e-25 1.04503e-24 3.04876e-26 + 0.195 1.50497e-13 9.96618e-14 9.40641e-16 9.92097e-25 1.09583e-24 3.13776e-26 + 0.200 1.56400e-13 1.04517e-13 9.70311e-16 1.03341e-24 1.14907e-24 3.22951e-26 + 0.205 1.62524e-13 1.09584e-13 1.00111e-15 1.07629e-24 1.20488e-24 3.32411e-26 + 0.210 1.68878e-13 1.14873e-13 1.03309e-15 1.12079e-24 1.26336e-24 3.42171e-26 + 0.215 1.75471e-13 1.20392e-13 1.06634e-15 1.16696e-24 1.32463e-24 3.52245e-26 + 0.220 1.82312e-13 1.26150e-13 1.10090e-15 1.21487e-24 1.38882e-24 3.62649e-26 + 0.225 1.89411e-13 1.32158e-13 1.13687e-15 1.26456e-24 1.45604e-24 3.73398e-26 + 0.230 1.96778e-13 1.38425e-13 1.17430e-15 1.31609e-24 1.52642e-24 3.84509e-26 + 0.235 2.04420e-13 1.44957e-13 1.21328e-15 1.36952e-24 1.60010e-24 3.95998e-26 + 0.240 2.12348e-13 1.51762e-13 1.25387e-15 1.42491e-24 1.67719e-24 4.07880e-26 + 0.245 2.20569e-13 1.58846e-13 1.29613e-15 1.48232e-24 1.75781e-24 4.20173e-26 + 0.250 2.29093e-13 1.66216e-13 1.34015e-15 
1.54182e-24 1.84212e-24 4.32893e-26 + 0.255 2.37931e-13 1.73879e-13 1.38603e-15 1.60347e-24 1.93024e-24 4.46063e-26 + 0.260 2.47094e-13 1.81844e-13 1.43393e-15 1.66735e-24 2.02234e-24 4.59720e-26 + 0.265 2.56597e-13 1.90119e-13 1.48409e-15 1.73355e-24 2.11859e-24 4.73907e-26 + 0.270 2.66456e-13 1.98715e-13 1.53671e-15 1.80214e-24 2.21919e-24 4.88670e-26 + 0.275 2.76684e-13 2.07640e-13 1.59204e-15 1.87323e-24 2.32432e-24 5.04058e-26 + 0.280 2.87295e-13 2.16903e-13 1.65028e-15 1.94689e-24 2.43415e-24 5.20114e-26 + 0.285 2.98297e-13 2.26504e-13 1.71157e-15 2.02315e-24 2.54877e-24 5.36861e-26 + 0.290 3.09693e-13 2.36443e-13 1.77598e-15 2.10204e-24 2.66824e-24 5.54317e-26 + 0.295 3.21489e-13 2.46719e-13 1.84363e-15 2.18357e-24 2.79263e-24 5.72499e-26 + 0.300 3.33687e-13 2.57331e-13 1.91459e-15 2.26777e-24 2.92200e-24 5.91426e-26 + 0.305 3.46292e-13 2.68275e-13 1.98896e-15 2.35463e-24 3.05640e-24 6.11109e-26 + 0.310 3.59296e-13 2.79549e-13 2.06668e-15 2.44414e-24 3.19585e-24 6.31526e-26 + 0.315 3.72693e-13 2.91149e-13 2.14768e-15 2.53628e-24 3.34037e-24 6.52651e-26 + 0.320 3.86474e-13 3.03070e-13 2.23188e-15 2.63101e-24 3.48997e-24 6.74449e-26 + 0.325 4.00628e-13 3.15309e-13 2.31918e-15 2.72828e-24 3.64466e-24 6.96887e-26 + 0.330 4.15148e-13 3.27859e-13 2.40950e-15 2.82807e-24 3.80444e-24 7.19932e-26 + 0.335 4.30037e-13 3.40716e-13 2.50279e-15 2.93035e-24 3.96932e-24 7.43572e-26 + 0.340 4.45297e-13 3.53874e-13 2.59903e-15 3.03515e-24 4.13931e-24 7.67797e-26 + 0.345 4.60935e-13 3.67327e-13 2.69817e-15 3.14243e-24 4.31441e-24 7.92600e-26 + 0.350 4.76953e-13 3.81068e-13 2.80019e-15 3.25221e-24 4.49461e-24 8.17967e-26 + 0.355 4.93350e-13 3.95087e-13 2.90502e-15 3.36442e-24 4.67986e-24 8.43878e-26 + 0.360 5.10092e-13 4.09358e-13 3.01249e-15 3.47887e-24 4.86986e-24 8.70267e-26 + 0.365 5.27139e-13 4.23848e-13 3.12243e-15 3.59530e-24 5.06427e-24 8.97056e-26 + 0.370 5.44447e-13 4.38524e-13 3.23464e-15 3.71346e-24 5.26271e-24 9.24160e-26 + 0.375 5.61966e-13 4.53349e-13 3.34890e-15 
3.83305e-24 5.46475e-24 9.51489e-26 + 0.380 5.79656e-13 4.68292e-13 3.46479e-15 3.95381e-24 5.67002e-24 9.78927e-26 + 0.385 5.97501e-13 4.83345e-13 3.58126e-15 4.07565e-24 5.87839e-24 1.00628e-25 + 0.390 6.15495e-13 4.98501e-13 3.69703e-15 4.19851e-24 6.08976e-24 1.03333e-25 + 0.395 6.33628e-13 5.13759e-13 3.81073e-15 4.32233e-24 6.30403e-24 1.05985e-25 + 0.400 6.51892e-13 5.29112e-13 3.92091e-15 4.44704e-24 6.52109e-24 1.08557e-25 + 0.405 6.70266e-13 5.44548e-13 4.02635e-15 4.57250e-24 6.74074e-24 1.11029e-25 + 0.410 6.88697e-13 5.60022e-13 4.12706e-15 4.69834e-24 6.96240e-24 1.13388e-25 + 0.415 7.07118e-13 5.75479e-13 4.22336e-15 4.82413e-24 7.18542e-24 1.15628e-25 + 0.420 7.25457e-13 5.90861e-13 4.31558e-15 4.94937e-24 7.40907e-24 1.17741e-25 + 0.425 7.43639e-13 6.06104e-13 4.40409e-15 5.07356e-24 7.63257e-24 1.19718e-25 + 0.430 7.61593e-13 6.21150e-13 4.48926e-15 5.19622e-24 7.85520e-24 1.21555e-25 + 0.435 7.79283e-13 6.35966e-13 4.57121e-15 5.31709e-24 8.07643e-24 1.23264e-25 + 0.440 7.96680e-13 6.50521e-13 4.65009e-15 5.43595e-24 8.29579e-24 1.24861e-25 + 0.445 8.13752e-13 6.64782e-13 4.72602e-15 5.55253e-24 8.51278e-24 1.26364e-25 + 0.450 8.30465e-13 6.78717e-13 4.79916e-15 5.66661e-24 8.72686e-24 1.27789e-25 + 0.455 8.46779e-13 6.92287e-13 4.86956e-15 5.77787e-24 8.93742e-24 1.29153e-25 + 0.460 8.62630e-13 7.05442e-13 4.93682e-15 5.88589e-24 9.14370e-24 1.30445e-25 + 0.465 8.77947e-13 7.18124e-13 5.00044e-15 5.99020e-24 9.34488e-24 1.31651e-25 + 0.470 8.92654e-13 7.30275e-13 5.05989e-15 6.09029e-24 9.54006e-24 1.32757e-25 + 0.475 9.06670e-13 7.41833e-13 5.11460e-15 6.18564e-24 9.72833e-24 1.33747e-25 + 0.480 9.19911e-13 7.52732e-13 5.16403e-15 6.27569e-24 9.90870e-24 1.34606e-25 + 0.485 9.32296e-13 7.62912e-13 5.20780e-15 6.35990e-24 1.00802e-23 1.35328e-25 + 0.490 9.43739e-13 7.72311e-13 5.24556e-15 6.43770e-24 1.02419e-23 1.35906e-25 + 0.495 9.54150e-13 7.80863e-13 5.27693e-15 6.50848e-24 1.03926e-23 1.36336e-25 + 0.500 9.63436e-13 7.88500e-13 5.30153e-15 
6.57161e-24 1.05313e-23 1.36613e-25 + 0.505 9.71506e-13 7.95155e-13 5.31900e-15 6.62650e-24 1.06569e-23 1.36730e-25 + 0.510 9.78299e-13 8.00783e-13 5.32906e-15 6.67274e-24 1.07686e-23 1.36681e-25 + 0.515 9.83755e-13 8.05339e-13 5.33145e-15 6.70994e-24 1.08658e-23 1.36463e-25 + 0.520 9.87817e-13 8.08778e-13 5.32590e-15 6.73772e-24 1.09477e-23 1.36068e-25 + 0.525 9.90421e-13 8.11053e-13 5.31214e-15 6.75566e-24 1.10135e-23 1.35491e-25 + 0.530 9.91509e-13 8.12119e-13 5.28993e-15 6.76339e-24 1.10625e-23 1.34728e-25 + 0.535 9.91034e-13 8.11944e-13 5.25917e-15 6.76061e-24 1.10942e-23 1.33775e-25 + 0.540 9.88954e-13 8.10498e-13 5.21981e-15 6.74704e-24 1.11081e-23 1.32634e-25 + 0.545 9.85225e-13 8.07751e-13 5.17181e-15 6.72241e-24 1.11036e-23 1.31305e-25 + 0.550 9.79804e-13 8.03673e-13 5.11512e-15 6.68643e-24 1.10803e-23 1.29786e-25 + 0.555 9.72656e-13 7.98241e-13 5.04976e-15 6.63889e-24 1.10377e-23 1.28080e-25 + 0.560 9.63786e-13 7.91453e-13 4.97591e-15 6.57980e-24 1.09756e-23 1.26189e-25 + 0.565 9.53210e-13 7.83316e-13 4.89379e-15 6.50925e-24 1.08941e-23 1.24117e-25 + 0.570 9.40948e-13 7.73839e-13 4.80367e-15 6.42734e-24 1.07932e-23 1.21870e-25 + 0.575 9.27023e-13 7.63033e-13 4.70584e-15 6.33420e-24 1.06729e-23 1.19453e-25 + 0.580 9.11479e-13 7.50925e-13 4.60072e-15 6.23011e-24 1.05334e-23 1.16874e-25 + 0.585 8.94420e-13 7.37592e-13 4.48914e-15 6.11574e-24 1.03758e-23 1.14153e-25 + 0.590 8.75971e-13 7.23127e-13 4.37207e-15 5.99193e-24 1.02012e-23 1.11313e-25 + 0.595 8.56270e-13 7.07630e-13 4.25056e-15 5.85960e-24 1.00109e-23 1.08379e-25 + 0.600 8.35464e-13 6.91213e-13 4.12575e-15 5.71971e-24 9.80648e-24 1.05377e-25 + 0.605 8.13689e-13 6.73973e-13 3.99881e-15 5.57316e-24 9.58915e-24 1.02335e-25 + 0.610 7.91014e-13 6.55949e-13 3.87085e-15 5.42035e-24 9.35945e-24 9.92793e-26 + 0.615 7.67495e-13 6.37169e-13 3.74305e-15 5.26160e-24 9.11776e-24 9.62355e-26 + 0.620 7.43195e-13 6.17664e-13 3.61667e-15 5.09725e-24 8.86454e-24 9.32324e-26 + 0.625 7.18183e-13 5.97467e-13 3.49305e-15 
4.92770e-24 8.60025e-24 9.03005e-26 + 0.630 6.92510e-13 5.76609e-13 3.37306e-15 4.75324e-24 8.32529e-24 8.74586e-26 + 0.635 6.66160e-13 5.55097e-13 3.25568e-15 4.57383e-24 8.03966e-24 8.46801e-26 + 0.640 6.39103e-13 5.32935e-13 3.13941e-15 4.38935e-24 7.74325e-24 8.19269e-26 + 0.645 6.11305e-13 5.10128e-13 3.02264e-15 4.19968e-24 7.43598e-24 7.91577e-26 + 0.650 5.82733e-13 4.86680e-13 2.90365e-15 4.00470e-24 7.11776e-24 7.63286e-26 + 0.655 5.53398e-13 4.62628e-13 2.78116e-15 3.80458e-24 6.78897e-24 7.34068e-26 + 0.660 5.23475e-13 4.38110e-13 2.65588e-15 3.60049e-24 6.45155e-24 7.04071e-26 + 0.665 4.93190e-13 4.13298e-13 2.52895e-15 3.39394e-24 6.10796e-24 6.73562e-26 + 0.670 4.62781e-13 3.88374e-13 2.40165e-15 3.18652e-24 5.76080e-24 6.42827e-26 + 0.675 4.32506e-13 3.63534e-13 2.27531e-15 2.97993e-24 5.41289e-24 6.12171e-26 + 0.680 4.02608e-13 3.38965e-13 2.15110e-15 2.77581e-24 5.06689e-24 5.81862e-26 + 0.685 3.73229e-13 3.14778e-13 2.02940e-15 2.57509e-24 4.72449e-24 5.51973e-26 + 0.690 3.44498e-13 2.91080e-13 1.91041e-15 2.37866e-24 4.38723e-24 5.22542e-26 + 0.695 3.16553e-13 2.67978e-13 1.79434e-15 2.18744e-24 4.05678e-24 4.93605e-26 + 0.700 2.89539e-13 2.45592e-13 1.68142e-15 2.00239e-24 3.73490e-24 4.65201e-26 + 0.705 2.63589e-13 2.24030e-13 1.57187e-15 1.82444e-24 3.42325e-24 4.37371e-26 + 0.710 2.38785e-13 2.03361e-13 1.46587e-15 1.65414e-24 3.12293e-24 4.10162e-26 + 0.715 2.15194e-13 1.83648e-13 1.36361e-15 1.49198e-24 2.83496e-24 3.83623e-26 + 0.720 1.92888e-13 1.64957e-13 1.26529e-15 1.33849e-24 2.56041e-24 3.57804e-26 + 0.725 1.71944e-13 1.47356e-13 1.17110e-15 1.19420e-24 2.30039e-24 3.32758e-26 + 0.730 1.52421e-13 1.30901e-13 1.08119e-15 1.05956e-24 2.05587e-24 3.08532e-26 + 0.735 1.34328e-13 1.15602e-13 9.95540e-16 9.34625e-25 1.82712e-24 2.85142e-26 + 0.740 1.17654e-13 1.01458e-13 9.14061e-16 8.19349e-25 1.61426e-24 2.62600e-26 + 0.745 1.02391e-13 8.84631e-14 8.36666e-16 7.13671e-25 1.41738e-24 2.40916e-26 + 0.750 8.85234e-14 7.66120e-14 7.63247e-16 
6.17513e-25 1.23658e-24 2.20098e-26 + 0.755 7.60273e-14 6.58897e-14 6.93737e-16 5.30720e-25 1.07181e-24 2.00163e-26 + 0.760 6.48482e-14 5.62575e-14 6.28219e-16 4.52939e-25 9.22646e-25 1.81158e-26 + 0.765 5.49201e-14 4.76680e-14 5.66814e-16 3.83742e-25 7.88556e-25 1.63134e-26 + 0.770 4.61705e-14 4.00688e-14 5.09642e-16 3.22657e-25 6.68935e-25 1.46149e-26 + 0.775 3.85199e-14 3.34019e-14 4.56825e-16 2.69166e-25 5.63114e-25 1.30260e-26 + 0.780 3.18847e-14 2.76053e-14 4.08322e-16 2.22719e-25 4.70327e-25 1.15496e-26 + 0.785 2.61848e-14 2.26161e-14 3.63550e-16 1.82784e-25 3.89653e-25 1.01791e-26 + 0.790 2.13351e-14 1.83663e-14 3.21775e-16 1.48789e-25 3.20070e-25 8.90547e-27 + 0.795 1.72430e-14 1.47819e-14 2.82204e-16 1.20115e-25 2.60465e-25 7.71863e-27 + 0.800 1.38082e-14 1.17826e-14 2.43984e-16 9.60883e-26 2.09630e-25 6.60776e-27 + 0.805 1.09348e-14 9.28974e-15 2.06562e-16 7.60575e-26 1.66418e-25 5.56549e-27 + 0.810 8.55931e-15 7.24513e-15 1.70516e-16 5.95623e-26 1.30119e-25 4.59785e-27 + 0.815 6.62143e-15 5.59106e-15 1.36700e-16 4.61541e-26 1.00067e-25 3.71403e-27 + 0.820 5.05572e-15 4.26510e-15 1.06025e-16 3.53472e-26 7.55377e-26 2.92375e-27 + 0.825 3.79149e-15 3.19987e-15 7.94636e-17 2.66172e-26 5.57487e-26 2.23726e-27 + 0.830 2.76009e-15 2.33100e-15 5.76164e-17 1.94610e-26 3.98834e-26 1.65825e-27 + 0.835 1.91144e-15 1.61533e-15 3.97497e-17 1.35327e-26 2.71614e-26 1.16780e-27 + 0.840 1.19633e-15 1.01154e-15 2.47998e-17 8.49865e-27 1.67609e-26 7.41001e-28 + 0.845 5.61822e-16 4.75209e-16 1.16223e-17 4.00000e-27 7.79785e-27 3.51094e-28 + 0.850 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 diff --git a/defines_extra b/defines_extra index efebea56f7b44cb52644382096e4b1d3bc8d45a4..fe5cce55d1c7f950d63620902b0126c35c6ffbe6 100644 --- a/defines_extra +++ b/defines_extra @@ -27,7 +27,6 @@ SYMTENSORS_H FMM_H SUBREADID_IO_H FMM_PRIVATE_H -LGALAXIES_GALSNAP_IO_H IO_H IDSTORAGE_H LIGHTCONE_MASSMAP_IO_H @@ -40,17 +39,13 @@ SPH_H LCPARTDATA_H LIGHTCONE_IO_H 
FOF_IO_H -LGALAXIES_GALSNAP_FINISH_IO_H SIMPART_H SPHPARTDATA_H SHAREDMEM_H -LGALAXIES_H READTREES_MBOUND_IO_H FOFTREE_H -LGALAXIES_OPTIONS_H GRAVITY_EWALDTENSORS_H SETCOMM_H -LGALAXIES_TREE_IO_H PM_PERIODIC_H PINNING_H IO_STREAMCOUNT_H @@ -146,41 +141,13 @@ ALLOC_TOLERANCE #to be moved to Template COnfig, or removed from code - +NDEBUG FMM_DEBUG CHECK_LOCAL_RANK - -LGALAXIES_COMPUTE_OBS_MAGS -LGALAXIES_HALOPROPERTIES -LGALAXIES_ICL -LGALAXIES_METALS_SELF -LGALAXIES_OUTPUT_MOMAF_INPUTS -LGALAXIES_OUTPUT_REST_MAGS -LGALAXIES_TRACK_BURST -LGALAXIES_GUO10 -LGALAXIES_GUO13 -LGALAXIES_GALAXYTREE -LGALAXIES_MASS_CHECKS -LGALAXIES_OVERWRITE_OUTPUT -LGALAXIES_HENRIQUES13 -LGALAXIES_CHIEFFI -LGALAXIES_DTD -LGALAXIES_PORTINARI -LGALAXIES_KITZBICHLER -LGALAXIES_LIGHT_OUTPUT -LGALAXIES_OUTPUT_L_CONE_INPUTS -LGALAXIES_OUTPUT_OBS_MAGS -LGALAXIES_STAR_FORMATION_HISTORY -LGALAXIES_POST_PROCESS_MAGS -LGALAXIES_PHOTTABLES_PRECOMPUTED -LGALAXIES_SPEC_PHOTABLES_ON_THE_FLY -LGALAXIES_CONTINUOUS_TREES - -COMPUTE_SPECPHOT_PROPERTIES USE_MEMORY_TO_MINIMIZE_IO CORRECT_CIC DIFFERENT_TRANSFER_FUNC MULTICOMPONENTGLASSFILE -NEUTRINOS + diff --git a/documentation/04_config-options.md b/documentation/04_config-options.md index 27441255edea2edceee0915ad5e1658f23af0c8a..1999012347372c6fd7db5dc7aa45efa62b1377e8 100644 --- a/documentation/04_config-options.md +++ b/documentation/04_config-options.md @@ -432,9 +432,11 @@ expensive. This can be used to override the maximum radius out to which the short-range tree-force is evaluated in case the TreePM/FMM-PM -algorithm is used. The default value is 4.5, given in -mesh-cells. Going much beyond 6 should not yield further improvements -in the way the force matching region is treated. +algorithm is used. The conservative default value is 7.0 for this +parameter, given in mesh-cells. 
Going much beyond 6.0 does however +not yield much further improvement in the way the force matching region +is treated, and reducing this value to 4.5 will give higher performance +while being typically sufficiently accurate for most applications. ------- @@ -1280,6 +1282,14 @@ lightcone particle data before they are written to disk. Requires the ------- +**LIGHTCONE_PARTICLES_SKIP_SAVING** + +In case `LIGHTCONE_PARTICLES_GROUPS` is used, this option can be used to +avoid saving the actual particle data along with the groups that are +found. + +------- + **LIGHTCONE_IMAGE_COMP_HSML_VELDISP** This special option is only relevant for lightcone image creation, and @@ -1288,6 +1298,16 @@ dispersions. ------- +**LIGHTCONE_MULTIPLE_ORIGINS** + +If this is enabled, origins of lightcones different from (0, 0, 0) can +be defined. Possible origins need to be listed in a separate file whose +name is given by the parameter `LightConeOriginsFile`. The lightcone definition file then needs +to be augmented with a further number at the end of each lightcone +definition, and this serves as an index into the list of lightcone origins. + +------- + **REARRANGE_OPTION** This option needs to be enabled to allow the rearrange lightcone @@ -1433,7 +1453,7 @@ course, is to increase the number of MPI ranks. This option can be used to replace the default communication pattern used in the domain decomposition (and also in FOF and SUBFIND) which -is based on a hypercube with synchronous MPI_Sendrecv() calls, with a +is based on a hypercube with synchronous myMPI_Sendrecv() calls, with a bunch of asynchronous communications. This should be faster in principle, but it also tends to result in a huge number of simultaneously open communication requests which can also choke the @@ -1451,7 +1471,7 @@ function. This is done when this option is set, and one then effectively hopes that the internal algorithm used by Alltoallv is the most robust and fastest for the communication task at hand. 
This may be the case, but there is no guarantee for it. The default algorithm -of GADGET-4 (hypercube with synchronous MPI_Sendrecv), which is used +of GADGET-4 (hypercube with synchronous myMPI_Sendrecv), which is used when this option is not used, should always be a reliable alternative, however. @@ -1463,10 +1483,28 @@ Another issue with some MPI-libraries is that they may use quite a bit of internal storage for carrying out MPI_Allgatherv. If this turns out to be a problem, one can set this option. The code will then replace all uses of MPI_Allgatherv() with a simpler communication pattern that -uses hypercubes with MPI_Sendrecv as a work-around. +uses hypercubes with myMPI_Sendrecv as a work-around. ------- +**MPI_HYPERCUBE_ALLTOALL** + +Some MPI libraries have an unstable implementation of MPI_Alltoall. This +option replaces it with a robust hypercube communication pattern, which is +not necessarily the fastest, but very robust, scalable and of decent speed. + +------- + +**ALLOCATE_SHARED_MEMORY_VIA_POSIX** + +If this is set, try to use POSIX directly to allocate shared memory in +the virtual filesystem /dev/shm, instead of relying on the MPI-3 call +MPI_Win_allocate_shared() which on some systems executes in a sluggish +way. + +------- + + Testing and Debugging options {#tests} ============================= diff --git a/documentation/05_parameterfile.md b/documentation/05_parameterfile.md index dd54c285f84890043e5070d913b26f385c108dcb..a031c5d596a1b3362f07649d18bc4796c72b3fb7 100644 --- a/documentation/05_parameterfile.md +++ b/documentation/05_parameterfile.md @@ -1127,6 +1127,16 @@ An example for a lightcone definition file could look like this: This would define a full-sky light cone from z=1 to z=0, and an octant covering positive x>0,y>0,z>0 from redshift z=1.5 to z=0. +------- + +**LightConeOriginsFile** lightcone_origins.txt + +This option is required only when `LIGHTCONE_MULTIPLE_ORIGINS` is +activated. 
One can then supply a list of coordinate triples, each of +which is a possible origin of a lightcone as defined above. The +individual lightcone definitions from above then require one additional +number for each lightcone at the end. This number is an index into the +listed origins, and thus selects the corresponding origin. ------- diff --git a/documentation/09_special_modules.md b/documentation/09_special_modules.md index f3797fc8d478fb5091ceafb12e3f27be586a0d53..5e750ca3e0c6ef9b74cf561e7eb926984346964e 100644 --- a/documentation/09_special_modules.md +++ b/documentation/09_special_modules.md @@ -108,7 +108,8 @@ timestepping procedure. This option is activated with the `LIGHTCONE` switch, and needs to be active while the simulation is run. In this case, additional particle -outputs are created, which have a structure similar to snapshot files. +outputs are created, which have a structure similar to snapshot files, +except that the velocities are stored directly as peculiar velocities. 
While it is possible also here to use the file format 1 or 2, it is highly recommended to not bother with this but rather use HDF5 diff --git a/src/cooling_sfr/cooling.h b/src/cooling_sfr/cooling.h index 40eb0568e158f2aa0f0d4aa30025471c8f5c775d..df992b4105363ad370f7bb19452a1d0594558261 100644 --- a/src/cooling_sfr/cooling.h +++ b/src/cooling_sfr/cooling.h @@ -12,6 +12,8 @@ #ifndef COOLING_H #define COOLING_H +#include "gadgetconfig.h" + #ifdef COOLING #include "../data/simparticles.h" diff --git a/src/data/allvars.cc b/src/data/allvars.cc index 684a12f7dae3d429e1360df7876d42bca47c786c..777c6276e9620f9dba42ab983be6fafad666ac76 100644 --- a/src/data/allvars.cc +++ b/src/data/allvars.cc @@ -9,9 +9,12 @@ * \brief instance and code for an object dealing with global parameters and variables */ +// clang-format off #include "gadgetconfig.h" +// clang-format on #include "../data/allvars.h" + #include "../data/constants.h" #include "../data/dtypes.h" #include "../data/macros.h" @@ -122,22 +125,22 @@ void global_data_all_processes::register_parameters(void) for(int i = 0; i < NSOFTCLASSES; i++) { - char buf_l[100]; - sprintf(buf_l, "SofteningComovingClass%d", i); + char buf_l[MAXLEN_PARAM_TAG]; + snprintf(buf_l, MAXLEN_PARAM_TAG, "SofteningComovingClass%d", i); add_param(buf_l, &SofteningComoving[i], PARAM_DOUBLE, PARAM_FIXED); } for(int i = 0; i < NSOFTCLASSES; i++) { - char buf_l[100]; - sprintf(buf_l, "SofteningMaxPhysClass%d", i); + char buf_l[MAXLEN_PARAM_TAG]; + snprintf(buf_l, MAXLEN_PARAM_TAG, "SofteningMaxPhysClass%d", i); add_param(buf_l, &SofteningMaxPhys[i], PARAM_DOUBLE, PARAM_FIXED); } for(int i = 0; i < NTYPES; i++) { - char buf_l[100]; - sprintf(buf_l, "SofteningClassOfPartType%d", i); + char buf_l[MAXLEN_PARAM_TAG]; + snprintf(buf_l, MAXLEN_PARAM_TAG, "SofteningClassOfPartType%d", i); add_param(buf_l, &SofteningClassOfPartType[i], PARAM_INT, PARAM_FIXED); } @@ -161,6 +164,9 @@ void global_data_all_processes::register_parameters(void) #ifdef 
LIGHTCONE_PARTICLES add_param("LightConeDefinitionFile", LightConeDefinitionFile, PARAM_STRING, PARAM_CHANGEABLE); +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + add_param("LightConeOriginsFile", LightConeOriginsFile, PARAM_STRING, PARAM_CHANGEABLE); +#endif #endif #ifdef LIGHTCONE_MASSMAPS diff --git a/src/data/allvars.h b/src/data/allvars.h index d031178cfafc7db80c01a85753894e9d240e5891..b596c21c17407608496b93ec19e82e5b7697986d 100644 --- a/src/data/allvars.h +++ b/src/data/allvars.h @@ -97,7 +97,6 @@ struct global_data_all_processes : public parameters double InitGasU; /**< the same, but converted to thermal energy per unit mass */ double MinEgySpec; /**< the minimum allowed temperature expressed as energy per unit mass */ - /* some force counters */ long long TotNumOfForces; /**< counts total number of force computations */ @@ -137,6 +136,7 @@ struct global_data_all_processes : public parameters double Omega0; /**< matter density in units of the critical density (at z=0) */ double OmegaLambda; /**< vaccum energy density relative to crictical density (at z=0) */ double OmegaBaryon; /**< baryon density in units of the critical density (at z=0) */ + double OmegaCurvature; /**< curvature relative to crictical density (at z=0) */ #ifdef RADIATION double OmegaR; /**< radiation density in units of the critical density (at z=0) */ #ifdef NGENIC @@ -298,6 +298,10 @@ struct global_data_all_processes : public parameters #ifdef LIGHTCONE_PARTICLES char LightConeDefinitionFile[MAXLEN_PATH]; int LightconeFileCount; + +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + char LightConeOriginsFile[MAXLEN_PATH]; +#endif #endif #ifdef LIGHTCONE_MASSMAPS diff --git a/src/data/constants.h b/src/data/constants.h index a66aea27b283b0722eb3d8e221886ea174023cb8..6058906ce20e5e0d3c120c14abc29ae651b1acf2 100644 --- a/src/data/constants.h +++ b/src/data/constants.h @@ -282,6 +282,14 @@ #error "EXTERNALGRAVITY_STATICHQ only works when EXTERNALGRAVITY is activated" #endif +#if defined(LIGHTCONE_MULTIPLE_ORIGINS) && 
defined(LIGHTCONE_PARTICLES_GROUPS) +#error "Presently, the option LIGHTCONE_MULTIPLE_ORIGINS cannot be used yet together with LIGHTCONE_PARTICLES_GROUPS" +#endif + +#if defined(LIGHTCONE_MULTIPLE_ORIGINS) && defined(LIGHTCONE_MASSMAPS) +#error "Presently, the option LIGHTCONE_MULTIPLE_ORIGINS cannot be used yet together with LIGHTCONE_MASSMAPS" +#endif + #ifndef ASMTH /** ASMTH gives the scale of the short-range/long-range force split in units of FFT-mesh cells */ #define ASMTH 1.25 diff --git a/src/data/dtypes.h b/src/data/dtypes.h index 1d293b6cc07c9016760d654423a5b89a4f28ff2b..8d58f3ad2b667337484f9d0e8c97621fcf35d759 100644 --- a/src/data/dtypes.h +++ b/src/data/dtypes.h @@ -12,7 +12,10 @@ #ifndef DTYPES_H #define DTYPES_H +#include "gadgetconfig.h" + #include <stdint.h> +#include <atomic> #include <cstddef> #ifdef EXPLICIT_VECTORIZATION #include "../vectorclass/vectorclass.h" @@ -354,6 +357,35 @@ struct thread_data int *Exportflag; }; +template <typename T> +struct copyable_atomic : std::atomic<T> +{ + using std::atomic<T>::atomic; + + copyable_atomic(const copyable_atomic &ca) noexcept : std::atomic<T>(ca.load()) {} + + using std::atomic<T>::operator=; + + copyable_atomic &operator=(const copyable_atomic &other) noexcept + { + this->store(other.load()); + return *this; + } +}; + +struct copyable_atomic_flag : std::atomic_flag +{ + using std::atomic_flag::atomic_flag; + + copyable_atomic_flag(const copyable_atomic_flag &ca) noexcept { this->clear(); } + + copyable_atomic_flag &operator=(const copyable_atomic_flag &other) noexcept + { + this->clear(); + return *this; + } +}; + #ifdef LONG_X_BITS #define LONG_X (1 << (LONG_X_BITS)) #define MAX_LONG_X_BITS LONG_X_BITS diff --git a/src/data/idstorage.h b/src/data/idstorage.h index 2f1964575e8b8bdb0445ce77ca4e33742d25e680..dbb13083539db96d95272db93c38062dd544c9df 100644 --- a/src/data/idstorage.h +++ b/src/data/idstorage.h @@ -12,16 +12,20 @@ #ifndef IDSTORAGE_H #define IDSTORAGE_H +#include "gadgetconfig.h" + 
#include <climits> #if !defined(IDS_48BIT) #define ID_MSB ((MyIDType)(~((MyIDType)(~((MyIDType)0)) >> ((MyIDType)1)))) #define ID_MSK ((MyIDType)(((MyIDType)(~((MyIDType)0)) >> ((MyIDType)1)))) #define HALONR_MAX ((MyIDType)(((MyIDType)(~((MyIDType)0)) >> ((MyIDType)1)))) +#define ID_MAX ID_MSK #else #define ID_MSB ((unsigned short)(~((unsigned short)(~((unsigned short)0)) >> ((unsigned short)1)))) #define ID_MSK ((unsigned short)(((unsigned short)(~((unsigned short)0)) >> ((unsigned short)1)))) #define HALONR_MAX ((MyIDType)(((MyIDType)(~((MyIDType)0)) >> ((MyIDType)17)))) +#define ID_MAX (((long long)ID_MSK) << 32LL) #endif /* used to store a subhalo len in an approximate (quite accurate) way in just two bytes */ diff --git a/src/data/intposconvert.h b/src/data/intposconvert.h index aefed36c911afba031c5a5e4214d268095fe4247..9e62818a5baee2ee70e80d9b7351a0663757f07d 100644 --- a/src/data/intposconvert.h +++ b/src/data/intposconvert.h @@ -12,11 +12,13 @@ #ifndef CONVERT_H #define CONVERT_H -#include "allvars.h" -#include "dtypes.h" +#include "gadgetconfig.h" #include <cmath> +#include "../data/allvars.h" +#include "../data/dtypes.h" + #define MSB ((MyIntPosType)(~((MyIntPosType)(~((MyIntPosType)0)) >> ((MyIntPosType)1)))) #if defined(LONG_X_BITS) diff --git a/src/data/lcparticles.h b/src/data/lcparticles.h index bcbebba516346fde47703f9114745721cb5765ca..2a73706073bf9e1d734460b550ee254de5b40049 100644 --- a/src/data/lcparticles.h +++ b/src/data/lcparticles.h @@ -14,10 +14,10 @@ #if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES) -#include <math.h> - #include "gadgetconfig.h" +#include <math.h> + #include "../data/constants.h" #include "../data/dtypes.h" #include "../data/intposconvert.h" diff --git a/src/data/lightcone_massmap_data.h b/src/data/lightcone_massmap_data.h index 75d6256094f3e31866cad9ececa63f9b224d76e2..3d91f06dd709bc78759e43e681ba0c9d4dbe5185 100644 --- a/src/data/lightcone_massmap_data.h +++ b/src/data/lightcone_massmap_data.h @@ -14,6 +14,8 @@ #if 
defined(LIGHTCONE) && defined(LIGHTCONE_MASSMAPS) +#include "gadgetconfig.h" + #include "../data/constants.h" #include "../data/dtypes.h" #include "../data/macros.h" diff --git a/src/data/lightcone_particle_data.h b/src/data/lightcone_particle_data.h index 7e3260dd2dadbf67f275fbca70956c0f5acf6d25..bda7626900fdc6ea8e04c047fbf4cd9b38a361ee 100644 --- a/src/data/lightcone_particle_data.h +++ b/src/data/lightcone_particle_data.h @@ -63,6 +63,10 @@ struct lightcone_particle_data approxlen PrevSizeOfSubhalo; // 2-byte #endif +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + unsigned char OriginIndex; +#endif + #ifdef LIGHTCONE_IMAGE_COMP_HSML_VELDISP int NumNgb; MyFloat Hsml; diff --git a/src/data/macros.h b/src/data/macros.h index 20555bca1a7a67572956647490ebced1ab3e8eba..3e542ebf3e00d59b658179439d2e8536cc3d362c 100644 --- a/src/data/macros.h +++ b/src/data/macros.h @@ -12,36 +12,69 @@ #ifndef MACROS_H #define MACROS_H -#ifdef MPI_HYPERCUBE_ALLGATHERV -#define MPI_Allgatherv MPI_hypercube_Allgatherv -#endif +#include "gadgetconfig.h" + +#include <assert.h> +#include <mpi.h> +#include <stdio.h> +#include <stdlib.h> + +#include "../system/system.h" -#define Terminate(...) \ - { \ - { \ - char termbuf1__[8000], termbuf2__[8000]; \ - int thistask; \ - MPI_Comm_rank(MPI_COMM_WORLD, &thistask); \ - sprintf(termbuf1__, "Code termination on task=%d, function %s(), file %s, line %d", thistask, __FUNCTION__, __FILE__, \ - __LINE__); \ - sprintf(termbuf2__, __VA_ARGS__); \ - printf("%s: %s\n", termbuf1__, termbuf2__); \ - fflush(stdout); \ - MPI_Abort(MPI_COMM_WORLD, 1); \ - } \ - exit(0); \ - } -#define warn(...) 
\ - { \ - char termbuf1__[8000], termbuf2__[8000]; \ - int thistask; \ - MPI_Comm_rank(MPI_COMM_WORLD, &thistask); \ - sprintf(termbuf1__, "Code warning on task=%d, function %s(), file %s, line %d", thistask, __FUNCTION__, __FILE__, __LINE__); \ - sprintf(termbuf2__, __VA_ARGS__); \ - printf("%s: %s\n", termbuf1__, termbuf2__); \ - myflush(stdout); \ - FILE *fd__ = fopen("WARNINGS", "w"); \ - fclose(fd__); \ - } +#define TERMINATE_STATUS EXIT_FAILURE +#define TERMINATE_MSG "TERMINATE: ******!!!!!****** Code termination on task=%d, function %s(), file %s, line %d: " +#define Terminate(...) \ + do \ + { \ + int thistask; \ + MPI_Comm_rank(MPI_COMM_WORLD, &thistask); \ + printf(TERMINATE_MSG, thistask, __func__, __FILE__, __LINE__); \ + printf(__VA_ARGS__); \ + printf("\n"); \ + fflush(stdout); \ + MPI_Abort(MPI_COMM_WORLD, TERMINATE_STATUS); \ + exit(TERMINATE_STATUS); \ + } \ + while(0) + +#define WARNINGS_FILE_NAME "WARNINGS" +#define WARN_MSG "WARNING: Code warning on task=%d, function %s(), file %s, line %d: " +#define warn(...) 
\ + do \ + { \ + int thistask; \ + MPI_Comm_rank(MPI_COMM_WORLD, &thistask); \ + printf(WARN_MSG, thistask, __func__, __FILE__, __LINE__); \ + printf(__VA_ARGS__); \ + printf("\n"); \ + myflush(stdout); \ + FILE *const warn_fd = fopen(WARNINGS_FILE_NAME, "a"); \ + fprintf(warn_fd, WARN_MSG, thistask, __func__, __FILE__, __LINE__); \ + fprintf(warn_fd, __VA_ARGS__); \ + fprintf(warn_fd, "\n"); \ + fclose(warn_fd); \ + } \ + while(0) + +/* define an "assert" macro which outputs MPI rank (we do NOT want to call + * MPI_Abort, because then the assertion failure isn't caught in the debugger) */ +#define ASSERT_MSG "Assertion failure!\n\ttask=%d, function %s(), file %s, line %d:\n\t%s\n" +#ifdef NDEBUG +#define myassert(cond) +#else +#define myassert(cond) \ + do \ + { \ + if(!(cond)) \ + { \ + int thistask; \ + MPI_Comm_rank(MPI_COMM_WORLD, &thistask); \ + printf(ASSERT_MSG, thistask, __func__, __FILE__, __LINE__, #cond); \ + myflush(stdout); \ + assert(0); \ + } \ + } \ + while(0) +#endif #endif diff --git a/src/data/mmparticles.h b/src/data/mmparticles.h index 8560421ad864dfe555457166bed8e9501ce9bf51..1036620680d7080111462473322968b443e9c009 100644 --- a/src/data/mmparticles.h +++ b/src/data/mmparticles.h @@ -16,6 +16,8 @@ #if defined(LIGHTCONE) && defined(LIGHTCONE_MASSMAPS) +#include "gadgetconfig.h" + #include <math.h> #include "../data/constants.h" @@ -27,7 +29,6 @@ #include "../mpi_utils/setcomm.h" #include "../system/system.h" #include "../time_integration/timestep.h" -#include "gadgetconfig.h" class mmparticles : public setcomm { diff --git a/src/data/mymalloc.cc b/src/data/mymalloc.cc index bf29495ec523d5f18ccbe655fcf7e1b3efa3b978..d184379b9f0113fc102610af410c08541aeb573e 100644 --- a/src/data/mymalloc.cc +++ b/src/data/mymalloc.cc @@ -5,12 +5,16 @@ *******************************************************************************/ #include "gadgetconfig.h" +#include <fcntl.h> #include <math.h> #include <mpi.h> #include <stddef.h> #include <stdio.h> #include 
<stdlib.h> #include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> #include "../data/allvars.h" #include "../data/dtypes.h" @@ -63,8 +67,9 @@ void memory::mymalloc_init(int maxmemsize, enum restart_options restartflag) ParentFileName = (char *)malloc(MAXBLOCKS * MAXCHARS * sizeof(char)); FileName = (char *)malloc(MAXBLOCKS * MAXCHARS * sizeof(char)); LineNumber = (int *)malloc(MAXBLOCKS * sizeof(int)); - HighMarkTabBuf = (char *)malloc((100 + 4 * MAXCHARS) * (MAXBLOCKS + 10)); - HighMarkTabBufWithoutGeneric = (char *)malloc((100 + 4 * MAXCHARS) * (MAXBLOCKS + 10)); + highmark_bufsize = (100 + 4 * MAXCHARS) * (MAXBLOCKS + 10); + HighMarkTabBuf = (char *)malloc(highmark_bufsize); + HighMarkTabBufWithoutGeneric = (char *)malloc(highmark_bufsize); memset(VarName, 0, MAXBLOCKS * MAXCHARS); memset(FunctionName, 0, MAXBLOCKS * MAXCHARS); @@ -87,7 +92,7 @@ void memory::mymalloc_init(int maxmemsize, enum restart_options restartflag) MPI_Info_create(&win_info); MPI_Info_set(win_info, "alloc_shared_noncontig", "true"); - if(MPI_Win_allocate_shared(n, 1, win_info, Shmem.SharedMemComm, &Base, &Shmem.SharedMemWin) != MPI_SUCCESS) + if(myMPI_Win_allocate_shared(n, 1, win_info, Shmem.SharedMemComm, &Base, &Shmem.SharedMemWin) != MPI_SUCCESS) Terminate("Failed to allocate memory for `Base' (%d Mbytes).\n", All.MaxMemSize); /* we now make sure that the allocated local buffer is really aligned, not all MPI libraries guarantee this */ @@ -124,9 +129,9 @@ void memory::mymalloc_init(int maxmemsize, enum restart_options restartflag) MPI_Bcast(All.OutputDir, sizeof(All.OutputDir), MPI_BYTE, 0, MPI_COMM_WORLD); if(Shmem.GhostRank == 0) - sprintf(buf, "%s%s", All.OutputDir, "memory.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "memory.txt"); else - sprintf(buf, "%s%s", All.OutputDir, "memory_ghostranks.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "memory_ghostranks.txt"); if(!(FdMemory = fopen(buf, mode))) 
Terminate("error in opening file '%s'\n", buf); @@ -156,7 +161,7 @@ void memory::mymalloc_init(int maxmemsize, enum restart_options restartflag) { MPI_Aint size; int disp_unit; - MPI_Win_shared_query(Shmem.SharedMemWin, i, &size, &disp_unit, &Shmem.SharedMemBaseAddr[i]); + myMPI_Win_shared_query(Shmem.SharedMemWin, i, &size, &disp_unit, &Shmem.SharedMemBaseAddr[i]); } // now propagte the alignment correction also to the base addresses that all the other processes see @@ -170,6 +175,93 @@ void memory::mymalloc_init(int maxmemsize, enum restart_options restartflag) Mem.myfree(off_list); } +int memory::myMPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm, void *baseptr, MPI_Win *win) +{ +#ifndef ALLOCATE_SHARED_MEMORY_VIA_POSIX + return MPI_Win_allocate_shared(size, disp_unit, info, comm, baseptr, win); +#else + + char shmpath[NAME_MAX]; + + /* Base offsets of the other MPI ranks in our shared memory mapping */ + Shmem.SharedMemBaseAddrRaw = (char **)malloc(Shmem.Island_NTask * sizeof(char *)); + + long long *size_list = (long long *)malloc(Shmem.Island_NTask * sizeof(long long)); + + long long loc_bytes = size; + MPI_Allgather(&loc_bytes, 1, MPI_LONG_LONG, size_list, 1, MPI_LONG_LONG, comm); + + long long tot_bytes = 0; + for(int i = 0; i < Shmem.Island_NTask; i++) + tot_bytes += size_list[i]; + + if(Shmem.Island_ThisTask == 0) + { + snprintf(shmpath, NAME_MAX, "/G4-%lld.dat", (long long)getpid()); + + int fd = shm_open(shmpath, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR); + if(fd == -1) + Terminate("shm_open failed in creation"); + + if(ftruncate(fd, tot_bytes) == -1) + Terminate("ftruncate failed"); + + /* Map the object into the caller's address space. 
*/ + + void *buf = mmap(NULL, tot_bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if(buf == MAP_FAILED) + Terminate("mmap failed"); + + Shmem.SharedMemBaseAddrRaw[0] = (char *)buf; + } + + MPI_Bcast(shmpath, NAME_MAX, MPI_BYTE, 0, comm); + + if(Shmem.Island_ThisTask != 0) + { + int fd = shm_open(shmpath, O_RDWR, 0); + if(fd == -1) + Terminate("shm open failed in access"); + + void *buf = mmap(NULL, tot_bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if(buf == MAP_FAILED) + Terminate("mmap failed"); + + Shmem.SharedMemBaseAddrRaw[0] = (char *)buf; + } + + for(int i = 1; i < Shmem.Island_NTask; i++) + Shmem.SharedMemBaseAddrRaw[i] = (char *)Shmem.SharedMemBaseAddrRaw[i - 1] + size_list[i - 1]; + + char **p = (char **)baseptr; + + *p = Shmem.SharedMemBaseAddrRaw[Shmem.Island_ThisTask]; + + free(size_list); + + MPI_Barrier(comm); + + if(Shmem.Island_ThisTask == 0) + shm_unlink(shmpath); + + return MPI_SUCCESS; +#endif +} + +int memory::myMPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, void *baseptr) +{ +#ifndef ALLOCATE_SHARED_MEMORY_VIA_POSIX + return MPI_Win_shared_query(win, rank, size, disp_unit, baseptr); +#else + + char **p = (char **)baseptr; + + *p = Shmem.SharedMemBaseAddrRaw[rank]; + + return MPI_SUCCESS; +#endif +} + void memory::report_memory_usage(int rank, char *tabbuf) { int thistask; @@ -177,12 +269,14 @@ void memory::report_memory_usage(int rank, char *tabbuf) if(thistask == rank) { - char *buf = (char *)mymalloc("buf", (100 + 4 * MAXCHARS) * (Nblocks + 10)); - int cc = 0; - cc += sprintf(buf + cc, "\nMEMORY: Largest Allocation = %g Mbyte | Largest Allocation Without Generic = %g Mbyte\n\n", - OldGlobHighMarkMB, OldGlobHighMarkMBWithoutGeneric); - - cc += sprintf(buf + cc, "%s", tabbuf); + int bufsize = (100 + 4 * MAXCHARS) * (Nblocks + 10); + char *buf = (char *)mymalloc("buf", bufsize); + int cc = 0; + cc += snprintf(buf + cc, bufsize - cc, + "\nMEMORY: Largest Allocation = %g Mbyte | Largest Allocation Without 
Generic = %g Mbyte\n\n", + OldGlobHighMarkMB, OldGlobHighMarkMBWithoutGeneric); + + cc += snprintf(buf + cc, bufsize - cc, "%s", tabbuf); if(thistask == 0) { if(RestartFlag == RST_BEGIN || RestartFlag == RST_RESUME || RestartFlag == RST_STARTFROMSNAP) @@ -263,8 +357,9 @@ void memory::report_detailed_memory_usage_of_largest_task(void) */ void memory::dump_memory_table(void) { - char *buf = (char *)malloc(200 * (Nblocks + 10)); - dump_memory_table_buffer(buf); + int bufsize = 200 * (Nblocks + 10); + char *buf = (char *)malloc(bufsize); + dump_memory_table_buffer(buf, bufsize); printf("%s", buf); free(buf); } @@ -274,26 +369,26 @@ void memory::dump_memory_table(void) * \param p output buffer * \return the number of characters written to p */ -int memory::dump_memory_table_buffer(char *p) +int memory::dump_memory_table_buffer(char *p, int bufsize) { int cc = 0; size_t totBlocksize = 0; int thistask; MPI_Comm_rank(Communicator, &thistask); - cc += - sprintf(p + cc, "-------------------------- Allocated Memory Blocks---- ( Step %8d )------------------\n", All.NumCurrentTiStep); - cc += sprintf(p + cc, "Task Nr F Variable MBytes Cumulative Function|File|Linenumber\n"); - cc += sprintf(p + cc, "------------------------------------------------------------------------------------------\n"); + cc += snprintf(p + cc, bufsize - cc, "-------------------------- Allocated Memory Blocks---- ( Step %8d )------------------\n", + All.NumCurrentTiStep); + cc += snprintf(p + cc, bufsize - cc, "Task Nr F Variable MBytes Cumulative Function|File|Linenumber\n"); + cc += snprintf(p + cc, bufsize - cc, "------------------------------------------------------------------------------------------\n"); for(int i = 0; i < Nblocks; i++) { totBlocksize += BlockSize[i]; - cc += sprintf(p + cc, "%4d %5d %d %40s %10.4f %10.4f %s%s()|%s|%d\n", thistask, i, MovableFlag[i], VarName + i * MAXCHARS, - BlockSize[i] * TO_MBYTE_FAC, totBlocksize * TO_MBYTE_FAC, ParentFileName + i * MAXCHARS, - FunctionName + i 
* MAXCHARS, FileName + i * MAXCHARS, LineNumber[i]); + cc += snprintf(p + cc, bufsize - cc, "%4d %5d %d %40s %10.4f %10.4f %s%s()|%s|%d\n", thistask, i, MovableFlag[i], + VarName + i * MAXCHARS, BlockSize[i] * TO_MBYTE_FAC, totBlocksize * TO_MBYTE_FAC, ParentFileName + i * MAXCHARS, + FunctionName + i * MAXCHARS, FileName + i * MAXCHARS, LineNumber[i]); } - cc += sprintf(p + cc, "------------------------------------------------------------------------------------------\n"); + cc += snprintf(p + cc, bufsize - cc, "------------------------------------------------------------------------------------------\n"); return cc; } @@ -356,13 +451,13 @@ void *memory::mymalloc_movable_fullinfo(void *ptr, const char *varname, size_t n if(AllocatedBytes - AllocatedBytesGeneric > HighMarkBytesWithoutGeneric) { HighMarkBytesWithoutGeneric = AllocatedBytes - AllocatedBytesGeneric; - dump_memory_table_buffer(HighMarkTabBufWithoutGeneric); + dump_memory_table_buffer(HighMarkTabBufWithoutGeneric, highmark_bufsize); } if(AllocatedBytes > HighMarkBytes) { HighMarkBytes = AllocatedBytes; - dump_memory_table_buffer(HighMarkTabBuf); + dump_memory_table_buffer(HighMarkTabBuf, highmark_bufsize); } if(clear_flag) @@ -584,7 +679,7 @@ void *memory::myrealloc_movable_fullinfo(void *p, size_t n, const char *func, co if(AllocatedBytes > HighMarkBytes) { HighMarkBytes = AllocatedBytes; - dump_memory_table_buffer(HighMarkTabBuf); + dump_memory_table_buffer(HighMarkTabBuf, highmark_bufsize); } return Table[nr]; diff --git a/src/data/mymalloc.h b/src/data/mymalloc.h index 86a05bdae2f76370af92e8b2e8daf3c8c8fd36ba..2d84ca0a1706fb695e8efa7ceedf838bc9121f6c 100644 --- a/src/data/mymalloc.h +++ b/src/data/mymalloc.h @@ -12,6 +12,8 @@ #ifndef MYMALLOC_H #define MYMALLOC_H +#include "gadgetconfig.h" + #include <stdio.h> #define CACHELINESIZE 64 @@ -65,6 +67,9 @@ class memory : public setcomm void check_maxmemsize_setting(int maxmemsize); + int myMPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info 
info, MPI_Comm comm, void *baseptr, MPI_Win *win); + int myMPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, void *baseptr); + inline double getAllocatedBytesInMB(void) { return AllocatedBytes * TO_MBYTE_FAC; } template <typename T> @@ -130,7 +135,9 @@ class memory : public setcomm has occurred on this task */ enum restart_options RestartFlag; - int dump_memory_table_buffer(char *p); + int highmark_bufsize; + + int dump_memory_table_buffer(char *p, int bufsize); void report_memory_usage(int rank, char *tabbuf); }; diff --git a/src/data/particle_data.h b/src/data/particle_data.h index 6cf01efef46a15e66fc03edf24a2784b2aa37512..f803903472fa6b661a09a048b68fd35cc1542953 100644 --- a/src/data/particle_data.h +++ b/src/data/particle_data.h @@ -33,23 +33,12 @@ */ struct particle_data { - // we do this ugly trick of using memcpy for our own copy constructor and assignment operator - // because the atomic_flag in particle_data has an implicitly deleted copy operator... so that the implicit functions - // for this are unavailable. But we know what we are doing here, and surrounding this with an ugly hack - // is the easiest way at the moment to work around this in our case unnecessary protection + // Note that because the atomic_flag and atomic data-types in particle_data have an implicitly deleted copy operator, + // we are using them encapsulated in a structure defined in dtypes.h that implements these copy and assignment operators explicitly. + // This is fine because the code logic guarantees that there is no concurrent access when such copies of particle_data happen. 
particle_data() {} - // declare our own copy constructor - particle_data(particle_data& other) { memcpy(static_cast<void*>(this), static_cast<void*>(&other), sizeof(particle_data)); } - - // declare our own assignment operator - particle_data& operator=(particle_data& other) - { - memcpy(static_cast<void*>(this), static_cast<void*>(&other), sizeof(particle_data)); - return *this; - } - MyIntPosType IntPos[3]; /**< particle position at its current time, stored as an integer type */ MyFloat Vel[3]; /**< particle velocity at its current time */ vector<MyFloat> GravAccel; /**< particle acceleration due to gravity */ @@ -57,9 +46,9 @@ struct particle_data MyFloat GravPM[3]; /**< particle acceleration due to long-range PM gravity force */ #endif - std::atomic<integertime> Ti_Current; /**< current time on integer timeline */ - float OldAcc; /**< magnitude of old gravitational force. Used in relative opening criterion */ - int GravCost; /**< weight factors used for balancing the work-load */ + copyable_atomic<integertime> Ti_Current; /**< current time on integer timeline */ + float OldAcc; /**< magnitude of old gravitational force. 
Used in relative opening criterion */ + int GravCost; /**< weight factors used for balancing the work-load */ #ifndef LEAN private: @@ -85,7 +74,7 @@ struct particle_data #endif #ifndef LEAN - std::atomic_flag access; + copyable_atomic_flag access; #endif #ifdef REARRANGE_OPTION diff --git a/src/data/simparticles.h b/src/data/simparticles.h index dbf89705d5b5c2b33b0e387c63c779ccb7e09a65..66e8aca42101f036ae117061296342f8bd7e1071 100644 --- a/src/data/simparticles.h +++ b/src/data/simparticles.h @@ -12,6 +12,8 @@ #ifndef SIMPART_H #define SIMPART_H +#include "gadgetconfig.h" + #include <math.h> #include "../data/allvars.h" @@ -321,8 +323,8 @@ class simparticles : public intposconvert, public setcomm void dump_particles(void) { FILE *fd; - char buffer[200]; - sprintf(buffer, "particles_%d.dat", ThisTask); + char buffer[MAXLEN_PATH]; + snprintf(buffer, MAXLEN_PATH, "particles_%d.dat", ThisTask); if((fd = fopen(buffer, "w"))) { fwrite(&NumPart, 1, sizeof(int), fd); diff --git a/src/data/sph_particle_data.h b/src/data/sph_particle_data.h index e1c6bd8c24f440c84e9dfa602efce17a52dab17f..32785c58340a633f0589e88d7444aa4b450267ab 100644 --- a/src/data/sph_particle_data.h +++ b/src/data/sph_particle_data.h @@ -12,6 +12,8 @@ #ifndef SPHPARTDATA_H #define SPHPARTDATA_H +#include "gadgetconfig.h" + #include "../data/constants.h" #include "../data/dtypes.h" #include "../data/intposconvert.h" diff --git a/src/data/symtensor_indices.h b/src/data/symtensor_indices.h index ce536230bfc27f6df1f28e7a01da39678c4fb39a..dc38aaacc338c100ad1099a28dd483f056c89812 100644 --- a/src/data/symtensor_indices.h +++ b/src/data/symtensor_indices.h @@ -12,6 +12,11 @@ #ifndef SRC_DATA_SYMTENSOR_INDICES_H_ #define SRC_DATA_SYMTENSOR_INDICES_H_ +/* 1-vector*/ +#define vX 0 +#define vY 1 +#define vZ 2 + /* 2-tensor element mapping, symmetric 3x3 */ #define qXX 0 #define qXY 1 diff --git a/src/data/symtensors.h b/src/data/symtensors.h index 
a64609ba283a848814d2f2acf55f39ec25774f56..7b864d88234cdd32fb30151806fcf74672b6a51b 100644 --- a/src/data/symtensors.h +++ b/src/data/symtensors.h @@ -12,6 +12,8 @@ #ifndef SYMTENSORS_H #define SYMTENSORS_H +#include "gadgetconfig.h" + #include "symtensor_indices.h" void symtensor_test(void); diff --git a/src/domain/domain.cc b/src/domain/domain.cc index 085f655cc3d4d399a03743a5e6abaa80292eb992..0192d75479f290cd06df7b7c7ead24b3eb21870b 100644 --- a/src/domain/domain.cc +++ b/src/domain/domain.cc @@ -25,6 +25,7 @@ #include "gadgetconfig.h" #include <mpi.h> + #include <cmath> #include <cstdio> #include <cstdlib> @@ -237,8 +238,8 @@ void domain<simparticles>::domain_find_total_cost(void) if(Tp->P[i].getType() == 0) { - if(bin >= Tp->P[i].getTimeBinHydro()) - HydroCostPerListedTimeBin[n] += 1.0; + if(bin >= Tp->P[i].getTimeBinHydro()) + HydroCostPerListedTimeBin[n] += 1.0; } } } @@ -471,9 +472,9 @@ void domain<simparticles>::domain_report_balance(void) } char buf[MAXLEN_PATH]; - sprintf(buf, "\nDOMAIN BALANCE, Sync-Point %d, Time: %g\n", All.NumCurrentTiStep, All.Time); + snprintf(buf, MAXLEN_PATH, "\nDOMAIN BALANCE, Sync-Point %d, Time: %g\n", All.NumCurrentTiStep, All.Time); domain_printf(buf); - sprintf(buf, "Timebins: Gravity Hydro cumulative grav-balance hydro-balance\n"); + snprintf(buf, MAXLEN_PATH, "Timebins: Gravity Hydro cumulative grav-balance hydro-balance\n"); domain_printf(buf); long long tot = 0, tot_sph = 0; @@ -490,10 +491,10 @@ void domain<simparticles>::domain_report_balance(void) #endif { char buf[MAXLEN_PATH]; - sprintf(buf, "%c%cbin=%2d %10llu %10llu %10llu %6.3f |%6.3f %c %6.3f |%6.3f\n", - i == All.HighestActiveTimeBin ? '>' : ' ', i >= All.SmallestTimeBinWithDomainDecomposition ? '|' : ' ', i, - tot_count[i], tot_count_sph[i], tot_cumulative[i], bal_grav_bin[i], bal_grav_bin_rel[i], - domain_to_be_balanced[i] > 0 ? 
'*' : ' ', bal_hydro_bin[i], bal_hydro_bin_rel[i]); + snprintf(buf, MAXLEN_PATH, "%c%cbin=%2d %10llu %10llu %10llu %6.3f |%6.3f %c %6.3f |%6.3f\n", + i == All.HighestActiveTimeBin ? '>' : ' ', i >= All.SmallestTimeBinWithDomainDecomposition ? '|' : ' ', i, + tot_count[i], tot_count_sph[i], tot_cumulative[i], bal_grav_bin[i], bal_grav_bin_rel[i], + domain_to_be_balanced[i] > 0 ? '*' : ' ', bal_hydro_bin[i], bal_hydro_bin_rel[i]); domain_printf(buf); tot += tot_count[i]; @@ -501,15 +502,15 @@ void domain<simparticles>::domain_report_balance(void) } } - sprintf(buf, "-------------------------------------------------------------------------------------\n"); + snprintf(buf, MAXLEN_PATH, "-------------------------------------------------------------------------------------\n"); domain_printf(buf); - sprintf(buf, "BALANCE, LOAD: %6.3f %6.3f %6.3f WORK: %6.3f %6.3f\n", - max_dm / (tot - tot_sph + SMALLNUM) * NTask, max_sph / (tot_sph + SMALLNUM) * NTask, max_tot / (tot + SMALLNUM) * NTask, - max_gravcost / (tot_gravcost + SMALLNUM), max_hydrocost / (tot_hydrocost + SMALLNUM)); + snprintf(buf, MAXLEN_PATH, "BALANCE, LOAD: %6.3f %6.3f %6.3f WORK: %6.3f %6.3f\n", + max_dm / (tot - tot_sph + SMALLNUM) * NTask, max_sph / (tot_sph + SMALLNUM) * NTask, max_tot / (tot + SMALLNUM) * NTask, + max_gravcost / (tot_gravcost + SMALLNUM), max_hydrocost / (tot_hydrocost + SMALLNUM)); domain_printf(buf); - sprintf(buf, "-------------------------------------------------------------------------------------\n"); + snprintf(buf, MAXLEN_PATH, "-------------------------------------------------------------------------------------\n"); domain_printf(buf); - sprintf(buf, "\n"); + snprintf(buf, MAXLEN_PATH, "\n"); domain_printf(buf); myflush(Logs.FdDomain); } diff --git a/src/domain/domain.h b/src/domain/domain.h index 1ac2308a3876d37aa44baa876d82a6a7f7e2dda1..98c315f2aae07ca1baf347af1065a659c309d3e7 100644 --- a/src/domain/domain.h +++ b/src/domain/domain.h @@ -8,13 +8,14 @@ * * \brief declares the 
class used for the domain decomposition */ +#ifndef DOMAIN_H +#define DOMAIN_H + +#include "gadgetconfig.h" #ifndef ALLVARS_H #include "../data/allvars.h" #endif -#ifndef DOMAIN_H -#define DOMAIN_H - #include "../data/dtypes.h" #include "../mpi_utils/setcomm.h" diff --git a/src/domain/domain_balance.cc b/src/domain/domain_balance.cc index 3a14d4970c64c5be0fa619cdd9b0d8a15e605317..e91ea9056e3a7f98a08c2568c120207fb678dc25 100644 --- a/src/domain/domain_balance.cc +++ b/src/domain/domain_balance.cc @@ -90,8 +90,8 @@ void domain<partset>::domain_special_check(int mode, int ndomains) { if(ThisTask == 0) { - char buf[1000]; - sprintf(buf, "%s/domain_data_%d_step%d.txt", All.OutputDir, mode, All.NumCurrentTiStep); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/domain_data_%d_step%d.txt", All.OutputDir, mode, All.NumCurrentTiStep); FILE *fd = fopen(buf, "w"); fprintf(fd, "%d %d\n", ndomains, NumTimeBinsToBeBalanced); for(int n = 0; n < ndomains; n++) @@ -175,8 +175,8 @@ void domain<partset>::domain_combine_multipledomains(void) { if(ThisTask == 0) { - char buf[1000]; - sprintf(buf, "%s/domain_data_0_step%d.txt", All.OutputDir, All.NumCurrentTiStep); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/domain_data_0_step%d.txt", All.OutputDir, All.NumCurrentTiStep); FILE *fd = fopen(buf, "w"); fprintf(fd, "%d %d\n", NTopleaves, NumTimeBinsToBeBalanced); for(int n = 0; n < NTopleaves; n++) @@ -510,8 +510,8 @@ void domain<partset>::domain_combine_multipledomains(void) #ifdef DOMAIN_SPECIAL_CHECK if(All.NumCurrentTiStep == 0 || All.NumCurrentTiStep == 2 || All.NumCurrentTiStep == 4) { - char buf[1000]; - sprintf(buf, "%s/domain_data_1_step%d_task%d.txt", All.OutputDir, All.NumCurrentTiStep, ThisTask); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/domain_data_1_step%d_task%d.txt", All.OutputDir, All.NumCurrentTiStep, ThisTask); FILE *fd = fopen(buf, "w"); fprintf(fd, "%d %d\n", ndomains, NumTimeBinsToBeBalanced); for(int n = 0; n <
ndomains; n++) @@ -525,8 +525,9 @@ void domain<partset>::domain_combine_multipledomains(void) } if(All.NumCurrentTiStep == 0 || All.NumCurrentTiStep == 2 || All.NumCurrentTiStep == 4) { - char buf[1000]; - sprintf(buf, "%s/domain_data_2_step%d_task%d.txt", All.OutputDir, All.NumCurrentTiStep, ThisTask); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/domain_data_2_step%d_task%d.txt", All.OutputDir, All.NumCurrentTiStep, + ThisTask); FILE *fd = fopen(buf, "w"); fprintf(fd, "%d %d\n", NTask, NumTimeBinsToBeBalanced); for(int n = 0; n < NTask; n++) diff --git a/src/domain/domain_exchange.cc b/src/domain/domain_exchange.cc index d0427a941bcd6f947aebb93c48c02adc71613e3b..0e45c08575cd836c7282b3c3447a120df249f311 100644 --- a/src/domain/domain_exchange.cc +++ b/src/domain/domain_exchange.cc @@ -117,7 +117,7 @@ void domain<partset>::domain_exchange(void) toGo[2 * i] = toGoDM[i]; toGo[2 * i + 1] = toGoSph[i]; } - MPI_Alltoall(toGo, 2, MPI_INT, toGet, 2, MPI_INT, Communicator); + myMPI_Alltoall(toGo, 2, MPI_INT, toGet, 2, MPI_INT, Communicator); for(int i = 0; i < NTask; ++i) { toGetDM[i] = toGet[2 * i]; @@ -582,7 +582,7 @@ void domain<partset>::particle_exchange_based_on_PS(MPI_Comm Communicator) if(rep == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); nimport = 0, nexport = 0; Recv_offset[0] = Send_offset[0] = 0; @@ -619,10 +619,10 @@ void domain<partset>::particle_exchange_based_on_PS(MPI_Comm Communicator) { if(Send_count[target] > 0 || Recv_count[target] > 0) { - MPI_Sendrecv(sphBuf + Send_offset[target], Send_count[target] * sizeof(sph_particle_data), MPI_BYTE, - target, TAG_SPHDATA, Tp->SphP + Recv_offset[target] + nstay, - Recv_count[target] * sizeof(sph_particle_data), MPI_BYTE, target, TAG_SPHDATA, Communicator, - MPI_STATUS_IGNORE); + myMPI_Sendrecv(sphBuf + Send_offset[target], Send_count[target] * sizeof(sph_particle_data), 
MPI_BYTE, + target, TAG_SPHDATA, Tp->SphP + Recv_offset[target] + nstay, + Recv_count[target] * sizeof(sph_particle_data), MPI_BYTE, target, TAG_SPHDATA, + Communicator, MPI_STATUS_IGNORE); } } } @@ -681,7 +681,7 @@ void domain<partset>::particle_exchange_based_on_PS(MPI_Comm Communicator) if(rep == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); nimport = 0, nexport = 0; Recv_offset[0] = Send_offset[0] = 0; @@ -724,9 +724,9 @@ void domain<partset>::particle_exchange_based_on_PS(MPI_Comm Communicator) { if(Send_count[target] > 0 || Recv_count[target] > 0) { - MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(pdata), MPI_BYTE, target, TAG_PDATA, - Tp->P + Recv_offset[target] + nlocal, Recv_count[target] * sizeof(pdata), MPI_BYTE, target, - TAG_PDATA, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(pdata), MPI_BYTE, target, + TAG_PDATA, Tp->P + Recv_offset[target] + nlocal, Recv_count[target] * sizeof(pdata), MPI_BYTE, + target, TAG_PDATA, Communicator, MPI_STATUS_IGNORE); } } } @@ -785,7 +785,7 @@ void domain<partset>::particle_exchange_based_on_PS(MPI_Comm Communicator) if(rep == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); nimport = 0, nexport = 0; Recv_offset[0] = Send_offset[0] = 0; @@ -822,9 +822,9 @@ void domain<partset>::particle_exchange_based_on_PS(MPI_Comm Communicator) { if(Send_count[target] > 0 || Recv_count[target] > 0) { - MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(subfind_data), MPI_BYTE, target, - TAG_KEY, Tp->PS + Recv_offset[target] + nlocal, Recv_count[target] * sizeof(subfind_data), - MPI_BYTE, target, TAG_KEY, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(subBuf + Send_offset[target], 
Send_count[target] * sizeof(subfind_data), MPI_BYTE, target, + TAG_KEY, Tp->PS + Recv_offset[target] + nlocal, Recv_count[target] * sizeof(subfind_data), + MPI_BYTE, target, TAG_KEY, Communicator, MPI_STATUS_IGNORE); } } } diff --git a/src/fmm/fmm.cc b/src/fmm/fmm.cc index aae7787ed627c62b1236ad4663c000382c627d7d..6619b1c12647925901a4a08033779bd0008f412c 100644 --- a/src/fmm/fmm.cc +++ b/src/fmm/fmm.cc @@ -1350,7 +1350,7 @@ inline int fmm::fmm_evaluate_particle_node_opening_criterion(int no_sink, char t #endif } - if(nop_source->level == 0) // always open the root node (note: full node length does not fit in the integer type) + if(nop_source->level <= LEVEL_ALWAYS_OPEN) // always open the root node (note: full node length does not fit in the integer type) return NODE_OPEN; MyIntPosType halflen = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - nop_source->level); @@ -1708,7 +1708,7 @@ void fmm::gravity_fmm(int timebin) NumOnWorkStack = 0; AllocWorkStackBaseLow = std::max<int>(1.5 * (Tp->NumPart + NumPartImported), TREE_MIN_WORKSTACK_SIZE); AllocWorkStackBaseHigh = AllocWorkStackBaseLow + TREE_EXPECTED_CYCLES * TREE_MIN_WORKSTACK_SIZE; - MaxOnWorkStack = AllocWorkStackBaseLow; + MaxOnWorkStack = std::max<int>(AllocWorkStackBaseLow, 2 * 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT); FMM_WorkStack = (fmm_workstack_data *)Mem.mymalloc("FMM_WorkStack", AllocWorkStackBaseHigh * sizeof(fmm_workstack_data)); ResultIndexList = (int *)Mem.mymalloc("ResultIndexList", NumPartImported * sizeof(int)); @@ -1763,6 +1763,7 @@ void fmm::gravity_fmm(int timebin) // set a default size of the fetch stack equal to half the work stack (this may still be somewhat too large) MaxOnFetchStack = std::max<int>(0.1 * (Tp->NumPart + NumPartImported), TREE_MIN_WORKSTACK_SIZE); + MaxOnFetchStack = std::max<int>(MaxOnFetchStack, 2 * 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT); StackToFetch = (fetch_data *)Mem.mymalloc_movable(&StackToFetch, "StackToFetch", 
MaxOnFetchStack * sizeof(fetch_data)); // let's grab at most half the still available memory for imported points and nodes @@ -1813,6 +1814,7 @@ void fmm::gravity_fmm(int timebin) NewOnWorkStack = 0; // gives the new entries NumOnFetchStack = 0; MaxOnWorkStack = std::min<int>(AllocWorkStackBaseLow + max_ncycles * TREE_MIN_WORKSTACK_SIZE, AllocWorkStackBaseHigh); + MaxOnWorkStack = std::max<int>(MaxOnWorkStack, 2 * 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT); TIMER_START(CPU_TREEWALK); diff --git a/src/fmm/fmm.h b/src/fmm/fmm.h index 46db767f4992dce774f9347fea6902b259b82e3d..b943976011992d2296a438635c12667499bd3e3a 100644 --- a/src/fmm/fmm.h +++ b/src/fmm/fmm.h @@ -12,6 +12,8 @@ #ifndef FMM_H #define FMM_H +#include "gadgetconfig.h" + #ifdef FMM #include "../data/symtensors.h" diff --git a/src/fof/fof.cc b/src/fof/fof.cc index afe30597734395ed9e7ef03f4fc45a2d268186d3..29d03d1482193a5ff9d65de3b389647e5ecf62c6 100644 --- a/src/fof/fof.cc +++ b/src/fof/fof.cc @@ -14,6 +14,7 @@ #ifdef FOF #include <mpi.h> + #include <algorithm> #include <climits> #include <cmath> @@ -72,7 +73,7 @@ void fof<partset>::fof_fof(int num, const char *grpcat_basename, const char *grp Tp->DistanceOrigin = (double *)Mem.mymalloc("DistanceOrigin", Tp->NumPart * sizeof(double)); #endif - Tp->MinID = (MyIDStorage *)Mem.mymalloc("MinID", Tp->NumPart * sizeof(MyIDStorage)); // smallest particle ID withing FOF group + Tp->MinID = (MyIDStorage *)Mem.mymalloc("MinID", Tp->NumPart * sizeof(MyIDStorage)); // smallest particle ID within FOF group Tp->MinIDTask = (int *)Mem.mymalloc("MinIDTask", Tp->NumPart * sizeof(int)); // processor on which this ID is stored Tp->Head = (int *)Mem.mymalloc("Head", Tp->NumPart * sizeof(int)); // first particle in chaining list if local FOF group segment Tp->Next = (int *)Mem.mymalloc("Next", Tp->NumPart * sizeof(int)); // next particle in chaining list @@ -80,14 +81,34 @@ void fof<partset>::fof_fof(int num, const char *grpcat_basename, const char *grp 
Tp->Len = (int *)Mem.mymalloc("Len", Tp->NumPart * sizeof(int)); // length of local FOF group segment (note: 32 bit enough even for // huge groups because they are split across processors) + int *numpart_list = (int *)Mem.mymalloc("numpart_list", NTask * sizeof(int)); + + MPI_Allgather(&Tp->NumPart, 1, MPI_INT, numpart_list, 1, MPI_INT, Communicator); + + long long NumPartTot = 0; + for(int i = 0; i < NTask; i++) + NumPartTot += numpart_list[i]; + + mpi_printf("FOF: NumPartTot=%lld\n", NumPartTot); + + if(NumPartTot > (long long)ID_MAX) + Terminate("The chosen ID data type is not sufficiently big to store unique IDs for NumPartTot=%lld particles\n", NumPartTot); + + MyIDType id = 0; + for(int i = 0; i < ThisTask; i++) + id += numpart_list[i]; + + Mem.myfree(numpart_list); + /* initialize link-lists, each particle is in a group of its own initially */ for(int i = 0; i < Tp->NumPart; i++) { Tp->Head[i] = Tp->Tail[i] = i; Tp->Len[i] = 1; Tp->Next[i] = -1; - Tp->MinID[i] = Tp->P[i].ID; - Tp->MinIDTask[i] = ThisTask; + Tp->MinID[i].set(id++); // we use new IDs here instead of P[].ID to make sure that also for lightcone group finding MinID is + // unique for all box replicas + Tp->MinIDTask[i] = ThisTask; #if defined(LIGHTCONE_PARTICLES_GROUPS) Tp->DistanceOrigin[i] = fof_distance_to_origin(i); @@ -280,8 +301,8 @@ void fof<partset>::fof_fof(int num, const char *grpcat_basename, const char *grp { TIMER_STOP(CPU_FOF); - char catname[1000]; - sprintf(catname, "%s_subhalo_tab", grpcat_basename); + char catname[MAXLEN_PATH_EXTRA]; + snprintf(catname, MAXLEN_PATH_EXTRA, "%s_subhalo_tab", grpcat_basename); subfind_find_subhalos(num, catname, grpcat_dirbasename); @@ -297,8 +318,8 @@ void fof<partset>::fof_fof(int num, const char *grpcat_basename, const char *grp fof_io<partset> FoF_IO{this, this->Communicator, All.SnapFormat}; - char catname[1000]; - sprintf(catname, "%s_tab", grpcat_basename); + char catname[MAXLEN_PATH_EXTRA]; + snprintf(catname, MAXLEN_PATH_EXTRA, "%s_tab", 
grpcat_basename); FoF_IO.fof_subfind_save_groups(num, catname, grpcat_dirbasename); @@ -546,7 +567,12 @@ double fof<partset>::fof_get_comoving_linking_length(void) } sumup_large_ints(1, &ndm, &ndmtot, Communicator); MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, Communicator); - double rhodm = (All.Omega0 - All.OmegaBaryon) * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + + double rhodm; + if(Tp->TotNumGas > 0) + rhodm = (All.Omega0 - All.OmegaBaryon) * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + else + rhodm = All.Omega0 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); return FOF_LINKLENGTH * pow(masstot / ndmtot / rhodm, 1.0 / 3); } @@ -625,7 +651,7 @@ void fof<partset>::fof_compile_catalogue(double inner_distance) Send_count[FOF_GList[i].MinIDTask]++; /* inform everybody about how much they have to receive */ - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); /* count how many we get and prepare offset tables */ int nimport = 0; @@ -656,9 +682,9 @@ void fof<partset>::fof_compile_catalogue(double inner_distance) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the group info */ - MPI_Sendrecv(&FOF_GList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(fof_group_list), MPI_BYTE, recvTask, - TAG_DENS_A, &get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(fof_group_list), MPI_BYTE, - recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&FOF_GList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(fof_group_list), MPI_BYTE, recvTask, + TAG_DENS_A, &get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(fof_group_list), + MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); } } } @@ -732,9 +758,9 @@ void fof<partset>::fof_compile_catalogue(double inner_distance) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the group info */ - 
MPI_Sendrecv(&get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(fof_group_list), MPI_BYTE, recvTask, - TAG_DENS_A, &FOF_GList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(fof_group_list), MPI_BYTE, - recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(fof_group_list), MPI_BYTE, recvTask, + TAG_DENS_A, &FOF_GList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(fof_group_list), MPI_BYTE, + recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); } } } @@ -1003,7 +1029,7 @@ void fof<partset>::fof_add_in_properties_of_group_segments(void) for(int i = 0; i < NgroupsExt; i++) Send_count[Group[i].MinIDTask]++; - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); int nimport = 0; Recv_offset[0] = 0, Send_offset[0] = 0; @@ -1032,9 +1058,9 @@ void fof<partset>::fof_add_in_properties_of_group_segments(void) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the group data */ - MPI_Sendrecv(&Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_properties), MPI_BYTE, recvTask, - TAG_DENS_A, &get_Group[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_properties), MPI_BYTE, - recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_properties), MPI_BYTE, recvTask, + TAG_DENS_A, &get_Group[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_properties), MPI_BYTE, + recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); } } } diff --git a/src/fof/fof_findgroups.cc b/src/fof/fof_findgroups.cc index 8faa002274b5ef16bd25aa1c6cd8d2816fce0321..bf323c9017ffeebee3bebc9c1adb2b40af09e7c4 100644 --- a/src/fof/fof_findgroups.cc +++ b/src/fof/fof_findgroups.cc @@ -404,7 +404,7 @@ int foftree<partset>::treefind_fof_primary(MyIntPosType 
*searchcenter, MyNgbTree { fofnode *current = get_nodep(no, shmrank); - if(current->level == 0) + if(current->level <= LEVEL_ALWAYS_OPEN) { /* we always open the root node (its full node length couldn't be stored in the integer type */ no = current->nextnode; /* no change in shmrank expected here */ @@ -426,7 +426,7 @@ int foftree<partset>::treefind_fof_primary(MyIntPosType *searchcenter, MyNgbTree if(Tp->MinID[head].get() <= target_MinID.get()) { #if defined(LIGHTCONE_PARTICLES_GROUPS) - if(Tp->DistanceOrigin[FullyLinkedNodePIndex[no]] <= target_DistanceOrigin) + if(Tp->DistanceOrigin[head] <= target_DistanceOrigin) #endif { no = current->sibling; /* the node can be discarded */ diff --git a/src/fof/fof_io.cc b/src/fof/fof_io.cc index 91e658fb044e18f37133c036c02fd67328828ccb..1a8b61eb23abeb0e1183f37ee1a3e51fcf13e1b3 100644 --- a/src/fof/fof_io.cc +++ b/src/fof/fof_io.cc @@ -16,6 +16,7 @@ #include <hdf5.h> #include <mpi.h> #include <sys/stat.h> + #include <algorithm> #include <cmath> #include <cstdio> @@ -52,7 +53,7 @@ fof_io<partset>::fof_io(fof<partset> *FoF_ptr, MPI_Comm comm, int format) : IO_D this->header_size = sizeof(catalogue_header); this->header_buf = &catalogue_header; this->type_of_file = FILE_IS_GROUPCAT; - sprintf(this->info, "FOF/SUBFIND: writing group catalogue"); + snprintf(this->info, MAXLEN_PATH, "FOF/SUBFIND: writing group catalogue"); init_field("FLEN", "GroupLen", mem_len_type, file_len_type, READ_IF_PRESENT, 1, A_G, &FoF->Group[0].Len, NULL, GROUPS, 0, 0, 0, 0, 0, 0, 0, true); @@ -229,9 +230,9 @@ void fof_io<partset>::fof_subfind_save_groups(int num, const char *basename, con if(ThisTask == 0) { #ifdef ALT_NAMING - sprintf(buf, "%s/%s_%06d", All.OutputDir, grpcat_dirbasename, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%06d", All.OutputDir, grpcat_dirbasename, num); #else - sprintf(buf, "%s/%s_%03d", All.OutputDir, grpcat_dirbasename, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%03d", All.OutputDir, grpcat_dirbasename, num); #endif 
mkdir(buf, 02755); } @@ -240,14 +241,14 @@ void fof_io<partset>::fof_subfind_save_groups(int num, const char *basename, con #ifdef ALT_NAMING if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/%s_%06d/%s_%06d", All.OutputDir, grpcat_dirbasename, num, basename, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%06d/%s_%06d", All.OutputDir, grpcat_dirbasename, num, basename, num); else - sprintf(buf, "%s%s_%06d", All.OutputDir, basename, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, basename, num); #else if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/%s_%03d/%s_%03d", All.OutputDir, grpcat_dirbasename, num, basename, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%03d/%s_%03d", All.OutputDir, grpcat_dirbasename, num, basename, num); else - sprintf(buf, "%s%s_%03d", All.OutputDir, basename, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, basename, num); #endif write_multiple_files(buf, All.NumFilesPerSnapshot); @@ -269,11 +270,11 @@ void fof_io<partset>::fof_subfind_load_groups(int num) char fname[MAXLEN_PATH_EXTRA], fname_multiple[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING - sprintf(fname_multiple, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "fof_subhalo_tab", num); - sprintf(fname, "%s%s_%06d", All.OutputDir, "fof_subhalo_tab", num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "fof_subhalo_tab", num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, "fof_subhalo_tab", num); #else - sprintf(fname_multiple, "%s/groups_%03d/%s_%03d", All.OutputDir, num, "fof_subhalo_tab", num); - sprintf(fname, "%s%s_%03d", All.OutputDir, "fof_subhalo_tab", num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/groups_%03d/%s_%03d", All.OutputDir, num, "fof_subhalo_tab", num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, "fof_subhalo_tab", num); #endif int num_files = find_files(fname, fname_multiple); @@ -529,13 +530,13 @@ void 
fof_io<partset>::get_datagroup_name(int type, char *buf) switch(type) { case 0: - sprintf(buf, "/Group"); + snprintf(buf, MAXLEN_PATH, "/Group"); break; case 1: - sprintf(buf, "/Subhalo"); + snprintf(buf, MAXLEN_PATH, "/Subhalo"); break; case 2: - sprintf(buf, "/IDs"); + snprintf(buf, MAXLEN_PATH, "/IDs"); break; default: Terminate("wrong group: type=%d", type); diff --git a/src/fof/fof_io.h b/src/fof/fof_io.h index 4d4c92604ad04607c07337af1b8cbb3787125825..4519c9974b93f89eb1a21f9fe1c9ba4f05c46b69 100644 --- a/src/fof/fof_io.h +++ b/src/fof/fof_io.h @@ -12,6 +12,8 @@ #ifndef FOF_IO_H #define FOF_IO_H +#include "gadgetconfig.h" + #include "../fof/fof.h" #include "../io/io.h" diff --git a/src/fof/fof_nearest.cc b/src/fof/fof_nearest.cc index 5cf4e478dbbf6bd4835ddbaf48e005a86f705199..57d01fbbb190279783a976949ccc7785d0b7e4ab 100644 --- a/src/fof/fof_nearest.cc +++ b/src/fof/fof_nearest.cc @@ -157,7 +157,7 @@ class fofdata_comm : public generic_comm<fofdata_in, fofdata_out, T_tree, T_doma fofnode *current = Tree->get_nodep(no, shmrank); - if(current->level == 0) + if(current->level <= LEVEL_ALWAYS_OPEN) { /* we always open the root node (its full node length can't be stored in the integer type */ no = current->nextnode; /* no change in shmrank expected here */ diff --git a/src/fof/foftree.h b/src/fof/foftree.h index 8bb51237d67f5b0bcc1efbcbbc1947fa7880bf55..96e086ad01d5d009842b63c8349fbf316def3481 100644 --- a/src/fof/foftree.h +++ b/src/fof/foftree.h @@ -12,6 +12,8 @@ #ifndef FOFTREE_H #define FOFTREE_H +#include "gadgetconfig.h" + #include "../data/simparticles.h" #include "../tree/tree.h" diff --git a/src/fof/foftree_build.cc b/src/fof/foftree_build.cc index 7b1789873bf865659a03304ad75efcecb6b7f8f6..a2a743d1d0433cd628f4b5138e86728a2d90a7c1 100644 --- a/src/fof/foftree_build.cc +++ b/src/fof/foftree_build.cc @@ -107,8 +107,8 @@ void foftree<partset>::exchange_topleafdata(void) } } - MPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, 
glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, - D->Communicator); + myMPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, + D->Communicator); for(int task = 0; task < D->NTask; task++) recvcounts[task] = 0; diff --git a/src/gravity/ewald.cc b/src/gravity/ewald.cc index e31df783dfa61d0bb4313f3eacbf66e0d8e12970..76674c272a7d343439ed2d9ff7975c9880ad36a1 100644 --- a/src/gravity/ewald.cc +++ b/src/gravity/ewald.cc @@ -76,9 +76,9 @@ void ewald::ewald_init(void) Ewd = (ewald_data *)Mem.mymalloc("Ewd", sizeof(ewald_data) * (ENX + 1) * (ENY + 1) * (ENZ + 1)); - char buf[200]; - sprintf(buf, "ewald_table_%d-%d-%d_%d-%d-%d_precision%d-order%d.dat", LONG_X, LONG_Y, LONG_Z, ENX, ENY, ENZ, (int)sizeof(MyReal), - HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "ewald_table_%d-%d-%d_%d-%d-%d_precision%d-order%d.dat", LONG_X, LONG_Y, LONG_Z, ENX, ENY, ENZ, + (int)sizeof(MyReal), HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER); int recomputeflag = 0; @@ -136,11 +136,12 @@ void ewald::ewald_init(void) if(ThisTask == 0) { - if(((n - first) % (count / 20)) == 0) - { - printf("%4.1f percent done\n", (n - first) / (count / 100.0)); - myflush(stdout); - } + if(count > 20) + if(((n - first) % (count / 20)) == 0) + { + printf("%4.1f percent done\n", (n - first) / (count / 100.0)); + myflush(stdout); + } } double xx = 0.5 * DBX * (1.0 / LONG_X) * ((double)i) / ENX; @@ -167,28 +168,78 @@ void ewald::ewald_init(void) ewdp->D7phi = ewald_D7(xx, yy, zz); #endif #else - ewdp->D0phi = ewald_D0(xx, zz, yy); - - vector<double> force = ewald_D1(yy, zz, xx); switch(GRAVITY_TALLBOX) { case 0: - ewdp->D1phi[0] = force[2]; - ewdp->D1phi[1] = force[0]; - ewdp->D1phi[2] = force[1]; + { + ewdp->D0phi = ewald_D0(yy, zz, xx); + auto D1phi = ewald_D1(yy, zz, xx); + auto D2phi = ewald_D2(yy, zz, xx); + auto D3phi = ewald_D3(yy, zz, xx); + + 
ewdp->D1phi[vX] = D1phi[vZ]; + ewdp->D1phi[vY] = D1phi[vX]; + ewdp->D1phi[vZ] = D1phi[vY]; + + ewdp->D2phi[qXX] = D2phi[qZZ]; + ewdp->D2phi[qXY] = D2phi[qZX]; + ewdp->D2phi[qXZ] = D2phi[qZY]; + ewdp->D2phi[qYY] = D2phi[qXX]; + ewdp->D2phi[qYZ] = D2phi[qXY]; + ewdp->D2phi[qZZ] = D2phi[qYY]; + + ewdp->D3phi[dXXX] = D3phi[dZZZ]; + ewdp->D3phi[dXXY] = D3phi[dZZX]; + ewdp->D3phi[dXXZ] = D3phi[dZZY]; + ewdp->D3phi[dXYY] = D3phi[dZXX]; + ewdp->D3phi[dXYZ] = D3phi[dZXY]; + ewdp->D3phi[dXZZ] = D3phi[dZYY]; + ewdp->D3phi[dYYY] = D3phi[dXXX]; + ewdp->D3phi[dYYZ] = D3phi[dXXY]; + ewdp->D3phi[dYZZ] = D3phi[dXYY]; + ewdp->D3phi[dZZZ] = D3phi[dYYY]; + } break; case 1: - ewdp->D1phi[0] = force[0]; - ewdp->D1phi[1] = force[2]; - ewdp->D1phi[2] = force[1]; + { + ewdp->D0phi = ewald_D0(xx, zz, yy); + auto D1phi = ewald_D1(xx, zz, yy); + auto D2phi = ewald_D2(xx, zz, yy); + auto D3phi = ewald_D3(xx, zz, yy); + + ewdp->D1phi[vX] = D1phi[vX]; + ewdp->D1phi[vY] = D1phi[vZ]; + ewdp->D1phi[vZ] = D1phi[vY]; + + ewdp->D2phi[qXX] = D2phi[qXX]; + ewdp->D2phi[qXY] = D2phi[qXZ]; + ewdp->D2phi[qXZ] = D2phi[qXY]; + ewdp->D2phi[qYY] = D2phi[qZZ]; + ewdp->D2phi[qYZ] = D2phi[qZY]; + ewdp->D2phi[qZZ] = D2phi[qYY]; + + ewdp->D3phi[dXXX] = D3phi[dXXX]; + ewdp->D3phi[dXXY] = D3phi[dXXZ]; + ewdp->D3phi[dXXZ] = D3phi[dXXY]; + ewdp->D3phi[dXYY] = D3phi[dXZZ]; + ewdp->D3phi[dXYZ] = D3phi[dXZY]; + ewdp->D3phi[dXZZ] = D3phi[dXYY]; + ewdp->D3phi[dYYY] = D3phi[dZZZ]; + ewdp->D3phi[dYYZ] = D3phi[dZZY]; + ewdp->D3phi[dYZZ] = D3phi[dZYY]; + ewdp->D3phi[dZZZ] = D3phi[dYYY]; + } break; case 2: - ewdp->D1phi[0] = force[0]; - ewdp->D1phi[1] = force[1]; - ewdp->D1phi[2] = force[2]; + { + ewdp->D0phi = ewald_D0(xx, yy, zz); + ewdp->D1phi = ewald_D1(xx, yy, zz); + ewdp->D2phi = ewald_D2(xx, yy, zz); + ewdp->D3phi = ewald_D3(xx, yy, zz); + } break; } #endif @@ -205,7 +256,7 @@ void ewald::ewald_init(void) recvoffs[i] = off * sizeof(ewald_data); } - MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(ewald_data), MPI_BYTE, Ewd, 
recvcnts, recvoffs, MPI_BYTE, Communicator); + myMPI_Allgatherv(MPI_IN_PLACE, size * sizeof(ewald_data), MPI_BYTE, Ewd, recvcnts, recvoffs, MPI_BYTE, Communicator); Mem.myfree(recvoffs); Mem.myfree(recvcnts); @@ -306,49 +357,83 @@ void ewald::ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *t { // we determine the closest available point in our Ewald look-up table - static MyIntPosType const halflen = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - (EWLEVEL + 1)); - static MyIntPosType const intlen = halflen << 1; - static MyIntPosType const ewaldmask = ~(intlen - 1); + static MyIntPosType const halflenX = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - (EWLEVEL + 1) - MAX_LONG_X_BITS); + static MyIntPosType const intlenX = halflenX << 1; + static MyIntPosType const ewaldmaskX = ~(intlenX - 1); + + static MyIntPosType const halflenY = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - (EWLEVEL + 1) - MAX_LONG_Y_BITS); + static MyIntPosType const intlenY = halflenY << 1; + static MyIntPosType const ewaldmaskY = ~(intlenY - 1); + + static MyIntPosType const halflenZ = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - (EWLEVEL + 1) - MAX_LONG_Z_BITS); + static MyIntPosType const intlenZ = halflenZ << 1; + static MyIntPosType const ewaldmaskZ = ~(intlenZ - 1); MyIntPosType temppos[3] = {p_intpos[0] - target_intpos[0], p_intpos[1] - target_intpos[1], p_intpos[2] - target_intpos[2]}; + constrain_intpos(temppos); + MyIntPosType gridpos[3]; - gridpos[0] = (temppos[0] + halflen) & ewaldmask; - gridpos[1] = (temppos[1] + halflen) & ewaldmask; - gridpos[2] = (temppos[2] + halflen) & ewaldmask; + gridpos[0] = (temppos[0] + halflenX) & ewaldmaskX; + gridpos[1] = (temppos[1] + halflenY) & ewaldmaskY; + gridpos[2] = (temppos[2] + halflenZ) & ewaldmaskZ; vector<double> off; nearest_image_intpos_to_pos(temppos, gridpos, off.da); - int i = (gridpos[0] >> (BITS_FOR_POSITIONS - (EWLEVEL + 1))); - int j = (gridpos[1] >> (BITS_FOR_POSITIONS - (EWLEVEL + 1))); - int k = 
(gridpos[2] >> (BITS_FOR_POSITIONS - (EWLEVEL + 1) - MAX_LONG_Z_BITS)); int signx = 1, signy = 1, signz = 1; - if(i > EN) +#if defined(GRAVITY_TALLBOX) && (GRAVITY_TALLBOX == 0) + if(p_intpos[0] < target_intpos[0]) + { + signx = -1; + i = ENX - i; + } +#else + if(i > ENX) { - i = 2 * EN - i; + i = 2 * ENX - i; signx = -1; } - else if(i == EN && gridpos[0] < temppos[0]) + else if(i == ENX && gridpos[0] < temppos[0]) signx = -1; +#endif - if(j > EN) +#if defined(GRAVITY_TALLBOX) && (GRAVITY_TALLBOX == 1) + if(p_intpos[1] < target_intpos[1]) { - j = 2 * EN - j; + signy = -1; // y-branch: flip signy and mirror j, matching what the x and z branches do for their own axis + j = ENY - j; + } +#else + if(j > ENY) + { + j = 2 * ENY - j; signy = -1; } - else if(j == EN && gridpos[1] < temppos[1]) + else if(j == ENY && gridpos[1] < temppos[1]) signy = -1; +#endif - if(k > EN) +#if defined(GRAVITY_TALLBOX) && (GRAVITY_TALLBOX == 2) + if(p_intpos[2] < target_intpos[2]) + { + signz = -1; + k = ENZ - k; + } +#else + if(k > ENZ) { - k = 2 * ENZ - k; + k = 2 * ENZ - k; signz = -1; } - else if(k == EN && gridpos[2] < temppos[2]) + else if(k == ENZ && gridpos[2] < temppos[2]) signz = -1; +#endif fper = Ewd[ewd_offset(i, j, k)]; @@ -493,11 +578,8 @@ void ewald::ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *t // only second order Taylor expansion, i.e. 
EWALD_TAYLOR_ORDER==2 // now Taylor corrections - -#ifndef GRAVITY_TALLBOX fper.D0phi += fper.D1phi * off + 0.5 * ((fper.D2phi * off) * off); fper.D1phi += fper.D2phi * off + 0.5 * ((fper.D3phi * off) * off); -#endif if(flag == POINTMASS) return; @@ -774,19 +856,10 @@ double ewald::ewald_D0(double x, double y, double z) double k2 = kx * kx + ky * ky; double k = sqrt(k2); - if(k * z > 0) - { - double ex = exp(-k * z); - if(ex > 0) - D0 += -M_PI / (BOXX * BOXY) * (erfc(k / (2 * alpha) + alpha * z) / ex + ex * erfc(k / (2 * alpha) - alpha * z)) / k; - } - else - { - double ex = exp(k * z); - if(ex > 0) - D0 += -M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) * - (ex * erfc(k / (2 * alpha) + alpha * z) + erfc(k / (2 * alpha) - alpha * z) / ex) / k; - } + double ex = exp(-k * z); // note: z positive here + + if(ex > 1.0e-60) // to prevent divisions by zero due to underflows + D0 += -M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) / k * (specerf(z, k, alpha) + specerf(-z, k, alpha)); } } @@ -797,6 +870,24 @@ double ewald::ewald_D0(double x, double y, double z) return D0; } +double ewald::specerf(double z, double k, double alpha) { return exp(k * z) * erfc(k / (2 * alpha) + alpha * z); } + +double ewald::d_specerf(double z, double k, double alpha) +{ + return -2 * alpha / (sqrt(M_PI) * exp(pow(k / (2 * alpha), 2) + pow(alpha * z, 2))) + k * specerf(z, k, alpha); +} + +double ewald::dd_specerf(double z, double k, double alpha) +{ + return +4 * pow(alpha, 3) * z / (sqrt(M_PI) * exp(pow(k / (2 * alpha), 2) + pow(alpha * z, 2))) + k * d_specerf(z, k, alpha); +} + +double ewald::ddd_specerf(double z, double k, double alpha) +{ + return +4 * pow(alpha, 3) / (sqrt(M_PI) * exp(pow(k / (2 * alpha), 2) + pow(alpha * z, 2))) - + 8 * pow(alpha, 5) * z * z / (sqrt(M_PI) * exp(pow(k / (2 * alpha), 2) + pow(alpha * z, 2))) + k * dd_specerf(z, k, alpha); +} + /*! 
\brief This function computes the force correction term (difference between full * force of infinite lattice and nearest image) by Ewald summation. * @@ -977,13 +1068,16 @@ vector<double> ewald::ewald_D1(double x, double y, double z) double k2 = kx * kx + ky * ky; double k = sqrt(k2); - double val = M_PI / (BOXX * BOXY) * sin(kx * x + ky * y) * - (exp(k * z) * erfc(k / (2 * alpha) + alpha * z) + exp(-k * z) * erfc(k / (2 * alpha) - alpha * z)) / k; + double ex = exp(-k * z); // note: z positive here - D1[0] -= -kx * val; - D1[1] -= -ky * val; - D1[2] -= M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) * - (exp(k * z) * erfc(k / (2 * alpha) + alpha * z) - exp(-k * z) * erfc(k / (2 * alpha) - alpha * z)); + if(ex > 1.0e-60) // to prevent divisions by zero due to underflows + { + double val = M_PI / (BOXX * BOXY) / k * (specerf(z, k, alpha) + specerf(-z, k, alpha)); + + D1[0] += kx * val * sin(kx * x + ky * y); + D1[1] += ky * val * sin(kx * x + ky * y); + D1[2] += -M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) / k * (d_specerf(z, k, alpha) - d_specerf(-z, k, alpha)); + } } } @@ -999,6 +1093,8 @@ symtensor2<double> ewald::ewald_D2(double x, double y, double z) symtensor2<double> D2 = 0.0; +#ifndef GRAVITY_TALLBOX + double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3); double alpha = 2.0 / leff; double alpha2 = alpha * alpha; @@ -1107,6 +1203,108 @@ symtensor2<double> ewald::ewald_D2(double x, double y, double z) D2 += (val * kxyz) % kxyz; } } +#else + /* this is the case with periodicity only in two dimensions */ + /* this is the case with periodicity only in two dimensions */ + + double leff = sqrt(BOXX * BOXY); + double alpha = 2.0 / leff; + double alpha2 = alpha * alpha; + + int qxmax = (int)(8.0 / (BOXX * alpha) + 0.5); + int qymax = (int)(8.0 / (BOXY * alpha) + 0.5); + + int nxmax = (int)(2.0 * alpha * BOXX + 0.5); + int nymax = (int)(2.0 * alpha * BOXY + 0.5); + + if(printed == 0) + { + mpi_printf("EWALD: D2 table: qxmax=%d qymax=%d nxmax=%d 
nymax=%d\n", qxmax, qymax, nxmax, nymax); + printed = 1; + } + + for(int nx = -qxmax; nx <= qxmax; nx++) + for(int ny = -qymax; ny <= qymax; ny++) + { + double dx = x - nx * BOXX; + double dy = y - ny * BOXY; + double dz = z; + + vector<double> dxyz(dx, dy, dz); + + double r2 = dx * dx + dy * dy + dz * dz; + double r = sqrt(r2); + + double rinv = (r > 0) ? 1.0 / r : 0.0; + double r2inv = rinv * rinv; + double r3inv = r2inv * rinv; + double r5inv = r3inv * r2inv; + + double g1, g2; + + if(nx != 0 || ny != 0) + { + g1 = (erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv; + + g2 = -(3.0 * erfc(alpha * r) + (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv; + } + else + { + /* we add the 1/r term here to the (0|0) entry */ + + if((alpha * r) < 0.5) + { + g1 = 4.0 * pow(alpha, 3) / sqrt(M_PI) * + (-1.0 / 3.0 + pow(alpha * r, 2) / 5.0 - pow(alpha * r, 4) / 14.0 + pow(alpha * r, 6) / 54.0 - + pow(alpha * r, 8) / 264.0 + pow(alpha * r, 10) / 1560.0); + + g2 = 8.0 * pow(alpha, 5) / sqrt(M_PI) * + (1.0 / 5.0 - pow(alpha * r, 2) / 7.0 + pow(alpha * r, 4) / 18.0 - pow(alpha * r, 6) / 66.0 + + pow(alpha * r, 8) / 312.0 - pow(alpha * r, 10) / 1800.0); + } + else + { + g1 = (-erf(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv; + + g2 = (3.0 * erf(alpha * r) - (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv; + } + } + + D2 += g2 * (dxyz % dxyz); + D2[qXX] += g1; + D2[qYY] += g1; + D2[qZZ] += g1; + } + + for(int nx = -nxmax; nx <= nxmax; nx++) + for(int ny = -nymax; ny <= nymax; ny++) + { + if(nx != 0 || ny != 0) + { + double kx = (2.0 * M_PI / BOXX) * nx; + double ky = (2.0 * M_PI / BOXY) * ny; + double k2 = kx * kx + ky * ky; + double k = sqrt(k2); + + double ex = exp(-k * z); // note: z positive here + + if(ex > 1.0e-60) // to prevent divisions by zero due to underflows + { + double val = M_PI / (BOXX * BOXY) / k * (specerf(z, k, alpha) + specerf(-z, k, 
alpha)); + double dzval = M_PI / (BOXX * BOXY) / k * (d_specerf(z, k, alpha) - d_specerf(-z, k, alpha)); + + D2[qXX] += kx * kx * val * cos(kx * x + ky * y); + D2[qXY] += kx * ky * val * cos(kx * x + ky * y); + D2[qXZ] += kx * dzval * sin(kx * x + ky * y); + D2[qYY] += ky * ky * val * cos(kx * x + ky * y); + D2[qYZ] += ky * dzval * sin(kx * x + ky * y); + D2[qZZ] += -M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) / k * (dd_specerf(z, k, alpha) + dd_specerf(-z, k, alpha)); + } + } + } + + D2[qZZ] += 4.0 * alpha * sqrt(M_PI) / (BOXX * BOXY) * exp(-pow(alpha * z, 2)); +#endif return D2; } @@ -1115,12 +1313,10 @@ symtensor3<double> ewald::ewald_D3(double x, double y, double z) { static int printed = 0; -#ifdef GRAVITY_TALLBOX - Terminate("GRAVITY_TALLBOX is not implemented for MULTIPOLE_ORDER >= 3"); -#endif - symtensor3<double> D3 = 0.0; +#ifndef GRAVITY_TALLBOX + double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3); double alpha = 2.0 / leff; double alpha2 = alpha * alpha; @@ -1234,6 +1430,119 @@ symtensor3<double> ewald::ewald_D3(double x, double y, double z) D3 += (val * kxyz) % (kxyz % kxyz); } } +#else + /* this is the case with periodicity only in two dimensions */ + /* first loop below: real-space sum over the image cells in x and y (dz = z is not replicated) */ + /* second loop below: sum over the 2D wave vectors (kx, ky), with the z-dependence carried by the specerf terms */ + + double leff = sqrt(BOXX * BOXY); + double alpha = 2.0 / leff; + double alpha2 = alpha * alpha; + + int qxmax = (int)(8.0 / (BOXX * alpha) + 0.5); + int qymax = (int)(8.0 / (BOXY * alpha) + 0.5); + + int nxmax = (int)(2.0 * alpha * BOXX + 0.5); + int nymax = (int)(2.0 * alpha * BOXY + 0.5); + + if(printed == 0) + { + mpi_printf("EWALD: D3 table: qxmax=%d qymax=%d nxmax=%d nymax=%d\n", qxmax, qymax, nxmax, nymax); + printed = 1; + } + + for(int nx = -qxmax; nx <= qxmax; nx++) + for(int ny = -qymax; ny <= qymax; ny++) + { + double dx = x - nx * BOXX; + double dy = y - ny * BOXY; + double dz = z; + + vector<double> dxyz(dx, dy, dz); + + double r2 = dx * 
dx + dy * dy + dz * dz; + double r = sqrt(r2); + + double rinv = (r > 0) ? 1.0 / r : 0.0; + double r2inv = rinv * rinv; + double r3inv = r2inv * rinv; + double r4inv = r2inv * r2inv; + double r5inv = r2inv * r3inv; + double r7inv = r3inv * r4inv; + + double g2, g3; + + if(nx != 0 || ny != 0) + { + g2 = -(3.0 * erfc(alpha * r) + (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv; + + g3 = (15.0 * erfc(alpha * r) + + (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) * + r7inv; + } + else + { + if((alpha * r) < 0.5) + { + g2 = 8.0 * pow(alpha, 5) / sqrt(M_PI) * + (1.0 / 5.0 - pow(alpha * r, 2) / 7.0 + pow(alpha * r, 4) / 18.0 - pow(alpha * r, 6) / 66.0 + + pow(alpha * r, 8) / 312.0 - pow(alpha * r, 10) / 1800.0); + + g3 = 16.0 * pow(alpha, 7) / sqrt(M_PI) * + (-1.0 / 7.0 + pow(alpha * r, 2) / 9.0 - pow(alpha * r, 4) / 22.0 + pow(alpha * r, 6) / 78.0 - + pow(alpha * r, 8) / 360.0 + pow(alpha * r, 10) / 2040.0); + } + else + { + g2 = (3.0 * erf(alpha * r) - (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv; + + g3 = (-15.0 * erf(alpha * r) + + (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) * + r7inv; + } + } + + symtensor2<double> aux2 = dxyz % dxyz; + symtensor3<double> aux3; + + setup_D3(ADD, D3, dxyz, aux2, aux3, g2, g3); + } + + for(int nx = -nxmax; nx <= nxmax; nx++) + for(int ny = -nymax; ny <= nymax; ny++) + { + if(nx != 0 || ny != 0) + { + double kx = (2.0 * M_PI / BOXX) * nx; + double ky = (2.0 * M_PI / BOXY) * ny; + double k2 = kx * kx + ky * ky; + double k = sqrt(k2); + + double ex = exp(-k * z); // note: z positive here + + if(ex > 1.0e-60) // to prevent divisions by zero due to underflows + { + double val = M_PI / (BOXX * BOXY) / k * (specerf(z, k, alpha) + specerf(-z, k, alpha)); + double dzval = M_PI / (BOXX * BOXY) / k * (d_specerf(z, k, alpha) - d_specerf(-z, k, alpha)); 
+ double dzdzval = M_PI / (BOXX * BOXY) / k * (dd_specerf(z, k, alpha) + dd_specerf(-z, k, alpha)); + + D3[dXXX] += -kx * kx * kx * val * sin(kx * x + ky * y); + D3[dXXY] += -kx * kx * ky * val * sin(kx * x + ky * y); + D3[dXXZ] += kx * kx * dzval * cos(kx * x + ky * y); + D3[dXYY] += -kx * ky * ky * val * sin(kx * x + ky * y); + D3[dXYZ] += kx * ky * dzval * cos(kx * x + ky * y); + D3[dXZZ] += kx * dzdzval * sin(kx * x + ky * y); + D3[dYYY] += -ky * ky * ky * val * sin(kx * x + ky * y); + D3[dYYZ] += ky * ky * dzval * cos(kx * x + ky * y); + D3[dYZZ] += ky * dzdzval * sin(kx * x + ky * y); + D3[dZZZ] += -M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) / k * (ddd_specerf(z, k, alpha) - ddd_specerf(-z, k, alpha)); + } + } + } + + D3[dZZZ] += -8.0 * pow(alpha, 3) * z * sqrt(M_PI) / (BOXX * BOXY) * exp(-pow(alpha * z, 2)); + +#endif return D3; } diff --git a/src/gravity/ewald.h b/src/gravity/ewald.h index 77e4317f76c518fc6e4446ead59ffe8a0fe67b8f..507825046511fe745f6ac8e06425d406137370c7 100644 --- a/src/gravity/ewald.h +++ b/src/gravity/ewald.h @@ -91,6 +91,10 @@ class ewald : public intposconvert, public io_streamcount, public setcomm ewald_data *Ewd; // points to an [ENX + 1][ENY + 1][ENZ + 1] array inline int ewd_offset(int i, int j, int k) { return (i * (ENY + 1) + j) * (ENZ + 1) + k; } + inline double specerf(double z, double k, double alpha); + inline double d_specerf(double z, double k, double alpha); + inline double dd_specerf(double z, double k, double alpha); + inline double ddd_specerf(double z, double k, double alpha); double Ewd_fac_intp[3]; diff --git a/src/gravity/ewaldtensors.h b/src/gravity/ewaldtensors.h index bcdcb6df30995599d6275c9e9d987f7ad5a6a20e..5f90d8253de17fe938946e1a930e75a0dd4af6d9 100644 --- a/src/gravity/ewaldtensors.h +++ b/src/gravity/ewaldtensors.h @@ -12,6 +12,8 @@ #ifndef GRAVITY_EWALDTENSORS_H #define GRAVITY_EWALDTENSORS_H +#include "gadgetconfig.h" + #include "../data/symtensors.h" // derivative tensors for Ewald correction - they 
have few independent elements due to cubic symmetry diff --git a/src/gravity/grav_direct.cc b/src/gravity/grav_direct.cc index daca6bffd9ddaae691a75959046a134de4c674d3..f5396f785769d6eb686bdd587e3f73f178ab27b0 100644 --- a/src/gravity/grav_direct.cc +++ b/src/gravity/grav_direct.cc @@ -117,8 +117,8 @@ void gravtree<simparticles>::gravity_direct(simparticles *Sp, domain<simparticle Send_offset[j] = Recv_offset[j] * sizeof(directdata); } - MPI_Allgatherv(DirectDataIn, nforces * sizeof(directdata), MPI_BYTE, DirectDataAll, Send_count, Send_offset, MPI_BYTE, - D->Communicator); + myMPI_Allgatherv(DirectDataIn, nforces * sizeof(directdata), MPI_BYTE, DirectDataAll, Send_count, Send_offset, MPI_BYTE, + D->Communicator); /* subdivide the work evenly */ int first, count; diff --git a/src/gravity/grav_external.cc b/src/gravity/grav_external.cc index e572d1849d037aff5469c81e4351d9ca1dd6d0ab..b51c0dd7552bb84f01c45928852dafbd3f8120d2 100644 --- a/src/gravity/grav_external.cc +++ b/src/gravity/grav_external.cc @@ -35,6 +35,16 @@ void sim::gravity_external(void) { +#ifdef PERIODIC + // pick middle of (stretched) box, but could also choose other point + vector<double> pos_center{0.5 * All.BoxSize / LONG_X, 0.5 * All.BoxSize / LONG_Y, 0.5 * All.BoxSize / LONG_Z}; +#else + // here pick origin + vector<double> pos_center{0, 0, 0}; +#endif + MyIntPosType intpos_center[3]; + Sp.pos_to_intpos(pos_center.da, intpos_center); + for(int i = 0; i < Sp.TimeBinsGravity.NActiveParticles; i++) { int target = Sp.TimeBinsGravity.ActiveParticleList[i]; @@ -46,7 +56,7 @@ void sim::gravity_external(void) #ifdef EXTERNALGRAVITY_STATICHQ { vector<double> pos; - Sp.intpos_to_pos(Sp.P[target].IntPos, pos.da); /* converts the integer coordinate to floating point */ + Sp.nearest_image_intpos_to_pos(Sp.P[target].IntPos, intpos_center, pos.da); double r = sqrt(pos.r2()); diff --git a/src/gravity/grav_forcetest.cc b/src/gravity/grav_forcetest.cc index 
ad99a5fa99185bf018d8270101abd3f31f867a02..6e75f7cc5081403106f7e59b4967c43119978af5 100644 --- a/src/gravity/grav_forcetest.cc +++ b/src/gravity/grav_forcetest.cc @@ -532,7 +532,7 @@ void gravtest::gravity_forcetest(int timebin) if(nthis == D->ThisTask) { char buf[MAXLEN_PATH_EXTRA]; - sprintf(buf, "%s%s", All.OutputDir, "forcetest.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "forcetest.txt"); if(!(Logs.FdForceTest = fopen(buf, "a"))) Terminate("error in opening file '%s'\n", buf); diff --git a/src/gravtree/gravtree.cc b/src/gravtree/gravtree.cc index 200e8d3cd99702449b96efc53e9d3194be790ca9..082dc58afe3a9815b09b125972f84dde4479d018 100644 --- a/src/gravtree/gravtree.cc +++ b/src/gravtree/gravtree.cc @@ -15,6 +15,7 @@ #include <mpi.h> #include <stdlib.h> #include <string.h> + #include <atomic> #include "../data/allvars.h" @@ -156,18 +157,16 @@ void gravtree<partset>::gravity_exchange_forces(void) k++; } } - MPI_Alltoall(recv_count, 1, MPI_INT, send_count, 1, MPI_INT, D->Communicator); + myMPI_Alltoall(recv_count, 1, MPI_INT, send_count, 1, MPI_INT, D->Communicator); recv_offset[0] = 0; send_offset[0] = 0; int Nexport = 0; - int Nimport = 0; for(int j = 0; j < D->NTask; j++) { Nexport += send_count[j]; - Nimport += recv_count[j]; if(j > 0) { send_offset[j] = send_offset[j - 1] + send_count[j - 1]; @@ -187,10 +186,10 @@ void gravtree<partset>::gravity_exchange_forces(void) { if(send_count[recvTask] > 0 || recv_count[recvTask] > 0) { - MPI_Sendrecv(&ResultsActiveImported[recv_offset[recvTask]], recv_count[recvTask] * sizeof(resultsactiveimported_data), - MPI_BYTE, recvTask, TAG_FOF_A, &tmp_results[send_offset[recvTask]], - send_count[recvTask] * sizeof(resultsactiveimported_data), MPI_BYTE, recvTask, TAG_FOF_A, D->Communicator, - MPI_STATUS_IGNORE); + myMPI_Sendrecv(&ResultsActiveImported[recv_offset[recvTask]], recv_count[recvTask] * sizeof(resultsactiveimported_data), + MPI_BYTE, recvTask, TAG_FOF_A, &tmp_results[send_offset[recvTask]], + 
send_count[recvTask] * sizeof(resultsactiveimported_data), MPI_BYTE, recvTask, TAG_FOF_A, D->Communicator, + MPI_STATUS_IGNORE); } } } diff --git a/src/gravtree/gravtree_build.cc b/src/gravtree/gravtree_build.cc index 844d103b512219cd5afb638507a95fdeb3198ed3..42ec87edb35148256f70b5924b33fc602ffde8fc 100644 --- a/src/gravtree/gravtree_build.cc +++ b/src/gravtree/gravtree_build.cc @@ -238,8 +238,8 @@ void gravtree<partset>::exchange_topleafdata(void) // optimise this step - only need to update this once per shared memory node - MPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, - D->Communicator); + myMPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, + D->Communicator); for(int task = 0; task < D->NTask; task++) recvcounts[task] = 0; diff --git a/src/gravtree/gwalk.cc b/src/gravtree/gwalk.cc index 103e294caee1372b6c6b3d3cfc4b61cce534e39b..f5b3af0f65ccdd88b976a1a710185ac451b06178 100644 --- a/src/gravtree/gwalk.cc +++ b/src/gravtree/gwalk.cc @@ -199,7 +199,7 @@ inline void gwalk::evaluate_particle_particle_interaction(const pinfo &pdat, con inline int gwalk::evaluate_particle_node_opening_criterion_and_interaction(const pinfo &pdat, gravnode *nop) { - if(nop->level == 0) // always open the root node (note: full node length does not fit in the integer type) + if(nop->level <= LEVEL_ALWAYS_OPEN) // always open the root node (note: full node length does not fit in the integer type) return NODE_OPEN; MyIntPosType halflen = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - nop->level); diff --git a/src/gravtree/gwalk.h b/src/gravtree/gwalk.h index b0c79d23a815e0b38aa2426ca62b78851672d655..be01e7a3503d6035cdc0b307bafaf57063a5ebd8 100644 --- a/src/gravtree/gwalk.h +++ b/src/gravtree/gwalk.h @@ -12,6 +12,8 @@ #ifndef GRAVTREE_WALK_H #define GRAVTREE_WALK_H +#include "gadgetconfig.h" + #include "../mpi_utils/shared_mem_handler.h" 
class gwalk : public gravtree<simparticles> diff --git a/src/io/hdf5_util.h b/src/io/hdf5_util.h index 449cf67576673c78db21483e9ad47535535cfbfa..c7bb21abb7a469bd07a09efe34f7aaee6fcef748 100644 --- a/src/io/hdf5_util.h +++ b/src/io/hdf5_util.h @@ -12,6 +12,8 @@ #ifndef HDF5_UTIL_H #define HDF5_UTIL_H +#include "gadgetconfig.h" + #include <hdf5.h> #define COMPRESSION_CHUNKSIZE 1000 diff --git a/src/io/io.cc b/src/io/io.cc index eeff77fe6118eb7edebdb6f8979895cce8d8d1cc..e7c7c44eace0a1bcb087bde660d9d227d6c78314 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -19,6 +19,7 @@ #include <stdlib.h> #include <string.h> #include <sys/stat.h> + #include <algorithm> #include "../cooling_sfr/cooling.h" @@ -136,23 +137,23 @@ void IO_Def::init_field(const char *label, const char *datasetname, enum types_i int IO_Def::find_files(const char *fname, const char *fname_multiple) { FILE *fd; - char buf[200], buf1[200]; + char buf[MAXLEN_PATH_EXTRA], buf1[MAXLEN_PATH_EXTRA]; int dummy, files_found = 0; if(file_format == FILEFORMAT_HDF5) { #ifdef ALT_NAMING - sprintf(buf, "%s.%d%s", fname_multiple, 0, HDF5_EXT); - sprintf(buf1, "%s%s", fname, HDF5_EXT); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d%s", fname_multiple, 0, HDF5_EXT); + snprintf(buf1, MAXLEN_PATH_EXTRA, "%s%s", fname, HDF5_EXT); #else - sprintf(buf, "%s.%d.hdf5", fname_multiple, 0); - sprintf(buf1, "%s.hdf5", fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d.hdf5", fname_multiple, 0); + snprintf(buf1, MAXLEN_PATH_EXTRA, "%s.hdf5", fname); #endif } else { - sprintf(buf, "%s.%d", fname_multiple, 0); - sprintf(buf1, "%s", fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d", fname_multiple, 0); + snprintf(buf1, MAXLEN_PATH_EXTRA, "%s", fname); } memset(header_buf, 0, header_size); @@ -248,14 +249,14 @@ void IO_Def::read_files_driver(const char *fname, int rep, int num_files) while(rest_files > NTask) { - char buf[MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; - sprintf(buf, "%s.%d", fname, ThisTask + (rest_files - NTask)); + 
snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d", fname, ThisTask + (rest_files - NTask)); if(file_format == FILEFORMAT_HDF5) #ifdef ALT_NAMING - sprintf(buf, "%s.%d%s", fname, ThisTask + (rest_files - NTask), HDF5_EXT); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d%s", fname, ThisTask + (rest_files - NTask), HDF5_EXT); #else - sprintf(buf, "%s.%d.hdf5", fname, ThisTask + (rest_files - NTask)); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d.hdf5", fname, ThisTask + (rest_files - NTask)); #endif int ngroups = NTask / All.MaxFilesWithConcurrentIO; @@ -284,26 +285,26 @@ void IO_Def::read_files_driver(const char *fname, int rep, int num_files) distribute_file(rest_files, &filenr, &masterTask, &lastTask); - char buf[MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; if(num_files > 1) { - sprintf(buf, "%s.%d", fname, filenr); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d", fname, filenr); if(file_format == FILEFORMAT_HDF5) #ifdef ALT_NAMING - sprintf(buf, "%s.%d%s", fname, filenr, HDF5_EXT); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d%s", fname, filenr, HDF5_EXT); #else - sprintf(buf, "%s.%d.hdf5", fname, filenr); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d.hdf5", fname, filenr); #endif } else { - sprintf(buf, "%s", fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s", fname); if(file_format == FILEFORMAT_HDF5) #ifdef ALT_NAMING - sprintf(buf, "%s%s", fname, HDF5_EXT); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", fname, HDF5_EXT); #else - sprintf(buf, "%s.hdf5", fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.hdf5", fname); #endif } @@ -730,11 +731,11 @@ void IO_Def::write_multiple_files(char *fname, int numfilesperdump, int append_f int filenr, masterTask, lastTask; distribute_file(numfilesperdump, &filenr, &masterTask, &lastTask); - char buf[MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; if(numfilesperdump > 1) - sprintf(buf, "%s.%d", fname, filenr); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d", fname, filenr); else - sprintf(buf, "%s", fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s", fname); seq_data seq_loc; 
seq_loc.thistask = ThisTask; @@ -838,14 +839,14 @@ void IO_Def::write_multiple_files(char *fname, int numfilesperdump, int append_f void IO_Def::write_file(char *fname, int writeTask, int lastTask, void *CommBuffer, int numfilesperdump, int chunksize) { int typelist[N_DataGroups]; - long long n_type[N_DataGroups], npart[N_DataGroups]; + long long n_type[N_DataGroups], npart[N_DataGroups], pcsum = 0; char label[LABEL_LEN + 1]; unsigned int blksize, bytes_per_blockelement_in_file = 0; FILE *fd = 0; hid_t hdf5_file = 0, hdf5_grp[N_DataGroups], hdf5_headergrp = 0, hdf5_dataspace_memory; hid_t hdf5_dataspace_in_file = 0, hdf5_dataset = 0, hdf5_prop = 0; hsize_t dims[2], count[2], start[2]; - int rank = 0, pcsum = 0; + int rank = 0; hid_t hdf5_paramsgrp = 0; hid_t hdf5_configgrp = 0; @@ -861,11 +862,11 @@ void IO_Def::write_file(char *fname, int writeTask, int lastTask, void *CommBuff { if(file_format == FILEFORMAT_HDF5) { - char buf[MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING - sprintf(buf, "%s%s", fname, HDF5_EXT); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", fname, HDF5_EXT); #else - sprintf(buf, "%s.hdf5", fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.hdf5", fname); #endif mpi_printf("%s file: '%s' (file 1 of %d)\n", info, fname, numfilesperdump); @@ -1020,7 +1021,7 @@ void IO_Def::write_file(char *fname, int writeTask, int lastTask, void *CommBuff for(int task = writeTask, offset = 0; task <= lastTask; task++) { - int n_for_this_task; + long long n_for_this_task; if(task == ThisTask) { @@ -1028,14 +1029,15 @@ void IO_Def::write_file(char *fname, int writeTask, int lastTask, void *CommBuff for(int p = writeTask; p <= lastTask; p++) if(p != ThisTask) - MPI_Send(&n_for_this_task, 1, MPI_INT, p, TAG_NFORTHISTASK, Communicator); + MPI_Send(&n_for_this_task, sizeof(n_for_this_task), MPI_BYTE, p, TAG_NFORTHISTASK, Communicator); } else - MPI_Recv(&n_for_this_task, 1, MPI_INT, task, TAG_NFORTHISTASK, Communicator, MPI_STATUS_IGNORE); + 
MPI_Recv(&n_for_this_task, sizeof(n_for_this_task), MPI_BYTE, task, TAG_NFORTHISTASK, Communicator, + MPI_STATUS_IGNORE); while(n_for_this_task > 0) { - int pc = n_for_this_task; + long long pc = n_for_this_task; if(pc > blockmaxlen) pc = blockmaxlen; @@ -1132,9 +1134,9 @@ void IO_Def::write_file(char *fname, int writeTask, int lastTask, void *CommBuff my_H5Gclose(hdf5_configgrp, "/Config"); #ifdef ALT_NAMING - sprintf(buf, "%s%s", fname, HDF5_EXT); + snprintf(buf, MAXLEN_PATH, "%s%s", fname, HDF5_EXT); #else - sprintf(buf, "%s.hdf5", fname); + snprintf(buf, MAXLEN_PATH, "%s.hdf5", fname); #endif my_H5Fclose(hdf5_file, buf); } @@ -1146,11 +1148,11 @@ void IO_Def::write_file(char *fname, int writeTask, int lastTask, void *CommBuff void IO_Def::append_file(char *fname, int writeTask, int lastTask, void *CommBuffer, int numfilesperdump, int chunksize) { int typelist[N_DataGroups]; - long long n_type[N_DataGroups], npart[N_DataGroups], n_previous[N_DataGroups]; + long long n_type[N_DataGroups], npart[N_DataGroups], n_previous[N_DataGroups], pcsum = 0; hid_t hdf5_file = 0, hdf5_grp[N_DataGroups], hdf5_headergrp = 0, hdf5_dataspace_memory; hid_t hdf5_dataspace_in_file = 0, hdf5_dataset = 0; hsize_t dims[2], count[2], start[2]; - int rank = 0, pcsum = 0; + int rank = 0; if(file_format != FILEFORMAT_HDF5) Terminate("appending to files only works with HDF5 format\n"); @@ -1165,11 +1167,11 @@ void IO_Def::append_file(char *fname, int writeTask, int lastTask, void *CommBuf /* open file and write header */ if(ThisTask == writeTask) { - char buf[MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING - sprintf(buf, "%s%s", fname, HDF5_EXT); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", fname, HDF5_EXT); #else - sprintf(buf, "%s.hdf5", fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.hdf5", fname); #endif hdf5_file = my_H5Fopen(buf, H5F_ACC_RDWR, H5P_DEFAULT); @@ -1259,7 +1261,7 @@ void IO_Def::append_file(char *fname, int writeTask, int lastTask, void *CommBuf for(int task = 
writeTask, offset = 0; task <= lastTask; task++) { - int n_for_this_task; + long long n_for_this_task; if(task == ThisTask) { @@ -1267,14 +1269,15 @@ void IO_Def::append_file(char *fname, int writeTask, int lastTask, void *CommBuf for(int p = writeTask; p <= lastTask; p++) if(p != ThisTask) - MPI_Send(&n_for_this_task, 1, MPI_INT, p, TAG_NFORTHISTASK, Communicator); + MPI_Send(&n_for_this_task, sizeof(n_for_this_task), MPI_BYTE, p, TAG_NFORTHISTASK, Communicator); } else - MPI_Recv(&n_for_this_task, 1, MPI_INT, task, TAG_NFORTHISTASK, Communicator, MPI_STATUS_IGNORE); + MPI_Recv(&n_for_this_task, sizeof(n_for_this_task), MPI_BYTE, task, TAG_NFORTHISTASK, Communicator, + MPI_STATUS_IGNORE); while(n_for_this_task > 0) { - int pc = n_for_this_task; + long long pc = n_for_this_task; if(pc > blockmaxlen) pc = blockmaxlen; @@ -1339,9 +1342,9 @@ void IO_Def::append_file(char *fname, int writeTask, int lastTask, void *CommBuf my_H5Gclose(hdf5_headergrp, "/Header"); #ifdef ALT_NAMING - sprintf(buf, "%s%s", fname, HDF5_EXT); + snprintf(buf, MAXLEN_PATH, "%s%s", fname, HDF5_EXT); #else - sprintf(buf, "%s.hdf5", fname); + snprintf(buf, MAXLEN_PATH, "%s.hdf5", fname); #endif my_H5Fclose(hdf5_file, buf); } @@ -1693,9 +1696,10 @@ void IO_Def::read_file(const char *fname, int filenr, int readTask, int lastTask ; if(blksize1 != blksize2) { - char buf[MAXLEN_PATH]; - sprintf(buf, "incorrect block-sizes detected!\n Task=%d blocknr=%d blksize1=%d blksize2=%d\n", ThisTask, - blocknr, blksize1, blksize2); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH, + "incorrect block-sizes detected!\n Task=%d blocknr=%d blksize1=%d blksize2=%d\n", ThisTask, + blocknr, blksize1, blksize2); if(blocknr == 2) /* block number 2 is always IDs */ strcat(buf, "Possible mismatch of 32bit and 64bit ID's in IC file and GADGET compilation !\n"); Terminate(buf); @@ -2246,29 +2250,29 @@ void IO_Def::read_single_file_segment(const char *basename, int filenr, int type hid_t hdf5_file = 0, hdf5_grp = 0, 
hdf5_dataspace_in_file; hid_t hdf5_dataspace_in_memory, hdf5_dataset; FILE *fd = 0; - char fname[MAXLEN_PATH]; + char fname[MAXLEN_PATH_EXTRA]; if(num_files > 1) { if(file_format == FILEFORMAT_HDF5) #ifdef ALT_NAMING - sprintf(fname, "%s.%d%s", basename, filenr, HDF5_EXT); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s.%d%s", basename, filenr, HDF5_EXT); #else - sprintf(fname, "%s.%d.hdf5", basename, filenr); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s.%d.hdf5", basename, filenr); #endif else - sprintf(fname, "%s.%d", basename, filenr); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s.%d", basename, filenr); } else { if(file_format == FILEFORMAT_HDF5) #ifdef ALT_NAMING - sprintf(fname, "%s%s", basename, HDF5_EXT); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s", basename, HDF5_EXT); #else - sprintf(fname, "%s.hdf5", basename); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s.hdf5", basename); #endif else - sprintf(fname, "%s", basename); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s", basename); } /* open file */ @@ -2356,9 +2360,10 @@ void IO_Def::read_single_file_segment(const char *basename, int filenr, int type my_fread(&blksize2, sizeof(int), 1, fd); if(blksize1 != blksize2) { - char buf[MAXLEN_PATH]; - sprintf(buf, "incorrect block-sizes detected!\n Task=%d blocknr=%d blksize1=%d blksize2=%d\n", ThisTask, - blocknr, blksize1, blksize2); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, + "incorrect block-sizes detected!\n Task=%d blocknr=%d blksize1=%d blksize2=%d\n", ThisTask, blocknr, + blksize1, blksize2); if(blocknr == 2) /* block number 2 is always IDs */ strcat(buf, "Possible mismatch of 32bit and 64bit ID's in IC file and GADGET compilation !\n"); Terminate(buf); @@ -2426,9 +2431,10 @@ void IO_Def::read_single_file_segment(const char *basename, int filenr, int type my_fread(&blksize2, sizeof(int), 1, fd); if(blksize1 != blksize2) { - char buf[MAXLEN_PATH]; - sprintf(buf, "incorrect block-sizes detected!\n Task=%d blocknr=%d blksize1=%d blksize2=%d\n", ThisTask, - 
blocknr, blksize1, blksize2); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, + "incorrect block-sizes detected!\n Task=%d blocknr=%d blksize1=%d blksize2=%d\n", ThisTask, blocknr, + blksize1, blksize2); if(blocknr == 2) /* block number 2 is always IDs */ strcat(buf, "Possible mismatch of 32bit and 64bit ID's in IC file and GADGET compilation !\n"); Terminate(buf); @@ -2454,18 +2460,19 @@ void IO_Def::read_single_file_segment(const char *basename, int filenr, int type void IO_Def::rename_file_to_bak_if_it_exists(char *fname) { - char fin[MAXLEN_PATH], buf[2 * MAXLEN_PATH]; - - strcpy(fin, fname); - + char fin[MAXLEN_PATH], buf[MAXLEN_PATH_EXTRA]; + + strncpy(fin, fname, MAXLEN_PATH); + fin[MAXLEN_PATH - 1] = 0; + char *p = strrchr(fin, '/'); if(p) { *p = 0; - sprintf(buf, "%s/bak-%s", fin, p + 1); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/bak-%s", fin, p + 1); } else - sprintf(buf, "bak-%s", fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "bak-%s", fname); if(FILE *fcheck = fopen(fname, "r")) // check if file already exists, if yes, try to rename the existing file { @@ -2485,26 +2492,26 @@ void IO_Def::alloc_and_read_ntype_in_files(const char *fname, int num_files) for(int filenr = 0; filenr < num_files; filenr++) { - char buf[3 * MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; if(num_files > 1) { - sprintf(buf, "%s.%d", fname, filenr); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d", fname, filenr); if(file_format == 3) #ifdef ALT_NAMING - sprintf(buf, "%s.%d%s", fname, filenr, HDF5_EXT); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d%s", fname, filenr, HDF5_EXT); #else - sprintf(buf, "%s.%d.hdf5", fname, filenr); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s.%d.hdf5", fname, filenr); #endif } else { - sprintf(buf, "%s", fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s", fname); if(file_format == 3) #ifdef ALT_NAMING - sprintf(buf, "%s%s", fname, HDF5_EXT); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", fname, HDF5_EXT); #else - sprintf(buf, "%s.hdf5", fname); + snprintf(buf, 
MAXLEN_PATH_EXTRA, "%s.hdf5", fname); #endif } diff --git a/src/io/io.h b/src/io/io.h index 0390074958d2be931c49a4f161721e0e3daac653..749c889e539a23bd2ac7566a80953de44c08ad29 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -12,8 +12,9 @@ #ifndef IO_H #define IO_H -#include <hdf5.h> +#include "gadgetconfig.h" +#include <hdf5.h> #ifdef LIGHTCONE_PARTICLES #include <chealpix.h> #endif @@ -174,7 +175,7 @@ class IO_Def : public io_streamcount, public setcomm void *header_buf; long long *ntype_in_files; - char info[100]; + char info[MAXLEN_PATH]; #if defined(MERGERTREE) typedef fof<simparticles>::treehalo_t treehalo_type; diff --git a/src/io/io_streamcount.h b/src/io/io_streamcount.h index 4448cff9760cb4bc809874ec0337e96760b1c576..aa85428e61896611eaede58f4d67645880fcd9e1 100644 --- a/src/io/io_streamcount.h +++ b/src/io/io_streamcount.h @@ -12,7 +12,12 @@ #ifndef IO_STREAMCOUNT_H #define IO_STREAMCOUNT_H +#include "gadgetconfig.h" + #include <errno.h> +#include <string.h> + +#include "../data/macros.h" class io_streamcount { diff --git a/src/io/parameters.cc b/src/io/parameters.cc index 35f667b8e856a174753a13cb3ff1d46ea20b2b82..0ccb722e089437264cc9b800e1b7c2a68145a03e 100644 --- a/src/io/parameters.cc +++ b/src/io/parameters.cc @@ -57,7 +57,8 @@ void parameters::add_param(const char *name, void *buf, int type, int flag) int parameters::read_parameter_file(const char *fname) { FILE *fd, *fdout; - char buf[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200]; + int bufsize = MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200; + char buf[bufsize]; int param_handled[MAX_PARAMETERS]; int errorFlag = 0; @@ -83,7 +84,7 @@ int parameters::read_parameter_file(const char *fname) { if((fd = fopen(fname, "r"))) { - sprintf(buf, "%s%s", fname, "-usedvalues"); + snprintf(buf, bufsize, "%s%s", fname, "-usedvalues"); if(!(fdout = fopen(buf, "w"))) { printf("error opening file '%s' \n", buf); @@ -95,7 +96,10 @@ int parameters::read_parameter_file(const char *fname) int cnt = 0; while(!feof(fd)) { - char 
buf1[MAXLEN_PARAM_TAG + 200], buf2[MAXLEN_PARAM_VALUE + 200], buf3[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 400]; + int bufsize1 = MAXLEN_PARAM_TAG + 200; + int bufsize2 = MAXLEN_PARAM_VALUE + 200; + int bufsize3 = MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 400; + char buf1[bufsize1], buf2[bufsize2], buf3[bufsize3]; *buf = 0; fgets(buf, MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200, fd); @@ -129,7 +133,7 @@ int parameters::read_parameter_file(const char *fname) { case PARAM_DOUBLE: *((double *)ParametersValue[j]) = atof(buf2); - sprintf(buf3, "%%-%ds%%g\n", MAXLEN_PARAM_TAG); + snprintf(buf3, bufsize3, "%%-%ds%%g\n", MAXLEN_PARAM_TAG); fprintf(fdout, buf3, buf1, *((double *)ParametersValue[j])); fprintf(stdout, " "); fprintf(stdout, buf3, buf1, *((double *)ParametersValue[j])); @@ -143,14 +147,14 @@ int parameters::read_parameter_file(const char *fname) Terminate("no environment variable OUTPUT_DIR found"); } strcpy((char *)ParametersValue[j], buf2); - sprintf(buf3, "%%-%ds%%s\n", MAXLEN_PARAM_TAG); + snprintf(buf3, bufsize3, "%%-%ds%%s\n", MAXLEN_PARAM_TAG); fprintf(fdout, buf3, buf1, buf2); fprintf(stdout, " "); fprintf(stdout, buf3, buf1, buf2); break; case PARAM_INT: *((int *)ParametersValue[j]) = atoi(buf2); - sprintf(buf3, "%%-%ds%%d\n", MAXLEN_PARAM_TAG); + snprintf(buf3, bufsize3, "%%-%ds%%d\n", MAXLEN_PARAM_TAG); fprintf(fdout, buf3, buf1, *((int *)ParametersValue[j])); fprintf(stdout, " "); fprintf(stdout, buf3, buf1, *((int *)ParametersValue[j])); @@ -198,7 +202,7 @@ void parameters::write_used_parameters(const char *dirname, const char *fname) { mkdir(dirname, 02755); char buf[MAXLEN_PATH_EXTRA]; - sprintf(buf, "%s%s", dirname, fname); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", dirname, fname); FILE *fdout = fopen(buf, "w"); if(!fdout) Terminate("Can't open file '%s'", buf); @@ -209,20 +213,21 @@ void parameters::write_used_parameters(const char *dirname, const char *fname) if(j >= 0) { - char buf3[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 400]; + int bufsize3 
= MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 400; + char buf3[bufsize3]; switch(ParametersType[j]) { case PARAM_DOUBLE: - sprintf(buf3, "%%-%ds%%g\n", MAXLEN_PARAM_TAG); + snprintf(buf3, bufsize3, "%%-%ds%%g\n", MAXLEN_PARAM_TAG); fprintf(fdout, buf3, ParametersTag[j], *((double *)ParametersValue[j])); break; case PARAM_STRING: - sprintf(buf3, "%%-%ds%%s\n", MAXLEN_PARAM_TAG); + snprintf(buf3, bufsize3, "%%-%ds%%s\n", MAXLEN_PARAM_TAG); fprintf(fdout, buf3, ParametersTag[j], (char *)ParametersValue[j]); break; case PARAM_INT: - sprintf(buf3, "%%-%ds%%d\n", MAXLEN_PARAM_TAG); + snprintf(buf3, bufsize3, "%%-%ds%%d\n", MAXLEN_PARAM_TAG); fprintf(fdout, buf3, ParametersTag[j], *((int *)ParametersValue[j])); break; } diff --git a/src/io/parameters.h b/src/io/parameters.h index 734c6aaa06c6d59d24f30972b11385526c220916..252e4a1ea0fdfa4d015355c706dd353cb6b7be64 100644 --- a/src/io/parameters.h +++ b/src/io/parameters.h @@ -12,6 +12,8 @@ #ifndef PARAMETERS_H #define PARAMETERS_H +#include "gadgetconfig.h" + #include "../data/dtypes.h" #include "../mpi_utils/setcomm.h" diff --git a/src/io/restart.cc b/src/io/restart.cc index af956a28f49ab69289c4d1185be56c44a432e6a0..d4bf75fc5536aa88e377decbe0854a67f24b3856 100644 --- a/src/io/restart.cc +++ b/src/io/restart.cc @@ -9,7 +9,11 @@ * \brief handles the reading/writing of restart files */ +// clang-format off #include "gadgetconfig.h" +// clang-format on + +#include "../io/restart.h" #include <gsl/gsl_rng.h> #include <math.h> @@ -24,7 +28,6 @@ #include "../data/mymalloc.h" #include "../domain/domain.h" #include "../io/io.h" -#include "../io/restart.h" #include "../lightcone/lightcone.h" #include "../logs/logs.h" #include "../logs/timer.h" @@ -133,8 +136,8 @@ void restart::backup_restartfiles(int task) mpi_printf("RESTART: Backing up restart files...\n"); - sprintf(buf, "%s/restartfiles/%s.%d", All.OutputDir, "restart", task); - sprintf(buf_bak, "%s/restartfiles/bak-%s.%d", All.OutputDir, "restart", task); + snprintf(buf, 
MAXLEN_PATH_EXTRA, "%s/restartfiles/%s.%d", All.OutputDir, "restart", task); + snprintf(buf_bak, MAXLEN_PATH_EXTRA, "%s/restartfiles/bak-%s.%d", All.OutputDir, "restart", task); if((fcheck = fopen(buf, "r"))) { @@ -191,7 +194,7 @@ void restart::do_restart(int modus) if(ThisTask == 0 && modus == MODUS_WRITE) { char buf[MAXLEN_PATH_EXTRA]; - sprintf(buf, "%s/restartfiles", All.OutputDir); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/restartfiles", All.OutputDir); mkdir(buf, 02755); } MPI_Barrier(Communicator); @@ -368,7 +371,7 @@ void restart::work_files(int modus) void restart::contents_restart_file(int modus) { char buf[MAXLEN_PATH_EXTRA]; - sprintf(buf, "%s/restartfiles/%s.%d", All.OutputDir, "restart", ThisTask); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/restartfiles/%s.%d", All.OutputDir, "restart", ThisTask); if(modus == MODUS_READ) { @@ -434,8 +437,6 @@ void restart::contents_restart_file(int modus) in(&Sim->Lp.NumPart, modus); byten(&Sim->Lp.P[0], Sim->Lp.NumPart * sizeof(lightcone_particle_data), modus); - - in(&Sim->LightCone.NumLastCheck, modus); #endif /* lightcone massmap data */ @@ -516,7 +517,7 @@ void restart::contents_restart_file(int modus) if(Sim->NgbTree.MaxPart != 0) { Sim->NgbTree.Points = (ngbpoint_data *)Mem.mymalloc_movable(&Sim->NgbTree.Points, "Points", - Sim->NgbTree.NumPartImported * sizeof(ngbpoint_data)); + Sim->NgbTree.NumPartImported * sizeof(ngbpoint_data)); Sim->NgbTree.Nextnode = (int *)Mem.mymalloc_movable( &Sim->NgbTree.Nextnode, "Nextnode", (Sim->NgbTree.MaxPart + Sim->Domain.NTopleaves + Sim->NgbTree.NumPartImported) * sizeof(int)); @@ -587,26 +588,44 @@ void restart::readjust_timebase(double TimeMax_old, double TimeMax_new) All.PM_Ti_endstep /= 2; #endif +#ifdef FORCE_EQUAL_TIMESTEPS + GlobalTimeStep /= 2; +#endif + + for(int n = 0; n < TIMEBINS; n++) + All.Ti_begstep[n] /= 2; + + All.Ti_nextoutput /= 2; + All.Ti_lastoutput /= 2; + for(int i = 0; i < Sim->Sp.NumPart; i++) { + Sim->Sp.P[i].Ti_Current = Sim->Sp.P[i].Ti_Current / 2; + 
if(Sim->Sp.P[i].TimeBinGrav > 0) { - Sim->Sp.P[i].Ti_Current = Sim->Sp.P[i].Ti_Current / 2; + int oldbin = Sim->Sp.P[i].TimeBinGrav; + int newbin = oldbin - 1; - Sim->Sp.P[i].TimeBinGrav--; - - if(Sim->Sp.P[i].TimeBinGrav <= 0) + if(newbin <= 0) Terminate("Error in readjust_timebase(). Minimum Timebin for particle %d reached.\n", i); + + Sim->Sp.TimeBinsGravity.timebin_move_particle(i, oldbin, newbin); + Sim->Sp.P[i].TimeBinGrav = newbin; } if(Sim->Sp.P[i].getType() == 0) { if(Sim->Sp.P[i].getTimeBinHydro() > 0) { - Sim->Sp.P[i].setTimeBinHydro(Sim->Sp.P[i].getTimeBinHydro() - 1); + int oldbin = Sim->Sp.P[i].getTimeBinHydro(); + int newbin = oldbin - 1; - if(Sim->Sp.P[i].getTimeBinHydro() <= 0) + if(newbin <= 0) Terminate("Error in readjust_timebase(). Minimum Timebin (hydro) for sph particle %d reached.\n", i); + + Sim->Sp.TimeBinsHydro.timebin_move_particle(i, oldbin, newbin); + Sim->Sp.P[i].setTimeBinHydro(newbin); } } } diff --git a/src/io/restart.h b/src/io/restart.h index 4824a6de258624b51c7c5fb550b0f2a53ae96ed3..da2d44761526ff8381f24a3afd1e5d3efa7a0688 100644 --- a/src/io/restart.h +++ b/src/io/restart.h @@ -17,6 +17,8 @@ #define BLKSIZE (1024 * 1024) +#include "gadgetconfig.h" + #include "../io/io_streamcount.h" #include "../main/simulation.h" diff --git a/src/io/snap_io.cc b/src/io/snap_io.cc index c1222d4c3bc26c78bb737cc5cf8b55f987f1eb19..43d352b1ee9e5c62343ae1b179759cdb74f450d0 100644 --- a/src/io/snap_io.cc +++ b/src/io/snap_io.cc @@ -56,7 +56,7 @@ void snap_io::init_basic(simparticles *Sp_ptr) this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_SNAPSHOT; - sprintf(this->info, "SNAPSHOT: writing snapshot"); + snprintf(this->info, MAXLEN_PATH, "SNAPSHOT: writing snapshot"); #ifdef OUTPUT_COORDINATES_AS_INTEGERS init_field("IPOS", "IntCoordinates", MEM_MY_INTPOS_TYPE, FILE_MY_INTPOS_TYPE, READ_IF_PRESENT, 3, A_P, NULL, io_func_intpos, @@ -74,24 +74,6 @@ void snap_io::init_basic(simparticles *Sp_ptr) 0., 0., 0., 1., 
All.UnitVelocity_in_cm_per_s); #endif -#ifdef OUTPUT_ACCELERATION -#ifdef OUTPUT_ACCELERATIONS_IN_HALF_PRECISION - All.accel_normalize_fac = 10.0 * All.Hubble * (100.0 * 1.0e5 / All.UnitVelocity_in_cm_per_s); - - init_field("ACCE", "Acceleration", MEM_MY_FLOAT, FILE_HALF, SKIP_ON_READ, 3, A_NONE, 0, io_func_accel, ALL_TYPES, 1, -2.0, 1, -1, 0, - 2, All.accel_normalize_fac * All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm); -#else - All.accel_normalize_fac = 1.0; - - init_field("ACCE", "Acceleration", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, SKIP_ON_READ, 3, A_NONE, 0, io_func_accel, ALL_TYPES, 1, -2.0, 1, - -1, 0, 2, All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm); -#endif - - /* hydro acceleration */ - init_field("HACC", "HydroAcceleration", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, READ_IF_PRESENT, 3, A_SPHP, &Sp->SphP[0].HydroAccel, 0, - GAS_ONLY, 0, 0, 0, 0, 0, 0, 0); -#endif - init_field("ID ", "ParticleIDs", MEM_MY_ID_TYPE, FILE_MY_ID_TYPE, READ_IF_PRESENT, 1, A_P, NULL, io_func_id, ALL_TYPES, 0, 0, 0, 0, 0, 0, 0, true); @@ -114,6 +96,24 @@ void snap_io::init_basic(simparticles *Sp_ptr) init_field("HSML", "SmoothingLength", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, READ_IF_PRESENT, 1, A_SPHP, &Sp->SphP[0].Hsml, NULL, GAS_ONLY, 1, 1., -1., 1., 0., 0., All.UnitLength_in_cm); +#ifdef OUTPUT_ACCELERATION +#ifdef OUTPUT_ACCELERATIONS_IN_HALF_PRECISION + All.accel_normalize_fac = 10.0 * All.Hubble * (100.0 * 1.0e5 / All.UnitVelocity_in_cm_per_s); + + init_field("ACCE", "Acceleration", MEM_MY_FLOAT, FILE_HALF, SKIP_ON_READ, 3, A_NONE, 0, io_func_accel, ALL_TYPES, 1, -2.0, 1, -1, 0, + 2, All.accel_normalize_fac * All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm); +#else + All.accel_normalize_fac = 1.0; + + init_field("ACCE", "Acceleration", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, SKIP_ON_READ, 3, A_NONE, 0, io_func_accel, ALL_TYPES, 1, -2.0, 1, + -1, 0, 2, All.UnitVelocity_in_cm_per_s * 
All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm); +#endif + + /* hydro acceleration */ + init_field("HACC", "HydroAcceleration", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, READ_IF_PRESENT, 3, A_SPHP, &Sp->SphP[0].HydroAccel, 0, + GAS_ONLY, 0, 0, 0, 0, 0, 0, 0); +#endif + #ifdef STARFORMATION init_field("SFR ", "StarFormationRate", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, All.RestartFlag == RST_FOF ? READ_IF_PRESENT : SKIP_ON_READ, @@ -266,16 +266,16 @@ void snap_io::read_snapshot(int num, mysnaptype loc_snap_type) if(snap_type == MOST_BOUND_PARTICLE_SNAPHOT) { if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/snapdir_%03d/%s-prevmostboundonly_%03d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%03d/%s-prevmostboundonly_%03d", All.OutputDir, num, All.SnapshotFileBase, num); else - sprintf(buf, "%s%s-prevmostboundonly_%03d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s-prevmostboundonly_%03d", All.OutputDir, All.SnapshotFileBase, num); } else { if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/snapdir_%03d/%s_%03d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%03d/%s_%03d", All.OutputDir, num, All.SnapshotFileBase, num); else - sprintf(buf, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num); } #endif @@ -456,6 +456,17 @@ void snap_io::snap_init_domain_mapping(void) Sp->FacCoordToInt = pow(2.0, BITS_FOR_POSITIONS) / Sp->RegionLen; Sp->FacIntToCoord = Sp->RegionLen / pow(2.0, BITS_FOR_POSITIONS); +#if defined(NGENIC) && !defined(CREATE_GRID) + if(All.RestartFlag == RST_BEGIN || All.RestartFlag == RST_CREATEICS) + { + // Make sure that the velocities are zero when a glass file is fed to IC creation + mpi_printf("READIC: Setting velocities in glass file to zero.\n"); + for(int i = 0; i < Sp->NumPart; i++) + for(int k = 0; k < 3; k++) + Sp->P[i].Vel[k] = 
0; + } +#endif + #else double posmin[3], posmax[3]; @@ -615,9 +626,9 @@ void snap_io::write_snapshot(int num, mysnaptype loc_snap_type) { char buf[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING - sprintf(buf, "%s/snapdir_%06d", All.OutputDir, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%06d", All.OutputDir, num); #else - sprintf(buf, "%s/snapdir_%03d", All.OutputDir, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%03d", All.OutputDir, num); #endif mkdir(buf, 02755); } @@ -627,42 +638,44 @@ void snap_io::write_snapshot(int num, mysnaptype loc_snap_type) char buf[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/snapdir_%06d/%s_%06d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%06d/%s_%06d", All.OutputDir, num, All.SnapshotFileBase, num); else - sprintf(buf, "%s%s_%06d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, All.SnapshotFileBase, num); #else if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/snapdir_%03d/%s_%03d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%03d/%s_%03d", All.OutputDir, num, All.SnapshotFileBase, num); else - sprintf(buf, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num); #endif if(snap_type == MOST_BOUND_PARTICLE_SNAPHOT) { #ifdef ALT_NAMING if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/snapdir_%06d/%s-prevmostboundonly_%06d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%06d/%s-prevmostboundonly_%06d", All.OutputDir, num, All.SnapshotFileBase, num); else - sprintf(buf, "%s%s-prevmostboundonly_%06d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s-prevmostboundonly_%06d", All.OutputDir, All.SnapshotFileBase, num); #else if(All.NumFilesPerSnapshot > 1) - 
sprintf(buf, "%s/snapdir_%03d/%s-prevmostboundonly_%03d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%03d/%s-prevmostboundonly_%03d", All.OutputDir, num, All.SnapshotFileBase, num); else - sprintf(buf, "%s%s-prevmostboundonly_%03d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s-prevmostboundonly_%03d", All.OutputDir, All.SnapshotFileBase, num); #endif } else if(snap_type == MOST_BOUND_PARTICLE_SNAPHOT_REORDERED) { #ifdef ALT_NAMING if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/snapdir_%06d/%s-prevmostboundonly-treeorder_%06d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%06d/%s-prevmostboundonly-treeorder_%06d", All.OutputDir, num, + All.SnapshotFileBase, num); else - sprintf(buf, "%s%s-prevmostboundonly-treeorder_%06d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s-prevmostboundonly-treeorder_%06d", All.OutputDir, All.SnapshotFileBase, num); #else if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/snapdir_%03d/%s-prevmostboundonly-treeorder_%03d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/snapdir_%03d/%s-prevmostboundonly-treeorder_%03d", All.OutputDir, num, + All.SnapshotFileBase, num); else - sprintf(buf, "%s%s-prevmostboundonly-treeorder_%03d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s-prevmostboundonly-treeorder_%03d", All.OutputDir, All.SnapshotFileBase, num); #endif } @@ -731,7 +744,7 @@ void snap_io::fill_file_header(int writeTask, int lastTask, long long *n_type, l header.npartTotal[n] = ntot_type_all[n]; } -#ifdef MERGERTREE +#if defined(MERGERTREE) && !defined(GADGET2_HEADER) if(snap_type == MOST_BOUND_PARTICLE_SNAPHOT_REORDERED) { header.Ntrees = ntot_type[NTYPES]; @@ -902,7 +915,7 @@ void snap_io::read_file_header(const char *fname, int filenr, int readTask, int nall += n_for_this_task; 
} -#ifdef MERGERTREE +#if defined(MERGERTREE) && !defined(GADGET2_HEADER) if(snap_type == MOST_BOUND_PARTICLE_SNAPHOT_REORDERED) ntot_type[NTYPES] = header.Ntrees; #endif @@ -988,7 +1001,7 @@ void snap_io::read_header_fields(const char *fname) read_vector_attribute(handle, "NumPart_Total", header.npartTotal, H5T_NATIVE_UINT64, ntypes); -#ifdef MERGERTREE +#if defined(MERGERTREE) && !defined(GADGET2_HEADER) if(snap_type == MOST_BOUND_PARTICLE_SNAPHOT_REORDERED) { read_scalar_attribute(handle, "Ntrees_ThisFile", &header.Ntrees, H5T_NATIVE_UINT64); @@ -1002,6 +1015,10 @@ void snap_io::read_header_fields(const char *fname) read_scalar_attribute(handle, "BoxSize", &header.BoxSize, H5T_NATIVE_DOUBLE); read_scalar_attribute(handle, "NumFilesPerSnapshot", &header.num_files, H5T_NATIVE_INT); +#if defined(GADGET2_HEADER) && defined(SECOND_ORDER_LPT_ICS) + read_scalar_attribute(handle, "LptScalingfactor", &header.lpt_scalingfactor, H5T_NATIVE_FLOAT); +#endif + my_H5Gclose(handle, "/Header"); my_H5Fclose(hdf5_file, fname); } @@ -1022,9 +1039,9 @@ void snap_io::read_increase_numbers(int type, int n_for_this_task) void snap_io::get_datagroup_name(int type, char *buf) { if(type < NTYPES) - sprintf(buf, "/PartType%d", type); + snprintf(buf, MAXLEN_PATH, "/PartType%d", type); else if(type == NTYPES) - sprintf(buf, "/TreeTable"); + snprintf(buf, MAXLEN_PATH, "/TreeTable"); else Terminate("wrong group"); } diff --git a/src/io/snap_io.h b/src/io/snap_io.h index ca9014108aee31d73fd90c2fd6439991f864e562..e56c91fe1943fed9295e17d7cc8d63daa26095d8 100644 --- a/src/io/snap_io.h +++ b/src/io/snap_io.h @@ -171,6 +171,18 @@ class snap_io : public IO_Def for(int k = 0; k < 3; k++) thisobj->Ptmp[particle].Pos[k] = in_buffer[k]; +#if defined(NGENIC) && !defined(CREATE_GRID) // This is meant to become active when a glass file is used for IC creation + if(All.RestartFlag == RST_BEGIN || All.RestartFlag == RST_CREATEICS) + { + double fac = All.BoxSize / thisobj->header.BoxSize; +#ifdef TILING + fac 
/= TILING; +#endif + for(int k = 0; k < 3; k++) + thisobj->Ptmp[particle].Pos[k] *= fac; // scale the glass file to the right size + } +#endif + #ifdef SQUASH_TEST thisobj->Ptmp[particle].Pos[1] *= 1.0 / 4; thisobj->Ptmp[particle].Pos[2] *= 1.0 / 16; @@ -212,7 +224,8 @@ class snap_io : public IO_Def out_buffer[k] = thisobj->Sp->P[particle].Vel[k]; /* we are using p = a^2 * xdot internally as velocity unit. Convert to legacy Gadget velocity units */ - out_buffer[k] *= sqrt(All.cf_a3inv); + if(All.RestartFlag != RST_CONVERTSNAP) + out_buffer[k] *= sqrt(All.cf_a3inv); } } else diff --git a/src/io/test_io_bandwidth.cc b/src/io/test_io_bandwidth.cc index e940a8b536ba76005c6e8ca8d1a74a840362a0c8..17908458699cc50b07d81c819ce00b024cb02ed9 100644 --- a/src/io/test_io_bandwidth.cc +++ b/src/io/test_io_bandwidth.cc @@ -39,7 +39,7 @@ void test_io_bandwidth::measure_io_bandwidth(void) if(ThisTask == 0) { char buf[MAXLEN_PATH_EXTRA]; - sprintf(buf, "%s/testdata", All.OutputDir); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/testdata", All.OutputDir); mkdir(buf, 02755); } MPI_Barrier(Communicator); @@ -82,7 +82,7 @@ void test_io_bandwidth::write_test_data(void) /* now delete test data */ char buf[MAXLEN_PATH_EXTRA]; - sprintf(buf, "%s/testdata/%s.%d", All.OutputDir, "testdata", ThisTask); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/testdata/%s.%d", All.OutputDir, "testdata", ThisTask); unlink(buf); MPI_Barrier(Communicator); } @@ -172,7 +172,7 @@ void test_io_bandwidth::work_files(int modus) void test_io_bandwidth::contents_restart_file(int modus) { char buf[MAXLEN_PATH_EXTRA]; - sprintf(buf, "%s/testdata/%s.%d", All.OutputDir, "testdata", ThisTask); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/testdata/%s.%d", All.OutputDir, "testdata", ThisTask); if(modus == MODUS_READ) { diff --git a/src/io/test_io_bandwidth.h b/src/io/test_io_bandwidth.h index 8b9eee7f7833b096b4d8180ad3446bc5955ef6f8..f9ec7e7e6636b5ab2449ce8719cf4d8e9ce24b20 100644 --- a/src/io/test_io_bandwidth.h +++ 
b/src/io/test_io_bandwidth.h @@ -12,6 +12,8 @@ #ifndef TEST_IO_BANDWIDTH_H #define TEST_IO_BANDWIDTH_H +#include "gadgetconfig.h" + #include <mpi.h> #define MODUS_WRITE 0 diff --git a/src/lightcone/lightcone.cc b/src/lightcone/lightcone.cc index 9cc3183341836d8e850dd95de28f319a05a60ef3..2068f9ee736b5b63c3473884c3c9f251f760947d 100644 --- a/src/lightcone/lightcone.cc +++ b/src/lightcone/lightcone.cc @@ -98,7 +98,7 @@ int lightcone::lightcone_add_position_massmaps(particle_data *P, double *pos, do #endif #ifdef LIGHTCONE_PARTICLES -void lightcone::lightcone_add_position_particles(particle_data *P, double *pos, double ascale) +void lightcone::lightcone_add_position_particles(particle_data *P, double *pos, double ascale, int oindex) { if(Lp->NumPart >= Lp->MaxPart) { @@ -142,6 +142,10 @@ void lightcone::lightcone_add_position_particles(particle_data *P, double *pos, #if defined(LIGHTCONE_PARTICLES_GROUPS) && defined(FOF) Lp->P[q].setFlagSaveDistance(); #endif + +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + Lp->P[q].OriginIndex = oindex; +#endif } #endif @@ -191,105 +195,117 @@ int lightcone::lightcone_test_for_particle_addition(particle_data *P, integertim bool previously = P->ID.is_previously_most_bound(); #endif - NumLastCheck = 0; +#ifndef LIGHTCONE_MULTIPLE_ORIGINS + int oindex = 0; +#else + for(int oindex = 0; oindex < NlightconeOrigins; oindex++) +#endif + { + for(int n = 0; n < BoxOrigin[oindex].NumBoxes; n++) + { + if(R0 < BoxOrigin[oindex].BoxList[n].Rmin) + continue; - for(int n = 0; n < NumBoxes; n++) - { - if(R0 < BoxList[n].Rmin) - continue; + if(R1prime > BoxOrigin[oindex].BoxList[n].Rmax) + break; - if(R1prime > BoxList[n].Rmax) - break; + int i = BoxOrigin[oindex].BoxList[n].i; + int j = BoxOrigin[oindex].BoxList[n].j; + int k = BoxOrigin[oindex].BoxList[n].k; - NumLastCheck++; + double PosA[3]; - int i = BoxList[n].i; - int j = BoxList[n].j; - int k = BoxList[n].k; + PosA[0] = pos[0] + i * All.BoxSize; + PosA[1] = pos[1] + j * All.BoxSize; + PosA[2] = pos[2] + 
k * All.BoxSize; - double PosA[3]; +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + PosA[0] -= ConeOrigins[oindex].PosOrigin[0]; + PosA[1] -= ConeOrigins[oindex].PosOrigin[1]; + PosA[2] -= ConeOrigins[oindex].PosOrigin[2]; +#endif - PosA[0] = pos[0] + i * All.BoxSize; - PosA[1] = pos[1] + j * All.BoxSize; - PosA[2] = pos[2] + k * All.BoxSize; + double rA2 = PosA[0] * PosA[0] + PosA[1] * PosA[1] + PosA[2] * PosA[2]; - double rA2 = PosA[0] * PosA[0] + PosA[1] * PosA[1] + PosA[2] * PosA[2]; + if(rA2 < R0_squared) + { + double PosB[3]; + double diffBminusA[3]; - if(rA2 < R0_squared) - { - double PosB[3]; - double diffBminusA[3]; + for(int q = 0; q < 3; q++) + { + diffBminusA[q] = P->Vel[q] * dt_drift * Sp->FacIntToCoord; + PosB[q] = PosA[q] + diffBminusA[q]; + } - for(int q = 0; q < 3; q++) - { - diffBminusA[q] = P->Vel[q] * dt_drift * Sp->FacIntToCoord; - PosB[q] = PosA[q] + diffBminusA[q]; - } + double rB2 = PosB[0] * PosB[0] + PosB[1] * PosB[1] + PosB[2] * PosB[2]; - double rB2 = PosB[0] * PosB[0] + PosB[1] * PosB[1] + PosB[2] * PosB[2]; + if(rB2 > R1_squared) + { + /* ok, particle crossed the lightcone. Interpolate the coordinate of the crossing */ - if(rB2 > R1_squared) - { - /* ok, particle crossed the lightcone. 
Interpolate the coordinate of the crossing */ + double dr2 = diffBminusA[0] * diffBminusA[0] + diffBminusA[1] * diffBminusA[1] + diffBminusA[2] * diffBminusA[2]; - double dr2 = diffBminusA[0] * diffBminusA[0] + diffBminusA[1] * diffBminusA[1] + diffBminusA[2] * diffBminusA[2]; + double a = pow(R1 - R0, 2) - dr2; + double b = 2 * R0 * (R1 - R0) - 2 * (PosA[0] * diffBminusA[0] + PosA[1] * diffBminusA[1] + PosA[2] * diffBminusA[2]); + double c = R0 * R0 - rA2; - double a = pow(R1 - R0, 2) - dr2; - double b = 2 * R0 * (R1 - R0) - 2 * (PosA[0] * diffBminusA[0] + PosA[1] * diffBminusA[1] + PosA[2] * diffBminusA[2]); - double c = R0 * R0 - rA2; + double det = b * b - 4 * a * c; - double det = b * b - 4 * a * c; - - if(det < 0) - Terminate( - "det=%g R0=%g R1=%g rA=%g rB=%g dr=%g dt_drift=%g dx=(%g|%g|%g) vel=(%g|%g|%g) " - "posA=(%g|%g|%g) posB=(%g|%g|%g)\n", - det, R0, R1, sqrt(rA2), sqrt(rB2), sqrt(dr2), dt_drift, P->Vel[0] * dt_drift * Sp->FacIntToCoord, - P->Vel[1] * dt_drift * Sp->FacIntToCoord, P->Vel[2] * dt_drift * Sp->FacIntToCoord, P->Vel[0], P->Vel[1], - P->Vel[2], PosA[0], PosA[1], PosA[2], PosB[0], PosB[1], PosB[2]); + if(det < 0) + Terminate( + "det=%g R0=%g R1=%g rA=%g rB=%g dr=%g dt_drift=%g dx=(%g|%g|%g) vel=(%g|%g|%g) " + "posA=(%g|%g|%g) posB=(%g|%g|%g)\n", + det, R0, R1, sqrt(rA2), sqrt(rB2), sqrt(dr2), dt_drift, P->Vel[0] * dt_drift * Sp->FacIntToCoord, + P->Vel[1] * dt_drift * Sp->FacIntToCoord, P->Vel[2] * dt_drift * Sp->FacIntToCoord, P->Vel[0], P->Vel[1], + P->Vel[2], PosA[0], PosA[1], PosA[2], PosB[0], PosB[1], PosB[2]); - double fac = (-b - sqrt(det)) / (2 * a); + double fac = (-b - sqrt(det)) / (2 * a); - vector<double> Pos; + vector<double> Pos; - for(int q = 0; q < 3; q++) - Pos[q] = PosA[q] + fac * diffBminusA[q]; + for(int q = 0; q < 3; q++) + Pos[q] = PosA[q] + fac * diffBminusA[q]; - double ascale = All.TimeBegin * exp((time0 + (time1 - time0) * fac) * All.Timebase_interval); + double ascale = All.TimeBegin * exp((time0 + (time1 - time0) 
* fac) * All.Timebase_interval); - /* now we can add particle at position Pos[] to the lightcone, provided it fits into the angular mask */ + /* now we can add particle at position Pos[] to the lightcone, provided it fits into the angular mask */ - if(fac < 0 || fac > 1) - { - warn( - "ascale=%g fac=%g fac-1%g R0=%g R1=%g rA=%g rB=%g dr=%g dt_drift=%g dx=(%g|%g|%g) vel=(%g|%g|%g) " - "posA=(%g|%g|%g) posB=(%g|%g|%g)\n", - ascale, fac, fac - 1, R0, R1, sqrt(rA2), sqrt(rB2), sqrt(dr2), dt_drift, - P->Vel[0] * dt_drift * Sp->FacIntToCoord, P->Vel[1] * dt_drift * Sp->FacIntToCoord, - P->Vel[2] * dt_drift * Sp->FacIntToCoord, P->Vel[0], P->Vel[1], P->Vel[2], PosA[0], PosA[1], PosA[2], PosB[0], - PosB[1], PosB[2]); - } - else - { + if(fac < 0 || fac > 1) + { + warn( + "ascale=%g fac=%g fac-1%g R0=%g R1=%g rA=%g rB=%g dr=%g dt_drift=%g dx=(%g|%g|%g) vel=(%g|%g|%g) " + "posA=(%g|%g|%g) posB=(%g|%g|%g)\n", + ascale, fac, fac - 1, R0, R1, sqrt(rA2), sqrt(rB2), sqrt(dr2), dt_drift, + P->Vel[0] * dt_drift * Sp->FacIntToCoord, P->Vel[1] * dt_drift * Sp->FacIntToCoord, + P->Vel[2] * dt_drift * Sp->FacIntToCoord, P->Vel[0], P->Vel[1], P->Vel[2], PosA[0], PosA[1], PosA[2], PosB[0], + PosB[1], PosB[2]); + } + else + { #ifdef LIGHTCONE_PARTICLES - if(ascale >= ConeGlobAstart && ascale < ConeGlobAend) - for(int cone = 0; cone < Nlightcones; cone++) - if(lightcone_is_cone_member_basic(ascale, Pos, previously, cone)) - { - /* we only add the particle once if it is at least contained in one of the cones */ - lightcone_add_position_particles(P, Pos.da, ascale); - break; - } + if(ascale >= ConeGlobAstart && ascale < ConeGlobAend) + for(int cone = 0; cone < Nlightcones; cone++) +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + if(oindex == Cones[cone].OriginIndex) +#endif + if(lightcone_is_cone_member_basic(ascale, Pos, previously, cone)) + { + /* we only add the particle once if it is at least contained in one of the cones */ + lightcone_add_position_particles(P, Pos.da, ascale, oindex); + break; + } 
#endif #ifdef LIGHTCONE_MASSMAPS - if(ascale >= MassMapBoundariesAscale[0] && ascale < MassMapBoundariesAscale[NumMassMapBoundaries - 1]) - buffer_full_flag |= lightcone_add_position_massmaps(P, Pos.da, ascale); + if(ascale >= MassMapBoundariesAscale[0] && ascale < MassMapBoundariesAscale[NumMassMapBoundaries - 1]) + buffer_full_flag |= lightcone_add_position_massmaps(P, Pos.da, ascale); #endif - } - } - } - } + } + } + } + } + } return buffer_full_flag; } @@ -303,6 +319,11 @@ bool lightcone::lightcone_is_cone_member(int i, int cone) return false; #endif +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + if(Lp->P[i].OriginIndex != Cones[cone].OriginIndex) + return false; +#endif + vector<double> pos; if(i >= Lp->NumPart) @@ -310,6 +331,13 @@ bool lightcone::lightcone_is_cone_member(int i, int cone) Lp->signedintpos_to_pos((MySignedIntPosType *)Lp->P[i].IntPos, pos.da); +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + int oindex = Cones[cone].OriginIndex; + pos[0] -= ConeOrigins[oindex].PosOrigin[0]; + pos[1] -= ConeOrigins[oindex].PosOrigin[1]; + pos[2] -= ConeOrigins[oindex].PosOrigin[2]; +#endif + return lightcone_is_cone_member_basic(Lp->P[i].Ascale, pos, Lp->P[i].ID.is_previously_most_bound(), cone); } @@ -394,6 +422,67 @@ void lightcone::lightcone_init_geometry(char *fname) if(ThisTask == 0) { +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + + for(int iter = 0; iter < 2; iter++) + { + NlightconeOrigins = 0; + FILE *fd; + + if(!(fd = fopen(All.LightConeOriginsFile, "r"))) + Terminate("LIGHTCONE_MULTIPLE_ORIGINS: cannot read lightcone origins from file `%s'\n", All.LightConeOriginsFile); + + if(iter == 0) + { + while(1) + { + double dummy; + if(fscanf(fd, "%lg %lg %lg", &dummy, &dummy, &dummy) != 3) + break; + + NlightconeOrigins++; + if(NlightconeOrigins > LIGHTCONE_MAX_NUMBER_ORIGINS) + Terminate("LIGHTCONE_MULTIPLE_ORIGINS: Too many entries in file %s (maximum number of origins set to %d)", + All.LightConeOriginsFile, LIGHTCONE_MAX_NUMBER_ORIGINS); + } + + if(NlightconeOrigins == 0) + 
Terminate("LIGHTCONE_MULTIPLE_ORIGINS: No entry in file %s", All.LightConeOriginsFile); + + ConeOrigins = (cone_origin *)Mem.mymalloc("ConeOrigins", (NlightconeOrigins + 1) * sizeof(cone_origin)); /* +1: the final, failing fscanf of the second pass is handed slot [NlightconeOrigins] */ + + mpi_printf("LIGHTCONE_MULTIPLE_ORIGINS: read specification for %d origins from file `%s'.\n", NlightconeOrigins, + All.LightConeOriginsFile); + } + else + { + while(1) + { + if(fscanf(fd, "%lg %lg %lg", &ConeOrigins[NlightconeOrigins].PosOrigin[0], + &ConeOrigins[NlightconeOrigins].PosOrigin[1], &ConeOrigins[NlightconeOrigins].PosOrigin[2]) != 3) + break; + + NlightconeOrigins++; + }; + } + + fclose(fd); + } + + for(int n = 0; n < NlightconeOrigins; n++) /* normalise and report every origin that was read */ + { + for(int i = 0; i < 3; i++) /* all three coordinates: wrap the origin periodically into [0, BoxSize) */ + { + while(ConeOrigins[n].PosOrigin[i] < 0) + ConeOrigins[n].PosOrigin[i] += All.BoxSize; + while(ConeOrigins[n].PosOrigin[i] >= All.BoxSize) + ConeOrigins[n].PosOrigin[i] -= All.BoxSize; + } + mpi_printf("LIGHTCONE_MULTIPLE_ORIGINS: Origin #%03d: %10g %10g %10g\n", n, ConeOrigins[n].PosOrigin[0], + ConeOrigins[n].PosOrigin[1], ConeOrigins[n].PosOrigin[2]); + } +#endif + for(int iter = 0; iter < 2; iter++) { Nlightcones = 0; @@ -445,6 +534,12 @@ void lightcone::lightcone_init_geometry(char *fname) break; } +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + int lc_origin; + if(fscanf(fd, "%d", &lc_origin) != 1) + Terminate("LIGHTCONE_PARTICLES: can't read origin identifier in file '%s'", fname); +#endif + Nlightcones++; } @@ -467,14 +562,14 @@ void lightcone::lightcone_init_geometry(char *fname) case LC_TYPE_FULLSKY: fscanf(fd, "%d %lg %lg", &Cones[Nlightcones].OnlyMostBoundFlag, &Cones[Nlightcones].Astart, &Cones[Nlightcones].Aend); - sprintf(Cones[Nlightcones].Tag, "Full-sky"); + snprintf(Cones[Nlightcones].Tag, MAXLEN_PATH, "Full-sky"); break; case LC_TYPE_OCTANT: fscanf(fd, "%d %lg %lg", &Cones[Nlightcones].OnlyMostBoundFlag, &Cones[Nlightcones].Astart, &Cones[Nlightcones].Aend); fscanf(fd, "%d", &Cones[Nlightcones].OctantNr); - sprintf(Cones[Nlightcones].Tag, "Octant"); + 
snprintf(Cones[Nlightcones].Tag, MAXLEN_PATH, "Octant"); break; case LC_TYPE_PENCIL: @@ -490,7 +585,7 @@ void lightcone::lightcone_init_geometry(char *fname) /* convert to rad */ Cones[Nlightcones].PencilAngleRad = Cones[Nlightcones].PencilAngle * M_PI / 180.0; - sprintf(Cones[Nlightcones].Tag, "Pencil-Beam"); + snprintf(Cones[Nlightcones].Tag, MAXLEN_PATH, "Pencil-Beam"); break; case LC_TYPE_DISK: @@ -502,7 +597,7 @@ void lightcone::lightcone_init_geometry(char *fname) /* normalize the normal vector in case it is not normalized yet */ Cones[Nlightcones].DiskNormal *= 1.0 / Cones[Nlightcones].DiskNormal.norm(); - sprintf(Cones[Nlightcones].Tag, "Disk (for image)"); + snprintf(Cones[Nlightcones].Tag, MAXLEN_PATH, "Disk (for image)"); break; case LC_TYPE_SQUAREMAP: @@ -537,22 +632,36 @@ void lightcone::lightcone_init_geometry(char *fname) mpi_printf("LIGHTCONE_SQUAREMAP: cone=%2d z-axis = %15g %15g %15g\n", Nlightcones, Cones[Nlightcones].SquareMapZdir[0], Cones[Nlightcones].SquareMapZdir[1], Cones[Nlightcones].SquareMapZdir[2]); - sprintf(Cones[Nlightcones].Tag, "Square-map"); + snprintf(Cones[Nlightcones].Tag, MAXLEN_PATH, "Square-map"); break; default: Terminate("odd"); } +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + fscanf(fd, "%d", &Cones[Nlightcones].OriginIndex); + + if(Cones[Nlightcones].OriginIndex < 0 || Cones[Nlightcones].OriginIndex >= NlightconeOrigins) + Terminate("lightcone origin '%d' out of range, we have only %d origins", Cones[Nlightcones].OriginIndex, + NlightconeOrigins); +#endif Nlightcones++; } } fclose(fd); } +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + for(int i = 0; i < Nlightcones; i++) + mpi_printf("LIGHTCONE_PARTICLES: lightcone #%2d: %18s %20s Astart=%10g Aend=%10g Origin: (%8g|%8g|%8g)\n", i, Cones[i].Tag, + Cones[i].OnlyMostBoundFlag ? 
"(only most bound)" : "(all particles)", Cones[i].Astart, Cones[i].Aend, + ConeOrigins[Cones[i].OriginIndex].PosOrigin[0], ConeOrigins[Cones[i].OriginIndex].PosOrigin[1], + ConeOrigins[Cones[i].OriginIndex].PosOrigin[2]); +#else for(int i = 0; i < Nlightcones; i++) mpi_printf("LIGHTCONE_PARTICLES: lightcone #%2d: %18s %20s Astart=%10g Aend=%10g\n", i, Cones[i].Tag, Cones[i].OnlyMostBoundFlag ? "(only most bound)" : "(all particles)", Cones[i].Astart, Cones[i].Aend); - +#endif mpi_printf("\n"); } @@ -564,6 +673,15 @@ void lightcone::lightcone_init_geometry(char *fname) Cones = (cone_data *)Mem.mymalloc("Cones", Nlightcones * sizeof(cone_data)); MPI_Bcast(Cones, Nlightcones * sizeof(cone_data), MPI_BYTE, 0, Communicator); + +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + MPI_Bcast(&NlightconeOrigins, 1, MPI_INT, 0, Communicator); + + if(ThisTask != 0) + ConeOrigins = (cone_origin *)Mem.mymalloc("ConeOrigins", (NlightconeOrigins + 1) * sizeof(cone_origin)); + + MPI_Bcast(ConeOrigins, NlightconeOrigins * sizeof(cone_origin), MPI_BYTE, 0, Communicator); +#endif } int lightcone::lightcone_init_times(void) @@ -598,63 +716,86 @@ int lightcone::lightcone_init_times(void) ConeGlobComDistStart = Driftfac.get_comoving_distance(ConeGlobTime_start); ConeGlobComDistEnd = Driftfac.get_comoving_distance(ConeGlobTime_end); - int n = ceil(ConeGlobComDistStart / All.BoxSize + 1); + double fac = (4 * M_PI / 3.0) * pow(ConeGlobComDistStart, 3) / pow(All.BoxSize, 3); - for(int rep = 0; rep < 2; rep++) - { - if(rep == 1) - // BoxList = (boxlist *) Mem.mymalloc_movable(&BoxList, "BoxList", NumBoxes * sizeof(boxlist)); - BoxList = Mem.alloc_movable<boxlist> MMM(BoxList, NumBoxes); + mpi_printf( + "LIGHTCONE_PARTICLES: scale_factor: %10g to %10g comoving distance: %10g to %10g covered volume in units of box " + "volume=%g\n", + ConeGlobAstart, ConeGlobAend, ConeGlobComDistStart, ConeGlobComDistEnd, fac); - NumBoxes = 0; + return 0; +} - for(int i = -n; i <= n; i++) - for(int j = -n; j <= n; j++) - 
for(int k = -n; k <= n; k++) - { - double corner[3]; +#endif - corner[0] = i * All.BoxSize; - corner[1] = j * All.BoxSize; - corner[2] = k * All.BoxSize; +int lightcone::lightcone_init_boxlist(void) +{ + double max_GlobComDistStart = 0; - double Rmin, Rmax; +#ifdef LIGHTCONE_PARTICLES + if(ConeGlobComDistStart > max_GlobComDistStart) + max_GlobComDistStart = ConeGlobComDistStart; +#endif - if(lightcone_box_at_corner_overlaps_at_least_with_one_cone(corner, Rmin, Rmax)) - { - if(rep == 1) - { - BoxList[NumBoxes].i = i; - BoxList[NumBoxes].j = j; - BoxList[NumBoxes].k = k; - BoxList[NumBoxes].Rmin = Rmin; - BoxList[NumBoxes].Rmax = Rmax; - } +#ifdef LIGHTCONE_MASSMAPS + if(MassMapBoundariesComDist[0] > max_GlobComDistStart) + max_GlobComDistStart = MassMapBoundariesComDist[0]; +#endif - NumBoxes++; - } - } - } + int n = ceil(max_GlobComDistStart / All.BoxSize + 1); - mycxxsort(BoxList, BoxList + NumBoxes, lightcone_compare_BoxList_Rmax); +#ifndef LIGHTCONE_MULTIPLE_ORIGINS + int oindex = 0; +#else + for(int oindex = 0; oindex < NlightconeOrigins; oindex++) +#endif + { + for(int rep = 0; rep < 2; rep++) + { + if(rep == 1) + BoxOrigin[oindex].BoxList = (boxlist *)Mem.mymalloc_movable(&BoxOrigin[oindex].BoxList, "BoxOrigin[oindex].BoxList", + BoxOrigin[oindex].NumBoxes * sizeof(boxlist)); + + BoxOrigin[oindex].NumBoxes = 0; + + for(int i = -n; i <= n; i++) + for(int j = -n; j <= n; j++) + for(int k = -n; k <= n; k++) + { + double corner[3]; + + corner[0] = i * All.BoxSize; + corner[1] = j * All.BoxSize; + corner[2] = k * All.BoxSize; + + double Rmin, Rmax; + + if(lightcone_box_at_corner_overlaps_at_least_with_one_cone(corner, Rmin, Rmax, oindex)) + { + int num = BoxOrigin[oindex].NumBoxes++; + + if(rep == 1) + { + BoxOrigin[oindex].BoxList[num].i = i; + BoxOrigin[oindex].BoxList[num].j = j; + BoxOrigin[oindex].BoxList[num].k = k; + BoxOrigin[oindex].BoxList[num].Rmin = Rmin; + BoxOrigin[oindex].BoxList[num].Rmax = Rmax; + } + } + } + } + 
mycxxsort(BoxOrigin[oindex].BoxList, BoxOrigin[oindex].BoxList + BoxOrigin[oindex].NumBoxes, lightcone_compare_BoxList_Rmax); + } lightcone_clear_boxlist(All.Time); - NumLastCheck = 0; + mpi_printf("LIGHTCONE: Number of box replicas to check for first origin lightcone geometry settings = %d\n", BoxOrigin[0].NumBoxes); - double fac = (4 * M_PI / 3.0) * pow(ConeGlobComDistStart, 3) / pow(All.BoxSize, 3); - - mpi_printf( - "LIGHTCONE_PARTICLES: scale_factor: %10g to %10g comoving distance: %10g to %10g covered volume in units of box " - "volume=%g\n", - ConeGlobAstart, ConeGlobAend, ConeGlobComDistStart, ConeGlobComDistEnd, fac); - - mpi_printf("LIGHTCONE_PARTICLES: number of box replicas to check for this lightcone geometry settings = %d\n", NumBoxes); - - if(NumBoxes > LIGHTCONE_MAX_BOXREPLICAS) + if(BoxOrigin[0].NumBoxes > LIGHTCONE_MAX_BOXREPLICAS) { mpi_printf( - "\nLIGHTCONE_PARTICLES: Your lightcone extends to such high redshift that the box needs to be replicated a huge number of " + "\nLIGHTCONE: Your lightcone extends to such high redshift that the box needs to be replicated a huge number of " "times to cover it,\n" "more than the prescribed limit of LIGHTCONE_MAX_BOXREPLICAS=%d. 
We better don't do such an inefficient run, unless you " "override this constant.\n", @@ -671,46 +812,59 @@ void lightcone::lightcone_clear_boxlist(double ascale) double dist = Driftfac.get_comoving_distance(time_start); - int count = 0; - - for(int i = 0; i < NumBoxes; i++) - { - if(dist < BoxList[i].Rmin) - { - BoxList[i] = BoxList[--NumBoxes]; - i--; - count++; - } - } - - if(count) - { - mpi_printf("LIGHTCONE: Eliminated %d entries from BoxList\n", count); - mycxxsort(BoxList, BoxList + NumBoxes, lightcone_compare_BoxList_Rmax); - } +#ifndef LIGHTCONE_MULTIPLE_ORIGINS + int oindex = 0; +#else + for(int oindex = 0; oindex < NlightconeOrigins; oindex++) +#endif + { + int count = 0; + + for(int i = 0; i < BoxOrigin[oindex].NumBoxes; i++) + { + if(dist < BoxOrigin[oindex].BoxList[i].Rmin) + { + BoxOrigin[oindex].BoxList[i] = BoxOrigin[oindex].BoxList[--BoxOrigin[oindex].NumBoxes]; + i--; + count++; + } + } + + if(count) + { + mpi_printf("LIGHTCONE: Eliminated %d entries from BoxList\n", count); + mycxxsort(BoxOrigin[oindex].BoxList, BoxOrigin[oindex].BoxList + BoxOrigin[oindex].NumBoxes, lightcone_compare_BoxList_Rmax); + } + } } -bool lightcone::lightcone_box_at_corner_overlaps_at_least_with_one_cone(double *corner, double &Rmin, double &Rmax) +bool lightcone::lightcone_box_at_corner_overlaps_at_least_with_one_cone(double *corner, double &Rmin, double &Rmax, int oindex) { - Rmin = MAX_DOUBLE_NUMBER; + Rmin = 0; Rmax = 0; - for(int ii = 0; ii <= 1; ii++) - for(int jj = 0; jj <= 1; jj++) - for(int kk = 0; kk <= 1; kk++) - { - double crn[3]; - crn[0] = corner[0] + ii * All.BoxSize; - crn[1] = corner[1] + jj * All.BoxSize; - crn[2] = corner[2] + kk * All.BoxSize; - - double r = sqrt(crn[0] * crn[0] + crn[1] * crn[1] + crn[2] * crn[2]); - if(Rmin > r) - Rmin = r; - if(Rmax < r) - Rmax = r; - } + for(int i = 0; i < 3; i++) + { + double left = corner[i]; +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + left -= ConeOrigins[oindex].PosOrigin[i]; +#endif + double right = left + 
All.BoxSize; + + double dx_min = std::min<double>(fabs(left), fabs(right)); + double dx_max = std::max<double>(fabs(left), fabs(right)); + + if(left * right <= 0) + dx_min = 0; + + Rmin += dx_min * dx_min; + Rmax += dx_max * dx_max; + } + + Rmin = sqrt(Rmin); + Rmax = sqrt(Rmax); +#ifdef LIGHTCONE_PARTICLES for(int cone = 0; cone < Nlightcones; cone++) { if(Rmin < Cones[cone].ComDistStart && Rmax > Cones[cone].ComDistEnd) @@ -741,6 +895,11 @@ bool lightcone::lightcone_box_at_corner_overlaps_at_least_with_one_cone(double * crn[1] = corner[1] + jj * All.BoxSize; crn[2] = corner[2] + kk * All.BoxSize; +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + crn[0] -= ConeOrigins[oindex].PosOrigin[0]; + crn[1] -= ConeOrigins[oindex].PosOrigin[1]; + crn[2] -= ConeOrigins[oindex].PosOrigin[2]; +#endif double dist = crn[0] * Cones[cone].DiskNormal[0] + crn[1] * Cones[cone].DiskNormal[1] + crn[2] * Cones[cone].DiskNormal[2]; @@ -761,12 +920,16 @@ bool lightcone::lightcone_box_at_corner_overlaps_at_least_with_one_cone(double * } } } +#endif + +#ifdef LIGHTCONE_MASSMAPS + if(Rmin < MassMapBoundariesComDist[0]) + return true; +#endif return false; } -#endif - #ifdef LIGHTCONE_MASSMAPS void lightcone::lightcone_init_massmaps(void) @@ -933,7 +1096,7 @@ void lightcone::lightcone_massmap_binning(void) Send_count[target]++; } - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; int nexport = 0, nimport = 0; @@ -980,9 +1143,10 @@ void lightcone::lightcone_massmap_binning(void) if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&send_P[Send_offset[recvTask]], Send_count[recvTask] * sizeof(lightcone_massmap_data), MPI_BYTE, recvTask, - TAG_DENS_A, &Mp->P[Mp->NumPart + Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(lightcone_massmap_data), - MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); + 
myMPI_Sendrecv(&send_P[Send_offset[recvTask]], Send_count[recvTask] * sizeof(lightcone_massmap_data), MPI_BYTE, recvTask, + TAG_DENS_A, &Mp->P[Mp->NumPart + Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(lightcone_massmap_data), MPI_BYTE, recvTask, TAG_DENS_A, Communicator, + MPI_STATUS_IGNORE); } Mp->NumPart += nimport; diff --git a/src/lightcone/lightcone.h b/src/lightcone/lightcone.h index 96bd9615096583d0222cac171908da3b162258c5..367948abbac77cb4b405dca6873681596edb96e7 100644 --- a/src/lightcone/lightcone.h +++ b/src/lightcone/lightcone.h @@ -37,6 +37,12 @@ #define LIGHTCONE_ORDER_NSIDE 256 #endif +#ifdef LIGHTCONE_MULTIPLE_ORIGINS +#define LIGHTCONE_MAX_NUMBER_ORIGINS 32 +#else +#define LIGHTCONE_MAX_NUMBER_ORIGINS 1 +#endif + #define LC_TYPE_FULLSKY 0 #define LC_TYPE_OCTANT 1 #define LC_TYPE_PENCIL 2 @@ -98,6 +104,16 @@ class lightcone : public parameters #ifdef LIGHTCONE_PARTICLES int Nlightcones; +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + int NlightconeOrigins; + + struct cone_origin + { + double PosOrigin[3]; + }; + cone_origin *ConeOrigins; +#endif + struct cone_data { double Astart; @@ -129,6 +145,10 @@ class lightcone : public parameters double SquareMapAngleRad; char Tag[100]; + +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + int OriginIndex; +#endif }; cone_data *Cones; @@ -149,17 +169,23 @@ class lightcone : public parameters double Rmin; /* minimum comoving distance of this box */ double Rmax; /* minimum comoving distance of this box */ }; - boxlist *BoxList; - int NumBoxes; - int NumLastCheck; + + struct boxorigin + { + boxlist *BoxList; + int NumBoxes; + }; + + boxorigin BoxOrigin[LIGHTCONE_MAX_NUMBER_ORIGINS]; void lightcone_init_geometry(char *fname); - void lightcone_add_position_particles(particle_data *P, double *pos, double ascale); + void lightcone_add_position_particles(particle_data *P, double *pos, double ascale, int oindex); int lightcone_init_times(void); + int lightcone_init_boxlist(void); bool lightcone_is_cone_member(int i, int cone); bool 
lightcone_is_cone_member_basic(double ascale, vector<double> &pos, bool previously, int cone); - bool lightcone_box_at_corner_overlaps_at_least_with_one_cone(double *corner, double &rmin, double &rmax); + bool lightcone_box_at_corner_overlaps_at_least_with_one_cone(double *corner, double &rmin, double &rmax, int oindex); void lightcone_clear_boxlist(double ascale); static bool lightcone_compare_BoxList_Rmax(const boxlist &a, const boxlist &b) diff --git a/src/lightcone/lightcone_massmap_io.cc b/src/lightcone/lightcone_massmap_io.cc index 6a2fb749cb86ce3907f36d75b79d74e556a44e7a..b3f4020495bc144d205b1d17f01579d2754e7ff3 100644 --- a/src/lightcone/lightcone_massmap_io.cc +++ b/src/lightcone/lightcone_massmap_io.cc @@ -53,7 +53,7 @@ lightcone_massmap_io::lightcone_massmap_io(mmparticles *Mp_ptr, lightcone *Light this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_MASSMAP; - sprintf(this->info, "LIGHTCONE: writing mass map data"); + snprintf(this->info, MAXLEN_PATH, "LIGHTCONE: writing mass map data"); init_field("MAMP", "Mass", MEM_DOUBLE, FILE_MY_IO_FLOAT, SKIP_ON_READ, 1, A_MM, &LightCone->MassMap[0], NULL, MASSMAPS, 1, 0., -1., 0., 1., 0., All.UnitMass_in_g, true); @@ -61,7 +61,7 @@ lightcone_massmap_io::lightcone_massmap_io(mmparticles *Mp_ptr, lightcone *Light void lightcone_massmap_io::lightcone_massmap_save(int num) { - char buf[2 * MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; selected_bnd = num; @@ -74,11 +74,11 @@ void lightcone_massmap_io::lightcone_massmap_save(int num) { if(ThisTask == 0) { - char buf[2 * MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING - sprintf(buf, "%s/mapsdir_%06d", All.OutputDir, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/mapsdir_%06d", All.OutputDir, num); #else - sprintf(buf, "%s/mapsdir_%03d", All.OutputDir, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/mapsdir_%03d", All.OutputDir, num); #endif mkdir(buf, 02755); } @@ -87,14 +87,14 @@ void 
lightcone_massmap_io::lightcone_massmap_save(int num) #ifdef ALT_NAMING if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/mapsdir_%06d/%s_%06d", All.OutputDir, num, "maps", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/mapsdir_%06d/%s_%06d", All.OutputDir, num, "maps", num); else - sprintf(buf, "%s/%s_%06d", All.OutputDir, "maps", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%06d", All.OutputDir, "maps", num); #else if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/mapsdir_%03d/%s_%03d", All.OutputDir, num, "maps", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/mapsdir_%03d/%s_%03d", All.OutputDir, num, "maps", num); else - sprintf(buf, "%s/%s_%03d", All.OutputDir, "maps", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%03d", All.OutputDir, "maps", num); #endif write_multiple_files(buf, All.NumFilesPerSnapshot); @@ -168,7 +168,7 @@ void lightcone_massmap_io::set_filenr_in_header(int numfiles) { header.num_files void lightcone_massmap_io::get_datagroup_name(int type, char *buf) { if(type == 0) - sprintf(buf, "/Maps"); + snprintf(buf, MAXLEN_PATH, "/Maps"); else Terminate("should not get here"); } diff --git a/src/lightcone/lightcone_particle_io.cc b/src/lightcone/lightcone_particle_io.cc index b9b0e4df90773dad9bb649954bc6f0ad80202958..146cad69af9bf1fb87141d0e3b733725c3e6f1f8 100644 --- a/src/lightcone/lightcone_particle_io.cc +++ b/src/lightcone/lightcone_particle_io.cc @@ -64,7 +64,7 @@ lightcone_particle_io::lightcone_particle_io(lcparticles *Lp_ptr, lightcone *Lig this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_LIGHTCONE; - sprintf(this->info, "LIGHTCONE: writing particle lightcone data"); + snprintf(this->info, MAXLEN_PATH, "LIGHTCONE: writing particle lightcone data"); init_field("POS ", "Coordinates", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, READ_IF_PRESENT, 3, A_LC, NULL, io_func_pos, ALL_TYPES, 1, 1., -1., 1., 0., 0., All.UnitLength_in_cm, true); @@ -138,12 +138,12 @@ void lightcone_particle_io::lightcone_read(int num, 
int conenr) Lp->TotNumPart = 0; - char fname[2 * MAXLEN_PATH]; + char fname[MAXLEN_PATH_EXTRA]; if(All.NumFilesPerSnapshot > 1) - sprintf(fname, "%s/lightcone_%02d/conedir_%04d/%s_%04d", All.OutputDir, conenr, num, "conesnap", num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s/lightcone_%02d/conedir_%04d/%s_%04d", All.OutputDir, conenr, num, "conesnap", num); else - sprintf(fname, "%s/lightcone_%02d/%s_%04d", All.OutputDir, conenr, "conesnap", num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s/lightcone_%02d/%s_%04d", All.OutputDir, conenr, "conesnap", num); int num_files = find_files(fname, fname); @@ -178,7 +178,7 @@ void lightcone_particle_io::lightcone_read(int num, int conenr) void lightcone_particle_io::lightcone_save(int num, int conenr, bool reordered_flag) { - char buf[3 * MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; cone = conenr; /* note: cone is here a variable of the class, NOT a local variable */ reorder_flag = reordered_flag; @@ -228,33 +228,33 @@ void lightcone_particle_io::lightcone_save(int num, int conenr, bool reordered_f char lname[MAXLEN_PATH]; if(reordered_flag) - sprintf(lname, "lightcone_treeorder"); + snprintf(lname, MAXLEN_PATH, "lightcone_treeorder"); else - sprintf(lname, "lightcone"); + snprintf(lname, MAXLEN_PATH, "lightcone"); + + if(ThisTask == 0) + { + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%02d", All.OutputDir, lname, cone); + mkdir(buf, 02755); + } + MPI_Barrier(Communicator); if(All.NumFilesPerSnapshot > 1) { if(ThisTask == 0) { - char buf[3 * MAXLEN_PATH]; - sprintf(buf, "%s/%s_%02d", All.OutputDir, lname, cone); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%02d/conedir_%04d", All.OutputDir, lname, cone, num); mkdir(buf, 02755); } MPI_Barrier(Communicator); } - if(ThisTask == 0) - { - char buf[3 * MAXLEN_PATH]; - sprintf(buf, "%s/%s_%02d/conedir_%04d", All.OutputDir, lname, cone, num); - mkdir(buf, 02755); - } - MPI_Barrier(Communicator); - if(All.NumFilesPerSnapshot > 1) - 
sprintf(buf, "%s/%s_%02d/conedir_%04d/%s_%04d", All.OutputDir, lname, cone, num, "conesnap", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%02d/conedir_%04d/%s_%04d", All.OutputDir, lname, cone, num, "conesnap", num); else - sprintf(buf, "%s/%s_%02d/%s_%04d", All.OutputDir, lname, cone, "conesnap", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/%s_%02d/%s_%04d", All.OutputDir, lname, cone, "conesnap", num); write_multiple_files(buf, All.NumFilesPerSnapshot); @@ -336,6 +336,12 @@ void lightcone_particle_io::fill_file_header(int writeTask, int lastTask, long l } header.num_files = All.NumFilesPerSnapshot; + +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + int oindex = LightCone->Cones[cone].OriginIndex; + for(int n = 0; n < 3; n++) + header.Origin[n] = LightCone->ConeOrigins[oindex].PosOrigin[n]; +#endif } void lightcone_particle_io::write_header_fields(hid_t handle) @@ -355,6 +361,10 @@ void lightcone_particle_io::write_header_fields(hid_t handle) write_scalar_attribute(handle, "Npix_ThisFile", &header.Npix, H5T_NATIVE_UINT32); write_scalar_attribute(handle, "Npix_Total", &header.TotNpix, H5T_NATIVE_UINT32); } + +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + write_vector_attribute(handle, "Origin", header.Origin, H5T_NATIVE_DOUBLE, 3); +#endif } void lightcone_particle_io::set_filenr_in_header(int numfiles) { header.num_files = numfiles; } @@ -362,11 +372,11 @@ void lightcone_particle_io::set_filenr_in_header(int numfiles) { header.num_file void lightcone_particle_io::get_datagroup_name(int type, char *buf) { if(type < NTYPES) - sprintf(buf, "/PartType%d", type); + snprintf(buf, MAXLEN_PATH, "/PartType%d", type); else if(type == NTYPES) - sprintf(buf, "/TreeTable"); + snprintf(buf, MAXLEN_PATH, "/TreeTable"); else if(type == NTYPES + 1) - sprintf(buf, "/HealPixHashTable"); + snprintf(buf, MAXLEN_PATH, "/HealPixHashTable"); else Terminate("wrong group"); } diff --git a/src/lightcone/lightcone_particle_io.h b/src/lightcone/lightcone_particle_io.h index 
aa84a6eeaa02fe1ac21c645b55e704789144b0c4..363fcb044de2efcbe0057a6598ffb52be42f751d 100644 --- a/src/lightcone/lightcone_particle_io.h +++ b/src/lightcone/lightcone_particle_io.h @@ -62,6 +62,10 @@ class lightcone_particle_io : public IO_Def int TotNpix; int num_files; + +#ifdef LIGHTCONE_MULTIPLE_ORIGINS + double Origin[3]; +#endif }; io_header header; /**< holds header for snapshot files */ diff --git a/src/logs/logs.cc b/src/logs/logs.cc index f9604ef0ea19a868d12521225e396bc7afc13f2f..a6a7754394c92f89491f984a4a2f45a0636e090d 100644 --- a/src/logs/logs.cc +++ b/src/logs/logs.cc @@ -44,54 +44,54 @@ void logs::open_logfiles(void) if(ThisTask != 0) /* only the root processors writes to the log files */ return; - sprintf(buf, "%s%s", All.OutputDir, "cpu.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "cpu.txt"); if(!(FdCPU = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); - sprintf(buf, "%s%s", All.OutputDir, "cpu.csv"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "cpu.csv"); if(!(FdCPUCSV = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); - sprintf(buf, "%s%s", All.OutputDir, "info.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "info.txt"); if(!(FdInfo = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); - sprintf(buf, "%s%s", All.OutputDir, "energy.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "energy.txt"); if(!(FdEnergy = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); - sprintf(buf, "%s%s", All.OutputDir, "timings.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "timings.txt"); if(!(FdTimings = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); - sprintf(buf, "%s%s", All.OutputDir, "density.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "density.txt"); if(!(FdDensity = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); - sprintf(buf, "%s%s", All.OutputDir, 
"hydro.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "hydro.txt"); if(!(FdHydro = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); - sprintf(buf, "%s%s", All.OutputDir, "balance.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "balance.txt"); if(!(FdBalance = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); - sprintf(buf, "%s%s", All.OutputDir, "timebins.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "timebins.txt"); if(!(FdTimebin = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); - sprintf(buf, "%s%s", All.OutputDir, "domain.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "domain.txt"); if(!(FdDomain = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); #ifdef MEASURE_TOTAL_MOMENTUM - sprintf(buf, "%s%s", All.OutputDir, "momentum.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "momentum.txt"); if(!(FdMomentum = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); #endif #ifdef FORCETEST - sprintf(buf, "%s%s", All.OutputDir, "forcetest.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "forcetest.txt"); if(!(FdForceTest = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); @@ -99,7 +99,7 @@ void logs::open_logfiles(void) #endif #ifdef DEBUG_MD5 - sprintf(buf, "%s%s", All.OutputDir, "debug_md5.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "debug_md5.txt"); if(!(FdDebug = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); #endif @@ -120,7 +120,7 @@ void logs::open_logfiles(void) fprintf(FdCPUCSV, "\n"); #ifdef STARFORMATION - sprintf(buf, "%s%s", All.OutputDir, "sfr.txt"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "sfr.txt"); if(!(FdSfr = fopen(buf, mode))) Terminate("error in opening file '%s'\n", buf); #endif @@ -222,10 +222,11 @@ void logs::output_log_messages(void) for(int i = 0; i < TIMEBINS; i++) { double sum = 0; 
- for(int j = 0; j < All.CPU_TimeBinCountMeasurements[i]; j++) - sum += All.CPU_TimeBinMeasurements[i][j]; + if(tot_count_sph[i] > 0 || tot_count_grav[i] > 0) + for(int j = 0; j < All.CPU_TimeBinCountMeasurements[i]; j++) + sum += All.CPU_TimeBinMeasurements[i][j]; - if(All.CPU_TimeBinCountMeasurements[i]) + if(All.CPU_TimeBinCountMeasurements[i] && (tot_count_sph[i] > 0 || tot_count_grav[i] > 0)) avg_CPU_TimeBin[i] = sum / All.CPU_TimeBinCountMeasurements[i]; else avg_CPU_TimeBin[i] = 0; @@ -235,7 +236,7 @@ void logs::output_log_messages(void) double sum = 0; double frac_CPU_TimeBin[TIMEBINS]; - for(int i = All.HighestOccupiedTimeBin; i >= 0 && tot_count_grav[i] > 0; i--, weight *= 2) + for(int i = All.HighestOccupiedTimeBin; i >= 0; i--, weight *= 2) { int corr_weight; @@ -248,7 +249,7 @@ void logs::output_log_messages(void) sum += frac_CPU_TimeBin[i]; } - for(int i = All.HighestOccupiedTimeBin; i >= 0 && tot_count_grav[i] > 0; i--) + for(int i = All.HighestOccupiedTimeBin; i >= 0; i--) { if(sum) frac_CPU_TimeBin[i] /= sum; diff --git a/src/logs/logs.h b/src/logs/logs.h index 680a40a643a334fc45904c6d48bad49ab4e59de9..13c9389af0e6a3f77cb9263817260f681c5645c0 100644 --- a/src/logs/logs.h +++ b/src/logs/logs.h @@ -12,6 +12,8 @@ #ifndef LOGS_H #define LOGS_H +#include "gadgetconfig.h" + #include <stdio.h> #include "../main/simulation.h" diff --git a/src/main/begrun.cc b/src/main/begrun.cc index 486d581aba761a37bc274d0a05a06609f6b05c25..ca49c4334af0d957e19d65a6f4ac07fb5e73b70e 100644 --- a/src/main/begrun.cc +++ b/src/main/begrun.cc @@ -9,8 +9,8 @@ * \brief initial set-up of a simulation run */ -#include "compiler-command-line-args.h" #include "gadgetconfig.h" +#include "compiler-command-line-args.h" #include <hdf5.h> #include <math.h> @@ -262,7 +262,7 @@ void sim::begrun1(const char *parameterFile) void sim::begrun2(void) { char contfname[MAXLEN_PATH_EXTRA]; - sprintf(contfname, "%scont", All.OutputDir); + snprintf(contfname, MAXLEN_PATH_EXTRA, "%scont", 
All.OutputDir); unlink(contfname); if(All.RestartFlag != RST_BEGIN && All.RestartFlag != RST_RESUME && All.RestartFlag != RST_STARTFROMSNAP) @@ -295,6 +295,9 @@ void sim::begrun2(void) if(LightCone.lightcone_massmap_report_boundaries()) endrun(); #endif + if(LightCone.lightcone_init_boxlist()) + endrun(); + double linklength = 0; #ifdef FOF @@ -350,6 +353,16 @@ void sim::set_units(void) if(All.ComovingIntegrationOn) { +#if defined(RADIATION) && !defined(SMOOTHMATTER) + All.OmegaCurvature = 1.0 - (All.Omega0 + All.OmegaLambda + All.OmegaR); +#elif !defined(RADIATION) && defined(SMOOTHMATTER) + All.OmegaCurvature = 1.0 - (All.Omega0 + All.OmegaLambda + All.OmegaSmooth); +#elif defined(RADIATION) && defined(SMOOTHMATTER) + All.OmegaCurvature = 1.0 - (All.Omega0 + All.OmegaLambda + All.OmegaSmooth + All.OmegaR); +#else + All.OmegaCurvature = 1.0 - (All.Omega0 + All.OmegaLambda); +#endif + /* check whether the supplied value of All.Hubble makes sense */ if(All.HubbleParam != 1.0) { diff --git a/src/main/init.cc b/src/main/init.cc index 0a026f660b92e6208d2462876d38e1f3522e835e..1b4d4e2485d3cf54b6433a447a5cb1933ef93507 100644 --- a/src/main/init.cc +++ b/src/main/init.cc @@ -9,9 +9,12 @@ * \brief code for initialization of a simulation from initial conditions */ +// clang-format off #include "gadgetconfig.h" +// clang-format on #include <mpi.h> + #include <algorithm> #include <cmath> #include <cstdio> @@ -485,7 +488,6 @@ void sim::init(int RestartSnapNum) #endif #endif - double mass = 0; for(int i = 0; i < Sp.NumGas; i++) { #ifndef INITIAL_CONDITIONS_CONTAIN_ENTROPY @@ -502,8 +504,6 @@ void sim::init(int RestartSnapNum) /* The predicted entropy values have been already set for all SPH formulation */ /* so it should be ok computing pressure and csound now */ Sp.SphP[i].set_thermodynamic_variables(); - - mass += Sp.P[i].getMass(); } if(All.ComovingIntegrationOn) diff --git a/src/main/main.cc b/src/main/main.cc index 
42c8fe6dad5ca000266ca8f5bb76bf1b674a46d3..b675d6cfc29127fa0ef8860ee333d82e1a992cba 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -226,6 +226,7 @@ int main(int argc, char **argv) { restart Restart{Sim.Communicator}; Restart.load(&Sim); + All.RestartFlag = RST_RESUME; // prevent that this is overwritten by All.RestartFlag in restart set } else { @@ -236,14 +237,16 @@ int main(int argc, char **argv) { #ifdef ALT_NAMING if(All.NumFilesPerSnapshot > 1) - sprintf(fname, "%s/snapdir_%06d/%s_%06d", All.OutputDir, restartSnapNum, All.SnapshotFileBase, restartSnapNum); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s/snapdir_%06d/%s_%06d", All.OutputDir, restartSnapNum, All.SnapshotFileBase, + restartSnapNum); else - sprintf(fname, "%s%s_%06d", All.OutputDir, All.SnapshotFileBase, restartSnapNum); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, All.SnapshotFileBase, restartSnapNum); #else if(All.NumFilesPerSnapshot > 1) - sprintf(fname, "%s/snapdir_%03d/%s_%03d", All.OutputDir, restartSnapNum, All.SnapshotFileBase, restartSnapNum); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s/snapdir_%03d/%s_%03d", All.OutputDir, restartSnapNum, All.SnapshotFileBase, + restartSnapNum); else - sprintf(fname, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, restartSnapNum); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, restartSnapNum); #endif } else diff --git a/src/main/run.cc b/src/main/run.cc index ce4c23170a863b8bbc020562f6e42b2ef6367a83..75172e88f769639be29b06496e0b144a769c002f 100644 --- a/src/main/run.cc +++ b/src/main/run.cc @@ -9,7 +9,9 @@ * \brief contains the basic simulation loop that iterates over timesteps */ +// clang-format off #include "gadgetconfig.h" +// clang-format on #include <ctype.h> #include <math.h> @@ -84,8 +86,7 @@ void sim::run(void) #ifdef LIGHTCONE #ifdef LIGHTCONE_PARTICLES - mpi_printf("LIGHTCONE_PARTICLES: Lp.NumPart=%d Checked %d box replicas out of list of length %d\n", Lp.NumPart, - 
LightCone.NumLastCheck, LightCone.NumBoxes); + mpi_printf("LIGHTCONE_PARTICLES: Lp.NumPart=%d\n", Lp.NumPart); #endif #ifdef LIGHTCONE_MASSMAPS mpi_printf("LIGHTCONE_MASSMAPS: Mp.NumPart=%d \n", Mp.NumPart); @@ -168,7 +169,7 @@ void sim::run(void) Logs.log_debug_md5("AFTER SNAP"); #endif - if(All.Ti_Current >= TIMEBASE) /* did we reached the final time? */ + if(All.Ti_Current >= TIMEBASE || All.Time > All.TimeMax) /* did we reached the final time? */ { mpi_printf("\nFinal time=%g reached. Simulation ends.\n", All.TimeMax); @@ -274,7 +275,7 @@ int sim::check_for_interruption_of_run(void) FILE *fd; char stopfname[MAXLEN_PATH_EXTRA]; - sprintf(stopfname, "%sstop", All.OutputDir); + snprintf(stopfname, MAXLEN_PATH_EXTRA, "%sstop", All.OutputDir); if((fd = fopen(stopfname, "r"))) /* Is the stop-file present? If yes, interrupt the run. */ { fclose(fd); @@ -283,7 +284,7 @@ int sim::check_for_interruption_of_run(void) unlink(stopfname); } - sprintf(stopfname, "%srestart", All.OutputDir); + snprintf(stopfname, MAXLEN_PATH_EXTRA, "%srestart", All.OutputDir); if((fd = fopen(stopfname, "r"))) /* Is the restart-file present? If yes, write a user-requested restart file. 
*/ { fclose(fd); @@ -314,7 +315,7 @@ int sim::check_for_interruption_of_run(void) { FILE *fd; char contfname[MAXLEN_PATH_EXTRA]; - sprintf(contfname, "%scont", All.OutputDir); + snprintf(contfname, MAXLEN_PATH_EXTRA, "%scont", All.OutputDir); if((fd = fopen(contfname, "w"))) fclose(fd); } @@ -703,8 +704,10 @@ void sim::create_snapshot_if_desired(void) mycxxsort_parallel(Lp.P, Lp.P + Lp.NumPart, Lp.compare_ipnest, Communicator); #endif +#if !defined(LIGHTCONE_PARTICLES_SKIP_SAVING) for(int conenr = 0; conenr < LightCone.Nlightcones; conenr++) Lcone.lightcone_save(All.LightconeFileCount, conenr, false); +#endif mpi_printf("LIGHTCONE: done with writing files.\n"); diff --git a/src/mergertree/descendant.cc b/src/mergertree/descendant.cc index b4f79ce7bc357756963e9e6935745c6ea868d968..c7df8acfda64a69e89469068c1189ddf8fb9f21e 100644 --- a/src/mergertree/descendant.cc +++ b/src/mergertree/descendant.cc @@ -528,7 +528,7 @@ void mergertree::mergertree_set_first_progenitor_with_same_descendant(void) if(mode == 0) // prepare offset tables { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = 0; Send_offset[0] = 0; @@ -558,7 +558,7 @@ void mergertree::mergertree_set_first_progenitor_with_same_descendant(void) if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A, + myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); } @@ -638,7 +638,7 @@ void mergertree::mergertree_select_maximum_score_progenitors(int nmatch) if(mode == 0) // prepare offset tables { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 
1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = 0; Send_offset[0] = 0; @@ -668,7 +668,7 @@ void mergertree::mergertree_select_maximum_score_progenitors(int nmatch) if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask, + myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask, TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); } @@ -755,7 +755,7 @@ void mergertree::mergertree_select_maximum_score_descendants(int nmatch) if(mode == 0) // prepare offset tables { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = 0; Send_offset[0] = 0; @@ -785,7 +785,7 @@ void mergertree::mergertree_select_maximum_score_descendants(int nmatch) if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask, + myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask, TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); } @@ -885,7 +885,7 @@ void mergertree::mergertree_set_first_descendant_with_same_progenitor(void) if(mode == 0) // prepare offset tables { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = 0; Send_offset[0] = 0; @@ -915,7 +915,7 @@ void 
mergertree::mergertree_set_first_descendant_with_same_progenitor(void) if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A, + myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); } diff --git a/src/mergertree/halotrees.cc b/src/mergertree/halotrees.cc index 52b2d0a03071e736960638d4084e8fad8ac757a9..4b30f4bfa77fcb1c78efdd43ff568aca91f918c8 100644 --- a/src/mergertree/halotrees.cc +++ b/src/mergertree/halotrees.cc @@ -335,7 +335,7 @@ void mergertree::halotrees_assign_global_subhalonr_and_groupnr(void) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -359,9 +359,9 @@ void mergertree::halotrees_assign_global_subhalonr_and_groupnr(void) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(exch_data), MPI_BYTE, recvTask, - TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(exch_data), MPI_BYTE, - recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(exch_data), MPI_BYTE, recvTask, + TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(exch_data), MPI_BYTE, + recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } long long firstgrnr = 0; @@ -389,9 +389,9 @@ void mergertree::halotrees_assign_global_subhalonr_and_groupnr(void) int recvTask = 
ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(exch_data), MPI_BYTE, recvTask, - TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(exch_data), MPI_BYTE, - recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(exch_data), MPI_BYTE, recvTask, + TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(exch_data), MPI_BYTE, + recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } /* now read it out and assign the data */ @@ -980,7 +980,7 @@ void mergertree::halotrees_reshuffle(char **ptr, size_t len, int ncurrent, int n MPI_Allgather(&ntarget, 1, MPI_INT, tab_ntarget, 1, MPI_INT, Communicator); /* now work out where our local data should go */ - int nexport = 0, nimport = 0; + int nimport = 0; for(int i = 0; i < NTask; i++) Send_count[i] = 0; @@ -998,13 +998,12 @@ void mergertree::halotrees_reshuffle(char **ptr, size_t len, int ncurrent, int n Send_count[target]++; } - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { nimport += Recv_count[j]; - nexport += Send_count[j]; if(j > 0) { @@ -1021,9 +1020,9 @@ void mergertree::halotrees_reshuffle(char **ptr, size_t len, int ncurrent, int n int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&buf[Send_offset[recvTask] * len], Send_count[recvTask] * len, MPI_BYTE, recvTask, TAG_DENS_B, - *ptr + Recv_offset[recvTask] * len, Recv_count[recvTask] * len, MPI_BYTE, recvTask, TAG_DENS_B, Communicator, - MPI_STATUS_IGNORE); + myMPI_Sendrecv(&buf[Send_offset[recvTask] * len], Send_count[recvTask] * len, MPI_BYTE, recvTask, 
TAG_DENS_B, + *ptr + Recv_offset[recvTask] * len, Recv_count[recvTask] * len, MPI_BYTE, recvTask, TAG_DENS_B, Communicator, + MPI_STATUS_IGNORE); } Mem.myfree(tab_ntarget); @@ -1198,7 +1197,7 @@ void mergertree::halotrees_remap_treepointers(void) if(mode == 0) // prepare offset tables { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -1222,10 +1221,10 @@ void mergertree::halotrees_remap_treepointers(void) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(remap_data), MPI_BYTE, - recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]], - Recv_count[recvTask] * sizeof(remap_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, - MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(remap_data), MPI_BYTE, + recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(remap_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, + MPI_STATUS_IGNORE); } /* incoming data is not necessarily be sorted according to subhalorn, that's why we need to sort it now */ @@ -1277,10 +1276,10 @@ void mergertree::halotrees_remap_treepointers(void) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(remap_data), MPI_BYTE, - recvTask, TAG_DENS_B, &export_data[Send_offset[recvTask]], - Send_count[recvTask] * sizeof(remap_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, - MPI_STATUS_IGNORE); + myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(remap_data), MPI_BYTE, + recvTask, TAG_DENS_B, 
&export_data[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(remap_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, + MPI_STATUS_IGNORE); } for(int i = 0; i < nexport; i++) @@ -1552,7 +1551,7 @@ int mergertree::halotrees_join_via_descendants(int num) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -1576,9 +1575,9 @@ int mergertree::halotrees_join_via_descendants(int num) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask, - TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, - recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask, + TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, + recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } /* the collection of incoming data is not necessarily sorted according to descendantnr, so we need to sort it for efficient matching @@ -1624,9 +1623,9 @@ int mergertree::halotrees_join_via_descendants(int num) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask, - TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, - recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask, + 
TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, + recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } /* now read it out and assign the new treeid/treetask value to the halos in the previous output (which are the progenitors) */ @@ -1726,7 +1725,7 @@ int mergertree::halotrees_join_via_progenitors(int num) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -1750,9 +1749,9 @@ int mergertree::halotrees_join_via_progenitors(int num) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask, - TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, - recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask, + TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, + recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } /* the collection of incoming data is not necessarily sorted according to descendantnr, so we need to sort it for efficient @@ -1799,9 +1798,9 @@ int mergertree::halotrees_join_via_progenitors(int num) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask, - TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, - recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); + 
myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask, + TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, + recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } /* now read it out and assign the new treeid/treetask value to the halos in the previous output (which are the progenitors) */ @@ -1928,7 +1927,7 @@ void mergertree::halotrees_propagate_max_branch_length_descendants(int num) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -1952,10 +1951,10 @@ void mergertree::halotrees_propagate_max_branch_length_descendants(int num) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, - recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]], - Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, - MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, + recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, + MPI_STATUS_IGNORE); } /* the collection of incoming data is not necessarily sorted according to DescSubhaloNr, so we need to sort it for efficient @@ -2067,7 +2066,7 @@ void mergertree::halotrees_propagate_max_branch_length_progenitors(int num) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = 
Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -2091,10 +2090,10 @@ void mergertree::halotrees_propagate_max_branch_length_progenitors(int num) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, - recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]], - Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, - MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, + recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, + MPI_STATUS_IGNORE); } for(int i = 0; i < nimport; i++) @@ -2133,10 +2132,10 @@ void mergertree::halotrees_propagate_max_branch_length_progenitors(int num) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, - recvTask, TAG_DENS_B, &export_data[Send_offset[recvTask]], - Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, - MPI_STATUS_IGNORE); + myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, + recvTask, TAG_DENS_B, &export_data[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, + MPI_STATUS_IGNORE); } /* now read it out and assign the new treeid/treetask value to the halos in the previous output (which are the progenitors) */ diff --git a/src/mergertree/io_descendant.cc b/src/mergertree/io_descendant.cc index 
d68d622bb286481e0f5eb762255830887b474b7d..bc305ac5af42de012eec0bbc471b3f51a2e7ab3d 100644 --- a/src/mergertree/io_descendant.cc +++ b/src/mergertree/io_descendant.cc @@ -48,7 +48,7 @@ descendant_io::descendant_io(mergertree *MergerTree_ptr, MPI_Comm comm, int form this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_DESCCAT; - sprintf(this->info, "MERGERTREE: writing descendant information"); + snprintf(this->info, MAXLEN_PATH, "MERGERTREE: writing descendant information"); init_field("DSNR", "DescSubhaloNr", MEM_INT64, FILE_INT64, READ_IF_PRESENT, 1, A_DESC, NULL, io_func_descsubhalonr, PREVSUBS, 0, 0, 0, 0, 0, 0, 0, true); @@ -77,9 +77,9 @@ void descendant_io::mergertree_save_descendants(int num) if(ThisTask == 0) { #ifdef ALT_NAMING - sprintf(buf, "%s/groups_%06d", All.OutputDir, num - 1); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%06d", All.OutputDir, num - 1); #else - sprintf(buf, "%s/groups_%03d", All.OutputDir, num - 1); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%03d", All.OutputDir, num - 1); #endif mkdir(buf, 02755); } @@ -88,14 +88,14 @@ void descendant_io::mergertree_save_descendants(int num) #ifdef ALT_NAMING if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/groups_%06d/%s_%06d", All.OutputDir, num - 1, "subhalo_desc", num - 1); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%06d/%s_%06d", All.OutputDir, num - 1, "subhalo_desc", num - 1); else - sprintf(buf, "%s%s_%06d", All.OutputDir, "subhalo_desc", num - 1); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, "subhalo_desc", num - 1); #else if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/groups_%03d/%s_%03d", All.OutputDir, num - 1, "subhalo_desc", num - 1); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%03d/%s_%03d", All.OutputDir, num - 1, "subhalo_desc", num - 1); else - sprintf(buf, "%s%s_%03d", All.OutputDir, "subhalo_desc", num - 1); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, "subhalo_desc", num - 1); #endif 
write_multiple_files(buf, All.NumFilesPerSnapshot); @@ -106,11 +106,11 @@ void descendant_io::mergertree_read_descendants(int num) char fname[MAXLEN_PATH_EXTRA], fname_multiple[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING - sprintf(fname_multiple, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "subhalo_desc", num); - sprintf(fname, "%s%s_%06d", All.OutputDir, "subhalo_desc", num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "subhalo_desc", num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, "subhalo_desc", num); #else - sprintf(fname_multiple, "%s/groups_%03d/%s_%03d", All.OutputDir, num, "subhalo_desc", num); - sprintf(fname, "%s%s_%03d", All.OutputDir, "subhalo_desc", num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/groups_%03d/%s_%03d", All.OutputDir, num, "subhalo_desc", num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, "subhalo_desc", num); #endif TotNsubhalos = 0; @@ -267,7 +267,7 @@ void descendant_io::get_datagroup_name(int type, char *buf) switch(type) { case 0: - sprintf(buf, "/Subhalo"); + snprintf(buf, MAXLEN_PATH, "/Subhalo"); break; default: Terminate("wrong group"); diff --git a/src/mergertree/io_halotrees.cc b/src/mergertree/io_halotrees.cc index 1de1ecfd168f010d5ea08c4381cd9d71afddbf56..08a5001df219c7ae2cc1774cacaec6e81edfda58 100644 --- a/src/mergertree/io_halotrees.cc +++ b/src/mergertree/io_halotrees.cc @@ -48,7 +48,7 @@ halotrees_io::halotrees_io(mergertree *MergerTree_ptr, MPI_Comm comm, int format this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_TREECAT; - sprintf(this->info, "MERGERTREE: writing mergertrees"); + snprintf(this->info, MAXLEN_PATH, "MERGERTREE: writing mergertrees"); /* overview table for trees in the file */ @@ -139,16 +139,16 @@ void halotrees_io::halotrees_save_trees(void) { if(ThisTask == 0) { - sprintf(buf, "%s/treedata", All.OutputDir); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/treedata", 
All.OutputDir); mkdir(buf, 02755); } MPI_Barrier(Communicator); } if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/treedata/%s", All.OutputDir, "trees"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/treedata/%s", All.OutputDir, "trees"); else - sprintf(buf, "%s%s", All.OutputDir, "trees"); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "trees"); write_multiple_files(buf, All.NumFilesPerSnapshot); } @@ -313,13 +313,13 @@ void halotrees_io::get_datagroup_name(int type, char *buf) switch(type) { case 0: - sprintf(buf, "/TreeTable"); + snprintf(buf, MAXLEN_PATH, "/TreeTable"); break; case 1: - sprintf(buf, "/TreeHalos"); + snprintf(buf, MAXLEN_PATH, "/TreeHalos"); break; case 2: - sprintf(buf, "/TreeTimes"); + snprintf(buf, MAXLEN_PATH, "/TreeTimes"); break; default: Terminate("wrong group"); diff --git a/src/mergertree/io_progenitors.cc b/src/mergertree/io_progenitors.cc index 9e5c257101dd806cac1aa23d19b7d70fc35bb5ab..f2afbb3f428206e138f716149139e8ceb6544552 100644 --- a/src/mergertree/io_progenitors.cc +++ b/src/mergertree/io_progenitors.cc @@ -48,7 +48,7 @@ progenitors_io::progenitors_io(mergertree *MergerTree_ptr, MPI_Comm comm, int fo this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_PROGCAT; - sprintf(this->info, "MERGERTREE: writing progenitor information"); + snprintf(this->info, MAXLEN_PATH, "MERGERTREE: writing progenitor information"); init_field("PSNR", "ProgSubhaloNr", MEM_INT64, FILE_INT64, READ_IF_PRESENT, 1, A_PROG, NULL, io_func_progsubhalonr, CURRSUBS, 0, 0, 0, 0, 0, 0, 0, true); @@ -76,9 +76,9 @@ void progenitors_io::mergertree_save_progenitors(int num) if(ThisTask == 0) { #ifdef ALT_NAMING - sprintf(buf, "%s/groups_%06d", All.OutputDir, num); + snprintf(buf, MAXLEN_PATH, "%s/groups_%06d", All.OutputDir, num); #else - sprintf(buf, "%s/groups_%03d", All.OutputDir, num); + snprintf(buf, MAXLEN_PATH, "%s/groups_%03d", All.OutputDir, num); #endif mkdir(buf, 02755); } @@ -87,14 +87,14 @@ void 
progenitors_io::mergertree_save_progenitors(int num) #ifdef ALT_NAMING if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "subhalo_prog", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "subhalo_prog", num); else - sprintf(buf, "%s%s_%06d", All.OutputDir, "subhalo_prog", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, "subhalo_prog", num); #else if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/groups_%03d/%s_%03d", All.OutputDir, num, "subhalo_prog", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%03d/%s_%03d", All.OutputDir, num, "subhalo_prog", num); else - sprintf(buf, "%s%s_%03d", All.OutputDir, "subhalo_prog", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, "subhalo_prog", num); #endif write_multiple_files(buf, All.NumFilesPerSnapshot); @@ -105,11 +105,11 @@ void progenitors_io::mergertree_read_progenitors(int num) char fname[MAXLEN_PATH_EXTRA], fname_multiple[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING - sprintf(fname_multiple, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "subhalo_prog", num); - sprintf(fname, "%s%s_%06d", All.OutputDir, "subhalo_prog", num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "subhalo_prog", num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, "subhalo_prog", num); #else - sprintf(fname_multiple, "%s/groups_%03d/%s_%03d", All.OutputDir, num, "subhalo_prog", num); - sprintf(fname, "%s%s_%03d", All.OutputDir, "subhalo_prog", num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/groups_%03d/%s_%03d", All.OutputDir, num, "subhalo_prog", num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, "subhalo_prog", num); #endif TotNsubhalos = 0; @@ -267,7 +267,7 @@ void progenitors_io::get_datagroup_name(int type, char *buf) switch(type) { case 0: - sprintf(buf, "/Subhalo"); + snprintf(buf, MAXLEN_PATH, "/Subhalo"); break; default: 
Terminate("wrong group"); diff --git a/src/mergertree/io_readsnap.cc b/src/mergertree/io_readsnap.cc index 0c29583f8aca6cf73364b2f7b797c5a5032904d2..aa4212974fad3fa774f570ebdf786de9cd4fa988 100644 --- a/src/mergertree/io_readsnap.cc +++ b/src/mergertree/io_readsnap.cc @@ -46,7 +46,7 @@ readsnap_io::readsnap_io(mergertree *MergerTree_ptr, MPI_Comm comm, int format) this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_SNAPSHOT; - sprintf(this->info, "MERGERTREE: reading snapshot IDs"); + snprintf(this->info, MAXLEN_PATH, "MERGERTREE: reading snapshot IDs"); init_field("ID ", "ParticleIDs", MEM_MY_ID_TYPE, FILE_MY_ID_TYPE, READ_IF_PRESENT, 1, A_MTRP, &MergerTree->MtrP[0].ID, NULL, ALL_TYPES, 0, 0, 0, 0, 0, 0, 0); @@ -83,11 +83,11 @@ void readsnap_io::mergertree_read_snap_ids(int num) char fname[MAXLEN_PATH_EXTRA], fname_multiple[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING - sprintf(fname_multiple, "%s/snapdir_%06d/%s_%06d", All.OutputDir, num, All.SnapshotFileBase, num); - sprintf(fname, "%s%s_%06d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/snapdir_%06d/%s_%06d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, All.SnapshotFileBase, num); #else - sprintf(fname_multiple, "%s/snapdir_%03d/%s_%03d", All.OutputDir, num, All.SnapshotFileBase, num); - sprintf(fname, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/snapdir_%03d/%s_%03d", All.OutputDir, num, All.SnapshotFileBase, num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num); #endif TIMER_START(CPU_SNAPSHOT); @@ -232,7 +232,7 @@ void readsnap_io::set_filenr_in_header(int numfiles) { header.num_files = numfil void readsnap_io::read_increase_numbers(int type, int n_for_this_task) { MergerTree->MtrP_NumPart += n_for_this_task; } -void readsnap_io::get_datagroup_name(int 
type, char *buf) { sprintf(buf, "/PartType%d", type); } +void readsnap_io::get_datagroup_name(int type, char *buf) { snprintf(buf, MAXLEN_PATH, "/PartType%d", type); } int readsnap_io::get_type_of_element(int index) { return MergerTree->MtrP[index].Type; } diff --git a/src/mergertree/io_readtrees_mbound.cc b/src/mergertree/io_readtrees_mbound.cc index af6d0c00d2d37d79e846e40b27c58105b6530377..7c21a70bb3b18b70a93dbaaeb90e1b8ef3a44e9a 100644 --- a/src/mergertree/io_readtrees_mbound.cc +++ b/src/mergertree/io_readtrees_mbound.cc @@ -57,7 +57,7 @@ readtrees_mbound_io::readtrees_mbound_io(mergertree *MergerTree_ptr, MPI_Comm co this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_TREECAT; - sprintf(this->info, "MERGERTREE: reading/writing mergertrees"); + snprintf(this->info, MAXLEN_PATH, "MERGERTREE: reading/writing mergertrees"); init_field("MTRL", "Length", MEM_INT, FILE_INT, READ_IF_PRESENT, 1, A_TT, &MergerTree->TreeTable[0].HaloCount, NULL, TREELENGTH, 0, 0, 0, 0, 0, 0, 0); @@ -85,9 +85,9 @@ void readtrees_mbound_io::read_trees_mostbound(void) char fname[MAXLEN_PATH_EXTRA]; if(All.NumFilesPerSnapshot > 1) - sprintf(fname, "%s/treedata/%s", All.OutputDir, "trees"); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s/treedata/%s", All.OutputDir, "trees"); else - sprintf(fname, "%s%s", All.OutputDir, "trees"); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s", All.OutputDir, "trees"); int num_files = find_files(fname, fname); @@ -104,7 +104,7 @@ void readtrees_mbound_io::read_trees_mostbound(void) if(rep == 0) { MergerTree->TreeTable = (halotrees_table *)Mem.mymalloc_movable(&MergerTree->TreeTable, "TreeTable", - (MergerTree->Ntrees + 1) * sizeof(halotrees_table)); + (MergerTree->Ntrees + 1) * sizeof(halotrees_table)); MergerTree->HaloIDdata = (treehalo_ids_type *)Mem.mymalloc_movable(&MergerTree->HaloIDdata, "HaloIDdata", (MergerTree->Nhalos + 1) * sizeof(treehalo_ids_type)); } @@ -236,10 +236,10 @@ void 
readtrees_mbound_io::get_datagroup_name(int type, char *buf) switch(type) { case 0: - sprintf(buf, "/TreeTable"); + snprintf(buf, MAXLEN_PATH, "/TreeTable"); break; case 1: - sprintf(buf, "/TreeHalos"); + snprintf(buf, MAXLEN_PATH, "/TreeHalos"); break; default: Terminate("wrong group"); diff --git a/src/mergertree/io_treelinks.cc b/src/mergertree/io_treelinks.cc index 3f189b46f4eda3c02afd04c4420426df7118842a..f587b1a8b293fec08978fa99a7c780263892f55e 100644 --- a/src/mergertree/io_treelinks.cc +++ b/src/mergertree/io_treelinks.cc @@ -48,7 +48,7 @@ treelinks_io::treelinks_io(mergertree *MergerTree_ptr, MPI_Comm comm, int format this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_TREELINK; - sprintf(this->info, "TREELINK: writing treelink information"); + snprintf(this->info, MAXLEN_PATH, "TREELINK: writing treelink information"); init_field("TRNR", "TreeID", MEM_INT64, FILE_INT64, READ_IF_PRESENT, 1, A_TL, &MergerTree->TreeLink[0].TreeID, NULL, TREELINK, 0, 0, 0, 0, 0, 0, 0, true); @@ -67,9 +67,9 @@ void treelinks_io::treelinks_save(int num) if(ThisTask == 0) { #ifdef ALT_NAMING - sprintf(buf, "%s/groups_%06d", All.OutputDir, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%06d", All.OutputDir, num); #else - sprintf(buf, "%s/groups_%03d", All.OutputDir, num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%03d", All.OutputDir, num); #endif mkdir(buf, 02755); } @@ -78,14 +78,14 @@ void treelinks_io::treelinks_save(int num) #ifdef ALT_NAMING if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "subhalo_treelink", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%06d/%s_%06d", All.OutputDir, num, "subhalo_treelink", num); else - sprintf(buf, "%s%s_%06d", All.OutputDir, "subhalo_treelink", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, "subhalo_treelink", num); #else if(All.NumFilesPerSnapshot > 1) - sprintf(buf, "%s/groups_%03d/%s_%03d", All.OutputDir, num, 
"subhalo_treelink", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/groups_%03d/%s_%03d", All.OutputDir, num, "subhalo_treelink", num); else - sprintf(buf, "%s%s_%03d", All.OutputDir, "subhalo_treelink", num); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, "subhalo_treelink", num); #endif write_multiple_files(buf, All.NumFilesPerSnapshot); @@ -158,7 +158,7 @@ void treelinks_io::get_datagroup_name(int type, char *buf) switch(type) { case 0: - sprintf(buf, "/Subhalo"); + snprintf(buf, MAXLEN_PATH, "/Subhalo"); break; default: Terminate("wrong group"); diff --git a/src/mergertree/mergertree.h b/src/mergertree/mergertree.h index 86f3cefe42bcd4ec4fd0c2484383eec6c1a8dcce..25f5abb54bacb60b73244b41ad5f6d27a8154d13 100644 --- a/src/mergertree/mergertree.h +++ b/src/mergertree/mergertree.h @@ -14,6 +14,8 @@ #ifdef MERGERTREE +#include "gadgetconfig.h" + #include <hdf5.h> #include "../data/simparticles.h" diff --git a/src/mergertree/postproc_descendants.cc b/src/mergertree/postproc_descendants.cc index 8f4232130db026609e809f2eef44a0fe606162df..c31680b76dfa6bdf5999aeb69cd484855ca9eb53 100644 --- a/src/mergertree/postproc_descendants.cc +++ b/src/mergertree/postproc_descendants.cc @@ -220,7 +220,7 @@ void mergertree::mergertree_match_ids_of_previous_snap(void) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -243,7 +243,7 @@ void mergertree::mergertree_match_ids_of_previous_snap(void) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(mergertree_particle_data), MPI_BYTE, + myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(mergertree_particle_data), MPI_BYTE, recvTask, TAG_DENS_B, 
&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(mergertree_particle_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); @@ -404,7 +404,7 @@ void mergertree::mergertree_assign_group_numbers(fof<simparticles> *FoF) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -427,7 +427,7 @@ void mergertree::mergertree_assign_group_numbers(fof<simparticles> *FoF) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, + myMPI_Sendrecv(&export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, &import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } @@ -510,7 +510,7 @@ void mergertree::mergertree_assign_group_numbers(fof<simparticles> *FoF) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -533,7 +533,7 @@ void mergertree::mergertree_assign_group_numbers(fof<simparticles> *FoF) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_subhalo_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(subhalo_info), MPI_BYTE, + myMPI_Sendrecv(&export_subhalo_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(subhalo_info), MPI_BYTE, recvTask, TAG_DENS_B, &import_subhalo_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(subhalo_info), 
MPI_BYTE, recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); @@ -642,7 +642,7 @@ void mergertree::mergertree_match_ids_of_current_snap(void) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -665,7 +665,7 @@ void mergertree::mergertree_match_ids_of_current_snap(void) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(mergertree_particle_data), MPI_BYTE, + myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(mergertree_particle_data), MPI_BYTE, recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(mergertree_particle_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); diff --git a/src/mergertree/rearrange.cc b/src/mergertree/rearrange.cc index caa44c884dddfcaa995bf09f4a3ea381e7d8cf65..9b59bfa93ea76a2c8d5af30ab7efd5c5e12f91aa 100644 --- a/src/mergertree/rearrange.cc +++ b/src/mergertree/rearrange.cc @@ -98,6 +98,10 @@ void sim::rearrange_lightcone(int argc, char **argv) endrun(); #endif + if(LightCone.lightcone_init_boxlist()) + endrun(); + + double linklength = 0; LightCone.lightcone_init_intposconverter(linklength); @@ -276,16 +280,12 @@ void sim::rearrange_fill_treetable(partset &Tp) Send_count[target_task]++; } - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); - int nexport = 0, nimport = 0; Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { - nexport += Send_count[j]; - nimport += Recv_count[j]; - if(j > 0) Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; } @@ -304,9 +304,9 @@ void sim::rearrange_fill_treetable(partset &Tp) { 
long long *treeid_tmp = (long long *)Mem.mymalloc("treeid_tmp", sizeof(long long) * Recv_count[recvTask]); - MPI_Sendrecv(&TreeID_list[Send_offset[recvTask]], Send_count[recvTask] * sizeof(long long), MPI_BYTE, recvTask, - TAG_DENS_A, treeid_tmp, Recv_count[recvTask] * sizeof(long long), MPI_BYTE, recvTask, TAG_DENS_A, - Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&TreeID_list[Send_offset[recvTask]], Send_count[recvTask] * sizeof(long long), MPI_BYTE, recvTask, + TAG_DENS_A, treeid_tmp, Recv_count[recvTask] * sizeof(long long), MPI_BYTE, recvTask, TAG_DENS_A, + Communicator, MPI_STATUS_IGNORE); for(int i = 0; i < Recv_count[recvTask]; i++) { diff --git a/src/mpi_utils/allreduce_sparse_double_sum.cc b/src/mpi_utils/allreduce_sparse_double_sum.cc index c600ccce88f5c318aae12ce22002e9800603a5b7..ab88c7652ffd2d7bb8d818fafbea8bfa60b37b46 100644 --- a/src/mpi_utils/allreduce_sparse_double_sum.cc +++ b/src/mpi_utils/allreduce_sparse_double_sum.cc @@ -74,7 +74,7 @@ void allreduce_sparse_double_sum(double *loc, double *glob, int N, MPI_Comm Comm } } - MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, Communicator); int nimport = 0, nexport = 0; @@ -126,9 +126,9 @@ void allreduce_sparse_double_sum(double *loc, double *glob, int N, MPI_Comm Comm int recvTask = thistask ^ ngrp; if(recvTask < ntask) if(send_count[recvTask] > 0 || recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask, TAG_DENS_B, - &import_data[recv_offset[recvTask]], recv_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask, TAG_DENS_B, - Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask, TAG_DENS_B, + &import_data[recv_offset[recvTask]], recv_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask, TAG_DENS_B, + Communicator, 
MPI_STATUS_IGNORE); } for(int i = 0; i < nimport; i++) @@ -155,7 +155,7 @@ void allreduce_sparse_double_sum(double *loc, double *glob, int N, MPI_Comm Comm for(int task = 1; task < ntask; task++) byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; - MPI_Allgatherv(loc_data, bytecounts[thistask], MPI_BYTE, glob, bytecounts, byteoffset, MPI_BYTE, Communicator); + myMPI_Allgatherv(loc_data, bytecounts[thistask], MPI_BYTE, glob, bytecounts, byteoffset, MPI_BYTE, Communicator); Mem.myfree(byteoffset); Mem.myfree(bytecounts); diff --git a/src/mpi_utils/generic_comm.h b/src/mpi_utils/generic_comm.h index 9fe6846b34eb81bdfd650e5386a1801b4a7634d8..aa7e91e35c4172a0dc2d9f4353f28df89c7460df 100644 --- a/src/mpi_utils/generic_comm.h +++ b/src/mpi_utils/generic_comm.h @@ -12,6 +12,8 @@ #ifndef GENERIC_COMM_H #define GENERIC_COMM_H +#include "gadgetconfig.h" + #include "../domain/domain.h" #include "../logs/logs.h" #include "../mpi_utils/mpi_utils.h" @@ -203,7 +205,7 @@ class generic_comm { Mem.dump_memory_table(); Terminate("It seems we have too little space left for properly sized ExportSpace... (%lld %lld) Need more memory.\n", - (long long)ExportSpace, (long long)Tp->NumPart * sizeof(int)) + (long long)ExportSpace, (long long)Tp->NumPart * sizeof(int)); } ExportSpace -= Tp->NumPart * sizeof(int); /* to account for the neighbor list buffer that the process allocated */ @@ -215,7 +217,7 @@ class generic_comm MinSpace = (D->NTask - 1) * (sizeof(data_partlist) + sizeof(T_in) + sizeof(T_out)) + D->NTopleaves * (sizeof(data_nodelist) + sizeof(int)); - sprintf(callorigin, "%s|%d|", file, line); + snprintf(callorigin, MAXLEN_PATH_EXTRA, "%s|%d|", file, line); if(ExportSpace < MinSpace) { @@ -293,7 +295,7 @@ class generic_comm * we have also two option experimental communication routines that use a sparse=communication pattern instead. 
*/ /* the default */ - MPI_Alltoall(Send, sizeof(send_recv_counts), MPI_BYTE, Recv, sizeof(send_recv_counts), MPI_BYTE, D->Communicator); + myMPI_Alltoall(Send, sizeof(send_recv_counts), MPI_BYTE, Recv, sizeof(send_recv_counts), MPI_BYTE, D->Communicator); } /* initialize offset tables that we need for the communication @@ -486,14 +488,14 @@ class generic_comm size_t len = sizeof(T_in); /* get the particles */ - MPI_Sendrecv(&DataIn[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A, - &DataGet[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A, D->Communicator, - MPI_STATUS_IGNORE); + myMPI_Sendrecv(&DataIn[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A, + &DataGet[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A, D->Communicator, + MPI_STATUS_IGNORE); /* get the node info */ - MPI_Sendrecv(&NodeInfoIn[Send_offset_nodes[recvTask]], Send[recvTask].CountNodes * sizeof(node_info), MPI_BYTE, - recvTask, TAG_GRAV_B, &NodeInfoGet[NimportNodes], Recv[recvTask].CountNodes * sizeof(node_info), - MPI_BYTE, recvTask, TAG_GRAV_B, D->Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&NodeInfoIn[Send_offset_nodes[recvTask]], Send[recvTask].CountNodes * sizeof(node_info), MPI_BYTE, + recvTask, TAG_GRAV_B, &NodeInfoGet[NimportNodes], Recv[recvTask].CountNodes * sizeof(node_info), + MPI_BYTE, recvTask, TAG_GRAV_B, D->Communicator, MPI_STATUS_IGNORE); for(int k = 0; k < Recv[recvTask].Count; k++) DataGet[Nimport + k].Firstnode += NimportNodes; @@ -521,9 +523,9 @@ class generic_comm size_t len = sizeof(T_out); /* exchange the results */ - MPI_Sendrecv(&DataResult[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B, - &DataOut[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B, - D->Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&DataResult[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B, + 
&DataOut[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B, + D->Communicator, MPI_STATUS_IGNORE); Nimport += Recv[recvTask].Count; NimportNodes += Recv[recvTask].CountNodes; diff --git a/src/mpi_utils/healthtest.cc b/src/mpi_utils/healthtest.cc index 52283a471d361722f4097de383e6fdb9e472274c..0ea9e996eb16efb0223b47d2d81d79f9dfa0fb74 100644 --- a/src/mpi_utils/healthtest.cc +++ b/src/mpi_utils/healthtest.cc @@ -141,14 +141,14 @@ double sim::measure_cpu_performance(MPI_Comm Communicator) MPI_Bcast(name_maxnode, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, max_time.rank, Communicator); char buf[1000 + MPI_MAX_PROCESSOR_NAME]; - sprintf(buf, "processes_%s.txt", name_maxnode); + snprintf(buf, 1000 + MPI_MAX_PROCESSOR_NAME, "processes_%s.txt", name_maxnode); mpi_printf("HEALTHTEST: We are dumping a process list to the file '%s'\n", buf); if(ThisTask == max_time.rank) { char cmd[10000 + MPI_MAX_PROCESSOR_NAME]; - sprintf(cmd, "ps -ef >& %s", buf); + snprintf(cmd, 10000 + MPI_MAX_PROCESSOR_NAME, "ps -ef >& %s", buf); system(cmd); } @@ -194,8 +194,8 @@ double sim::measure_hyper_cube_speed(const char *tag, MPI_Comm Communicator) if(recvTask < loc_ntask) { double t0 = Logs.second(); - MPI_Sendrecv(sendbuf, bytecount, MPI_BYTE, recvTask, TAG_DENS_A, recvbuf, bytecount, MPI_BYTE, recvTask, TAG_DENS_A, - Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(sendbuf, bytecount, MPI_BYTE, recvTask, TAG_DENS_A, recvbuf, bytecount, MPI_BYTE, recvTask, TAG_DENS_A, + Communicator, MPI_STATUS_IGNORE); double t1 = Logs.second(); tall += Logs.timediff(t0, t1); diff --git a/src/mpi_utils/hypercube_allgatherv.cc b/src/mpi_utils/hypercube_allgatherv.cc index f13b9b6dd1da8905e2c3c0c7e7810e2b174ec358..6f6b4f4d2ba0e5dd86efa7316b11250a4649fb17 100644 --- a/src/mpi_utils/hypercube_allgatherv.cc +++ b/src/mpi_utils/hypercube_allgatherv.cc @@ -19,14 +19,16 @@ #include "../data/allvars.h" #include "../data/dtypes.h" - -#ifdef MPI_HYPERCUBE_ALLGATHERV +#include 
"../mpi_utils/mpi_utils.h" #define TAG 100 -int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, - MPI_Datatype recvtype, MPI_Comm comm) +int myMPI_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, + MPI_Datatype recvtype, MPI_Comm comm) { +#ifndef MPI_HYPERCUBE_ALLGATHERV + return MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm); +#else int ntask, thistask, ptask, ngrp, size_sendtype, size_recvtype; MPI_Status status; @@ -44,14 +46,22 @@ int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype int recvtask = thistask ^ ngrp; if(recvtask < ntask) - MPI_Sendrecv(sendbuf, sendcount, sendtype, recvtask, TAG, (char *)recvbuf + displs[recvtask] * size_recvtype, - recvcount[recvtask], recvtype, recvtask, TAG, comm, &status); + { + if(sendbuf == MPI_IN_PLACE) + myMPI_Sendrecv((char *)recvbuf + displs[thistask] * size_recvtype, recvcount[thistask], sendtype, recvtask, TAG, + (char *)recvbuf + displs[recvtask] * size_recvtype, recvcount[recvtask], recvtype, recvtask, TAG, comm, + &status); + else + myMPI_Sendrecv(sendbuf, sendcount, sendtype, recvtask, TAG, (char *)recvbuf + displs[recvtask] * size_recvtype, + recvcount[recvtask], recvtype, recvtask, TAG, comm, &status); + } } - if((char *)sendbuf != (char *)recvbuf + displs[thistask] * size_recvtype) - memcpy((char *)recvbuf + displs[thistask] * size_recvtype, sendbuf, sendcount * size_sendtype); + if(sendbuf != MPI_IN_PLACE) + if((char *)sendbuf != (char *)recvbuf + displs[thistask] * size_recvtype) + memcpy((char *)recvbuf + displs[thistask] * size_recvtype, sendbuf, sendcount * size_sendtype); return 0; -} #endif +} diff --git a/src/mpi_utils/mpi_utils.h b/src/mpi_utils/mpi_utils.h index f2051551b8888c60fba7f316cd77321a266caaf0..42eac8b79ecac2469ec78c8dfaad65c89082dc36 100644 --- a/src/mpi_utils/mpi_utils.h +++ 
b/src/mpi_utils/mpi_utils.h @@ -11,6 +11,8 @@ #ifndef MPI_UTILS_H #define MPI_UTILS_H +#include "gadgetconfig.h" + #include <mpi.h> #include "../data/dtypes.h" #include "../data/mymalloc.h" @@ -122,8 +124,11 @@ void my_int_MPI_Alltoallv(void *sendb, int *sendcounts, int *sdispls, void *recv int myMPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, - MPI_Datatype recvtype, MPI_Comm comm); +int myMPI_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, + MPI_Datatype recvtype, MPI_Comm comm); + +int myMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm); void allreduce_sparse_double_sum(double *loc, double *glob, int N, MPI_Comm comm); @@ -220,7 +225,7 @@ void allreduce_sum(T *glob, int N, MPI_Comm Communicator) if(rep == 0) { - MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, Communicator); send_offset[0] = 0; @@ -246,9 +251,9 @@ void allreduce_sum(T *glob, int N, MPI_Comm Communicator) ind_data *import_data = (ind_data *)Mem.mymalloc("import_data", nimport * sizeof(ind_data)); - MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask, - TAG_DENS_B, import_data, recv_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask, TAG_DENS_B, - Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask, + TAG_DENS_B, import_data, recv_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask, TAG_DENS_B, + Communicator, MPI_STATUS_IGNORE); for(int i = 0; i < nimport; i++) { @@ -278,8 +283,8 @@ void allreduce_sum(T *glob, 
int N, MPI_Comm Communicator) int recvTask = thistask ^ ngrp; if(recvTask < ntask) if(blocksize[thistask] > 0 || blocksize[recvTask] > 0) - MPI_Sendrecv(loc_data, blocksize[thistask] * sizeof(T), MPI_BYTE, recvTask, TAG_DENS_A, &glob[blockstart[recvTask]], - blocksize[recvTask] * sizeof(T), MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(loc_data, blocksize[thistask] * sizeof(T), MPI_BYTE, recvTask, TAG_DENS_A, &glob[blockstart[recvTask]], + blocksize[recvTask] * sizeof(T), MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); } Mem.myfree(loc_data); diff --git a/src/mpi_utils/myalltoall.cc b/src/mpi_utils/myalltoall.cc index 4bb31cb204a72cb8bf15c083228608afb5b6f970..25f0ff2dcccafda3851b430105bb7eb9bc264219 100644 --- a/src/mpi_utils/myalltoall.cc +++ b/src/mpi_utils/myalltoall.cc @@ -40,7 +40,7 @@ int myMPI_Alltoallv_new_prep(int *sendcnt, int *recvcnt, int *rdispls, MPI_Comm MPI_Comm_rank(comm, &rank); if(method == 0 || method == 1) - MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, comm); + myMPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, comm); else if(method == 10) { for(int i = 0; i < nranks; ++i) @@ -80,6 +80,10 @@ void myMPI_Alltoallv_new(void *sendbuf, int *sendcnt, int *sdispls, MPI_Datatype MPI_Type_size(sendtype, &itsz); size_t tsz = itsz; // to enforce size_t data type in later computations +#ifdef MPI_HYPERCUBE_ALLTOALL + method = 1; +#endif + if(method == 0) // standard Alltoallv MPI_Alltoallv(sendbuf, sendcnt, sdispls, sendtype, recvbuf, recvcnt, rdispls, recvtype, comm); else if(method == 1) // blocking sendrecv @@ -90,7 +94,6 @@ void myMPI_Alltoallv_new(void *sendbuf, int *sendcnt, int *sdispls, MPI_Datatype while(lptask < nranks) lptask <<= 1; int tag = 42; - MPI_Status status; if(recvcnt[rank] > 0) // local communication memcpy(PCHAR(recvbuf) + tsz * rdispls[rank], PCHAR(sendbuf) + tsz * sdispls[rank], tsz * recvcnt[rank]); @@ -100,8 +103,8 @@ void myMPI_Alltoallv_new(void *sendbuf, 
int *sendcnt, int *sdispls, MPI_Datatype int otask = rank ^ ngrp; if(otask < nranks) if(sendcnt[otask] > 0 || recvcnt[otask] > 0) - MPI_Sendrecv(PCHAR(sendbuf) + tsz * sdispls[otask], sendcnt[otask], sendtype, otask, tag, - PCHAR(recvbuf) + tsz * rdispls[otask], recvcnt[otask], recvtype, otask, tag, comm, &status); + myMPI_Sendrecv(PCHAR(sendbuf) + tsz * sdispls[otask], sendcnt[otask], sendtype, otask, tag, + PCHAR(recvbuf) + tsz * rdispls[otask], recvcnt[otask], recvtype, otask, tag, comm, MPI_STATUS_IGNORE); } } else if(method == 2) // asynchronous communication @@ -145,6 +148,7 @@ void myMPI_Alltoallv_new(void *sendbuf, int *sendcnt, int *sdispls, MPI_Datatype int *disp_at_sender = (int *)Mem.mymalloc("disp_at_sender", nranks * sizeof(int)); disp_at_sender[rank] = sdispls[rank]; MPI_Win win; + // TODO:supply info object with "no_lock" MPI_Win_create(sdispls, nranks * sizeof(MPI_INT), sizeof(MPI_INT), MPI_INFO_NULL, comm, &win); MPI_Win_fence(0, win); for(int i = 1; i < nranks; ++i) @@ -180,6 +184,7 @@ void myMPI_Alltoallv(void *sendb, size_t *sendcounts, size_t *sdispls, void *rec char *sendbuf = (char *)sendb; char *recvbuf = (char *)recvb; +#ifndef MPI_HYPERCUBE_ALLTOALL if(big_flag == 0) { int ntask; @@ -206,6 +211,7 @@ void myMPI_Alltoallv(void *sendb, size_t *sendcounts, size_t *sdispls, void *rec Mem.myfree(scount); } else +#endif { /* here we definitely have some large messages. We default to the * pair-wise protocol, which should be most robust anyway. @@ -238,6 +244,7 @@ void my_int_MPI_Alltoallv(void *sendb, int *sendcounts, int *sdispls, void *recv char *sendbuf = (char *)sendb; char *recvbuf = (char *)recvb; +#ifndef MPI_HYPERCUBE_ALLTOALL if(big_flag == 0) { int ntask; @@ -264,6 +271,7 @@ void my_int_MPI_Alltoallv(void *sendb, int *sendcounts, int *sdispls, void *recv Mem.myfree(scount); } else +#endif { /* here we definitely have some large messages. We default to the * pair-wise protocoll, which should be most robust anyway. 
@@ -289,3 +297,39 @@ void my_int_MPI_Alltoallv(void *sendb, int *sendcounts, int *sdispls, void *recv } } } + +int myMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm) + +{ +#ifndef MPI_HYPERCUBE_ALLTOALL + return MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); +#else + int ntask, ptask, thistask, size_sendtype, size_recvtype; + + MPI_Comm_rank(comm, &thistask); + MPI_Comm_size(comm, &ntask); + + MPI_Type_size(sendtype, &size_sendtype); + MPI_Type_size(recvtype, &size_recvtype); + + for(ptask = 0; ntask > (1 << ptask); ptask++) + ; + + for(int ngrp = 1; ngrp < (1 << ptask); ngrp++) + { + int recvtask = thistask ^ ngrp; + + if(recvtask < ntask) + myMPI_Sendrecv((char *)sendbuf + recvtask * sendcount * size_sendtype, sendcount, sendtype, recvtask, TAG_PDATA + ngrp, + (char *)recvbuf + recvtask * recvcount * size_recvtype, recvcount, recvtype, recvtask, TAG_PDATA + ngrp, comm, + MPI_STATUS_IGNORE); + } + + memcpy((char *)recvbuf + thistask * recvcount * size_recvtype, (char *)sendbuf + thistask * sendcount * size_sendtype, + sendcount * size_sendtype); + + return 0; + +#endif +} diff --git a/src/mpi_utils/shared_mem_handler.cc b/src/mpi_utils/shared_mem_handler.cc index 2bc5042691bda896e4b6c16e04155b939952e94a..953de143bc1340fc1f08a303bfa80e0bbf2d501e 100644 --- a/src/mpi_utils/shared_mem_handler.cc +++ b/src/mpi_utils/shared_mem_handler.cc @@ -9,6 +9,8 @@ * \brief implements code for the shared-memory fetching of remote date through designated MPI handler ranks */ +#include "gadgetconfig.h" + #include <hdf5.h> #include <mpi.h> #include <stdio.h> @@ -87,6 +89,9 @@ void shmem::shared_memory_handler(void) prepare_offset_table(NULL, tree_info[handle].SphP_offsets); prepare_offset_table(NULL, tree_info[handle].Foreign_Nodes_offsets); prepare_offset_table(NULL, tree_info[handle].Foreign_Points_offsets); + + MPI_Barrier(SharedMemComm); // this barrier 
is in principle superfluous, but on some systems, + // the MPI_Gather in prepare_offset_table() can return prematurely before all data has arrived } else if(tag == TAG_HEADER) // signals that we are freeing addresses we stored for tree access { diff --git a/src/mpi_utils/shared_mem_handler.h b/src/mpi_utils/shared_mem_handler.h index befcf6154d5f8921667be889e01fd2ecca5c8b93..0c43af867a1c2eb04541c18f2f994a6e5b78e924 100644 --- a/src/mpi_utils/shared_mem_handler.h +++ b/src/mpi_utils/shared_mem_handler.h @@ -12,6 +12,8 @@ #ifndef SHAREDMEM_H #define SHAREDMEM_H +#include "gadgetconfig.h" + #include <hdf5.h> #include <mpi.h> #include <stdio.h> @@ -64,6 +66,10 @@ class shmem void **SharedMemBaseAddr; +#ifdef ALLOCATE_SHARED_MEMORY_VIA_POSIX + char **SharedMemBaseAddrRaw; +#endif + char *TableData; char *EwaldData; diff --git a/src/mpi_utils/sizelimited_sendrecv.cc b/src/mpi_utils/sizelimited_sendrecv.cc index b0ed42a35a589a8e8f9a2150e40f477f568320e3..ac054dc468c6a8a49fa85bb94edf549c46f091c1 100644 --- a/src/mpi_utils/sizelimited_sendrecv.cc +++ b/src/mpi_utils/sizelimited_sendrecv.cc @@ -6,7 +6,8 @@ /*! 
\file sizelimited_sendrecv.cc * - * \brief implements a wrapper around MPI_Sendrecv that if needed transmits the data in smaller pieces than a prescribed maximum size + * \brief implements a wrapper around myMPI_Sendrecv that if needed transmits the data in smaller pieces than a prescribed maximum + * size */ #include "gadgetconfig.h" diff --git a/src/ngbtree/ngbtree.h b/src/ngbtree/ngbtree.h index 1d4402478bf17de434c837984acc7284b1546cb6..87a077feb006045f70a28db73ba6d5d5fb9289e8 100644 --- a/src/ngbtree/ngbtree.h +++ b/src/ngbtree/ngbtree.h @@ -12,6 +12,8 @@ #ifndef NGBTREE_H_ #define NGBTREE_H_ +#include "gadgetconfig.h" + #include "../data/simparticles.h" #include "../time_integration/driftfac.h" #include "../tree/tree.h" diff --git a/src/ngbtree/ngbtree_build.cc b/src/ngbtree/ngbtree_build.cc index 126132bd89d395640cc774b01afc1b679563e08f..56b2ebea37051b1912d5c035ccebd6e4a4ef2f8f 100644 --- a/src/ngbtree/ngbtree_build.cc +++ b/src/ngbtree/ngbtree_build.cc @@ -114,8 +114,8 @@ void ngbtree::exchange_topleafdata(void) } } - MPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, - D->Communicator); + myMPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, + D->Communicator); for(int task = 0; task < D->NTask; task++) recvcounts[task] = 0; @@ -502,9 +502,9 @@ void ngbtree::finish_vounds_update(int nchanged, int *nodelist) tot_nodelist = (int *)Mem.mymalloc("tot_nodelist", tot_nchanged * sizeof(int)); glob_leaf_node_data = (leafnode_data *)Mem.mymalloc("glob_leaf_node_data", tot_nchanged * sizeof(leafnode_data)); - MPI_Allgatherv(nodelist, nchanged, MPI_INT, tot_nodelist, recvcounts, recvoffset, MPI_INT, D->Communicator); - MPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, - D->Communicator); + myMPI_Allgatherv(nodelist, nchanged, MPI_INT, tot_nodelist, 
recvcounts, recvoffset, MPI_INT, D->Communicator); + myMPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, + D->Communicator); if(TreeSharedMem_ThisTask == 0) /* only one of the shared memory threads needs to update the toplevel tree */ { @@ -609,9 +609,9 @@ void ngbtree::finish_maxhsml_update(int nchanged, int *nodelist) tot_nodelist = (int *)Mem.mymalloc("tot_nodelist", tot_nchanged * sizeof(int)); glob_leaf_node_data = (leafnode_data *)Mem.mymalloc("glob_leaf_node_data", tot_nchanged * sizeof(leafnode_data)); - MPI_Allgatherv(nodelist, nchanged, MPI_INT, tot_nodelist, recvcounts, recvoffset, MPI_INT, D->Communicator); - MPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, - D->Communicator); + myMPI_Allgatherv(nodelist, nchanged, MPI_INT, tot_nodelist, recvcounts, recvoffset, MPI_INT, D->Communicator); + myMPI_Allgatherv(loc_leaf_node_data, bytecounts[D->ThisTask], MPI_BYTE, glob_leaf_node_data, bytecounts, byteoffset, MPI_BYTE, + D->Communicator); if(TreeSharedMem_ThisTask == 0) /* only one of the shared memory threads needs to update the toplevel tree */ { diff --git a/src/ngenic/ngenic.cc b/src/ngenic/ngenic.cc index b90c3cbf282650ecc3de53038100497d716fb26c..c86b63a1187370f716d191b31f86031f7f2e9731 100644 --- a/src/ngenic/ngenic.cc +++ b/src/ngenic/ngenic.cc @@ -459,7 +459,7 @@ void ngenic::ngenic_distribute_particles(void) Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev]; } - MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, Communicator); + myMPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, Communicator); nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0; for(int j = 0; j < NTask; j++) @@ -659,7 +659,7 @@ void ngenic::ngenic_get_derivate_from_fourier_field(int axes1, int axes2, fft_co double smth = 
1; #ifdef CORRECT_CIC - if(axes2 >= 0) + if(axes2 < 0) { /* do deconvolution of CIC interpolation */ double fx = 1, fy = 1, fz = 1; @@ -1284,14 +1284,14 @@ void ngenic::print_spec(void) { if(ThisTask == 0) { - char buf[3 * MAXLEN_PATH]; - sprintf(buf, "%s/inputspec_%s.txt", All.OutputDir, All.SnapshotFileBase); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/inputspec_%s.txt", All.OutputDir, All.SnapshotFileBase); FILE *fd = fopen(buf, "w"); - double gf = ngenic_growth_factor(0.001, 1.0) / (1.0 / 0.001); + double gf = Driftfac.linear_growth_factor(0.001, 1.0) / (1.0 / 0.001); - double DDD = ngenic_growth_factor(All.cf_atime, 1.0); + double DDD = Driftfac.linear_growth_factor(All.cf_atime, 1.0); fprintf(fd, "%12g %12g\n", All.cf_redshift, DDD); /* print actual starting redshift and linear growth factor for this cosmology */ @@ -1340,8 +1340,8 @@ void ngenic::print_spec(void) { if(All.cf_atime < 1.0) { - char buf[3 * MAXLEN_PATH]; - sprintf(buf, "%s/growthfac.txt", All.OutputDir); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/growthfac.txt", All.OutputDir); FILE *fd = fopen(buf, "w"); @@ -1351,7 +1351,7 @@ void ngenic::print_spec(void) { double a = exp(log(All.cf_atime) + ((log(1.0) - log(All.cf_atime)) / NSTEPS) * i); - double d = ngenic_growth_factor(a, 1.0); + double d = Driftfac.linear_growth_factor(a, 1.0); fprintf(fd, "%12g %12g\n", a, 1.0 / d); } @@ -1365,8 +1365,8 @@ void ngenic::print_spec(void) if(ThisTask == 0) { - char buf[3 * MAXLEN_PATH]; - sprintf(buf, "%s/variance.txt", All.OutputDir); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/variance.txt", All.OutputDir); FILE *fd = fopen(buf, "w"); diff --git a/src/ngenic/ngenic.h b/src/ngenic/ngenic.h index a3fdb7c01728e16b422105e7fe816fc8a35774b0..09811ef3b29b9de58f24f1784f430214469c50bc 100644 --- a/src/ngenic/ngenic.h +++ b/src/ngenic/ngenic.h @@ -12,6 +12,8 @@ #ifndef NGENIC_H #define NGENIC_H +#include "gadgetconfig.h" + #ifdef NGENIC 
#ifndef PERIODIC @@ -51,7 +53,6 @@ class ngenic : public pm_mpi_fft #ifdef RADIATION double ngenic_dlogDdloga(double k); #endif - double ngenic_growth_factor(double astart, double aend); void ngenic_initialize_powerspectrum(void); void free_power_table(void); @@ -114,22 +115,8 @@ class ngenic : public pm_mpi_fft double ngenic_powerspec_eh(double k); double ngenic_tophat_sigma2(double R); double ngenic_tk_eh(double k); - double ngenic_growth(double a); void read_power_table(void); - static double ngenic_growth_int(double a, void *param) - { -#if defined(RADIATION) && !defined(SMOOTHMATTER) - return pow(a / (All.Omega0 + (1 - All.Omega0 - All.OmegaLambda) * a + All.OmegaLambda * a * a * a + All.OmegaR / a), 1.5); -#elif !defined(RADIATION) && defined(SMOOTHMATTER) - return pow(a / (All.Omega0 + All.OmegaSmooth + (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth) * a + All.OmegaLambda * a * a * a), 1.5); -#elif defined(RADIATION) && defined(SMOOTHMATTER) - return pow(a / (All.Omega0 + All.OmegaSmooth + (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth - All.OmegaR) * a + All.OmegaLambda * a * a * a + All.OmegaR / a), 1.5); -#else - return pow(a / (All.Omega0 + (1 - All.Omega0 - All.OmegaLambda) * a + All.OmegaLambda * a * a * a), 1.5); -#endif - } - double fnl(double x, double A, double B, double alpha, double beta, double V, double gf) /* Peacock & Dodds formula */ { return x * pow((1 + B * beta * x + pow(A * x, alpha * beta)) / (1 + pow(pow(A * x, alpha) * gf * gf * gf / (V * sqrt(x)), beta)), diff --git a/src/ngenic/power.cc b/src/ngenic/power.cc index a790ff6e5c9703fc551b4f93a7139c8f0d19053b..ab31f0b52a245a02b20caaa23f1ab453f88d96d4 100644 --- a/src/ngenic/power.cc +++ b/src/ngenic/power.cc @@ -74,10 +74,10 @@ void ngenic::free_power_table(void) { Mem.myfree(PowerTable); } void ngenic::read_power_table(void) { FILE *fd; - char buf[MAXLEN_PATH]; + char buf[MAXLEN_PATH_EXTRA]; double k, p; - sprintf(buf, All.PowerSpectrumFile); + snprintf(buf, MAXLEN_PATH_EXTRA, 
All.PowerSpectrumFile); if(!(fd = fopen(buf, "r"))) { @@ -100,7 +100,7 @@ void ngenic::read_power_table(void) PowerTable = (pow_table *)Mem.mymalloc("PowerTable", NPowerTable * sizeof(pow_table)); - sprintf(buf, All.PowerSpectrumFile); + snprintf(buf, MAXLEN_PATH_EXTRA, All.PowerSpectrumFile); if(!(fd = fopen(buf, "r"))) { @@ -130,8 +130,6 @@ void ngenic::read_power_table(void) void ngenic::ngenic_initialize_powerspectrum(void) { - double res; - AA = 6.4 / All.ShapeGamma * (3.085678e24 / All.UnitLength_in_cm); BB = 3.0 / All.ShapeGamma * (3.085678e24 / All.UnitLength_in_cm); CC = 1.7 / All.ShapeGamma * (3.085678e24 / All.UnitLength_in_cm); @@ -153,8 +151,8 @@ void ngenic::ngenic_initialize_powerspectrum(void) #ifdef DIFFERENT_TRANSFER_FUNC Type = 1; #endif - Norm = 1.0; - res = ngenic_tophat_sigma2(R8); + Norm = 1.0; + double res = ngenic_tophat_sigma2(R8); if(ThisTask == 0 && All.PowerSpectrumType == 2) printf("\nNormalization of spectrum in file: Sigma8 = %g\n", sqrt(res)); @@ -164,7 +162,7 @@ void ngenic::ngenic_initialize_powerspectrum(void) if(ThisTask == 0 && All.PowerSpectrumType == 2) printf("Normalization adjusted to Sigma8=%g (Normfac=%g)\n\n", All.Sigma8, Norm); - Dplus = ngenic_growth_factor(All.cf_atime, 1.0); + Dplus = Driftfac.linear_growth_factor(All.cf_atime, 1.0); } mpi_printf("NGENIC: Dplus=%g\n", Dplus); } @@ -303,69 +301,22 @@ double ngenic::ngenic_tophat_sigma2(double R) return result; } -double ngenic::ngenic_growth_factor(double astart, double aend) { return ngenic_growth(aend) / ngenic_growth(astart); } - -double ngenic::ngenic_growth(double a) -{ - double hubble_a; - -#if defined(RADIATION) && !defined(SMOOTHMATTER) - hubble_a = sqrt(All.Omega0 / (a * a * a) + (1 - All.Omega0 - All.OmegaLambda - All.OmegaR) / (a * a) + All.OmegaLambda + All.OmegaR / (a * a * a * a)); -#elif !defined(RADIATION) && defined(SMOOTHMATTER) - hubble_a = sqrt((All.Omega0 + All.OmegaSmooth) / (a * a * a) + (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth) / (a 
* a) + All.OmegaLambda); -#elif defined(RADIATION) && defined(SMOOTHMATTER) - hubble_a = sqrt((All.Omega0 + All.OmegaSmooth) / (a * a * a) + (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth - All.OmegaR) / (a * a) + All.OmegaLambda + All.OmegaR / (a * a * a * a)); -#else - hubble_a = sqrt(All.Omega0 / (a * a * a) + (1 - All.Omega0 - All.OmegaLambda) / (a * a) + All.OmegaLambda); -#endif - - const int worksize = 100000; - - double result, abserr; - gsl_function F; - - gsl_integration_workspace *workspace = gsl_integration_workspace_alloc(worksize); - F.function = &ngenic_growth_int; - - gsl_integration_qag(&F, 0, a, 0, 1.0e-8, worksize, GSL_INTEG_GAUSS41, workspace, &result, &abserr); - - gsl_integration_workspace_free(workspace); - - return hubble_a * result; -} - double ngenic::ngenic_f1_omega(double a) { double omega_a; -#if defined(RADIATION) && !defined(SMOOTHMATTER) - omega_a = All.Omega0 / (All.Omega0 + a * (1 - All.Omega0 - All.OmegaLambda - All.OmegaR) + a * a * a * All.OmegaLambda + All.OmegaR / a); -#elif !defined(RADIATION) && defined(SMOOTHMATTER) - omega_a = All.Omega0 / (All.Omega0 + All.OmegaSmooth + a * (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth) + a * a * a * All.OmegaLambda); -#elif defined(RADIATION) && defined(SMOOTHMATTER) - omega_a = All.Omega0 / (All.Omega0 + All.OmegaSmooth + a * (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth - All.OmegaR) + a * a * a * All.OmegaLambda + All.OmegaR / a); -#else - omega_a = All.Omega0 / (All.Omega0 + a * (1 - All.Omega0 - All.OmegaLambda) + a * a * a * All.OmegaLambda); -#endif + omega_a = Driftfac.get_OmegaMatter_a(a); - return pow(omega_a, 0.6); + return pow(omega_a, 5.0 / 9); } double ngenic::ngenic_f2_omega(double a) { double omega_a; -#if defined(RADIATION) && !defined(SMOOTHMATTER) - omega_a = All.Omega0 / (All.Omega0 + a * (1 - All.Omega0 - All.OmegaLambda - All.OmegaR) + a * a * a * All.OmegaLambda + All.OmegaR / a); -#elif !defined(RADIATION) && defined(SMOOTHMATTER) - omega_a = 
All.Omega0 / (All.Omega0 + All.OmegaSmooth + a * (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth) + a * a * a * All.OmegaLambda); -#elif defined(RADIATION) && defined(SMOOTHMATTER) - omega_a = All.Omega0 / (All.Omega0 + All.OmegaSmooth + a * (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth - All.OmegaR) + a * a * a * All.OmegaLambda + All.OmegaR / a); -#else - omega_a = All.Omega0 / (All.Omega0 + a * (1 - All.Omega0 - All.OmegaLambda) + a * a * a * All.OmegaLambda); -#endif + omega_a = Driftfac.get_OmegaMatter_a(a); - return 2 * pow(omega_a, 4.0 / 7); + return 2 * pow(omega_a, 6.0 / 11); } #ifdef RADIATION diff --git a/src/pm/pm.h b/src/pm/pm.h index 91cc56ef2cfb330fe1641b6e311dd9fbf309f2f1..21de53dcbc52ec90ba4c87dbe9fae81a8440e8b5 100644 --- a/src/pm/pm.h +++ b/src/pm/pm.h @@ -14,6 +14,8 @@ #if defined(PMGRID) || defined(NGENIC) +#include "gadgetconfig.h" + #include <fftw3.h> typedef ptrdiff_t fft_ptrdiff_t; diff --git a/src/pm/pm_mpi_fft.cc b/src/pm/pm_mpi_fft.cc index 767a901e0cb2bbcf7c9a4b4a755fbc8a2aef2a6c..259bc9286dfcff9d46e58929e50db2a3fca2635a 100644 --- a/src/pm/pm_mpi_fft.cc +++ b/src/pm/pm_mpi_fft.cc @@ -588,7 +588,7 @@ void pm_mpi_fft::my_fft_swap13back(fft_plan *plan, fft_real *data, fft_real *out void pm_mpi_fft::my_column_based_fft(fft_plan *plan, void *data, void *workspace, int forward) { - size_t n; + long long n; fft_real *data_real = (fft_real *)data, *workspace_real = (fft_real *)workspace; fft_complex *data_complex = (fft_complex *)data, *workspace_complex = (fft_complex *)workspace; @@ -732,7 +732,7 @@ void pm_mpi_fft::my_fft_column_remap(fft_complex *data, int Ndims[3], /* global if(just_count_flag) { - MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, Communicator); + myMPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, Communicator); for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++) { @@ -1200,7 +1200,7 @@ void 
pm_mpi_fft::my_fft_column_transpose(fft_real *data, int Ndims[3], /* global } } if(just_count_flag) - MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, Communicator); + myMPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, Communicator); } #endif diff --git a/src/pm/pm_mpi_fft.h b/src/pm/pm_mpi_fft.h index 2a54ef1ea636a627073e61a9228b04929806c9c6..1d0b04e5b418d30dac3294cc1b503e86864c288d 100644 --- a/src/pm/pm_mpi_fft.h +++ b/src/pm/pm_mpi_fft.h @@ -12,6 +12,8 @@ #ifndef PM_MPI_FFT_H #define PM_MPI_FFT_H +#include "gadgetconfig.h" + #include "../mpi_utils/setcomm.h" #ifndef FFTW @@ -63,11 +65,7 @@ class pm_mpi_fft : public virtual setcomm int transposed_firstcol, transposed_ncol; int second_transposed_firstcol, second_transposed_ncol; - size_t second_transposed_ncells; - - // int pivotcol; /* to go from column number to task */ - // int avg; - // int tasklastsection; + long long second_transposed_ncells; size_t *offsets_send_A; size_t *offsets_recv_A; diff --git a/src/pm/pm_nonperiodic.cc b/src/pm/pm_nonperiodic.cc index b948d8333c99ecfcc56f4241b5ef59d2dcf60d4f..a3cf221f95c60716f98eb6f27b7a5425e6f7a302 100644 --- a/src/pm/pm_nonperiodic.cc +++ b/src/pm/pm_nonperiodic.cc @@ -195,7 +195,11 @@ void pm_nonperiodic::pm_init_regionsize(void) } else { +#if defined(RANDOMIZE_DOMAINCENTER_TYPES) || defined(PLACEHIGHRESREGION) blocksize = Sp->PlacingBlocksize; +#else + Terminate("we should not get here"); +#endif } mpi_printf( @@ -224,8 +228,12 @@ void pm_nonperiodic::pm_init_regionsize(void) } else { +#if defined(RANDOMIZE_DOMAINCENTER_TYPES) || defined(PLACEHIGHRESREGION) left = (Sp->ReferenceIntPos[HIGH_MESH][i] + Sp->Xmintot[HIGH_MESH][i]) & Sp->PlacingMask; right = left + Sp->PlacingBlocksize; +#else + Terminate("we should not get here"); +#endif } Sp->Xmintot[mesh][i] = left - Sp->ReferenceIntPos[mesh][i]; @@ -311,7 +319,7 @@ void pm_nonperiodic::pm_init_nonperiodic(simparticles *Sp_ptr) 
#ifndef FFT_COLUMN_BASED int stride = GRIDz; #else - int stride = 1; + int stride = 1; #endif myplan.forward_plan_zdir = FFTW(plan_many_dft_r2c)(1, ndim, 1, rhogrid, 0, 1, GRID2, (fft_complex *)forcegrid, 0, 1, GRIDz, @@ -575,7 +583,7 @@ void pm_nonperiodic::pmforce_nonperiodic_zoom_optimized_prepare_density(int grnr rhogrid[ii] = 0; /* exchange data and add contributions to the local mesh-path */ - MPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, Communicator); + myMPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, Communicator); for(int level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { @@ -868,7 +876,7 @@ void pm_nonperiodic::pmforce_nonperiodic_uniform_optimized_prepare_density(int g Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev]; } - MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, Communicator); + myMPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, Communicator); nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0; for(int j = 0; j < NTask; j++) diff --git a/src/pm/pm_periodic.cc b/src/pm/pm_periodic.cc index f525e12e2b37495250a0b8b217297d1674b73ff6..aa5df1c2d8e0e9128c76032f459f8b18207d666a 100644 --- a/src/pm/pm_periodic.cc +++ b/src/pm/pm_periodic.cc @@ -378,7 +378,7 @@ void pm_periodic::pmforce_zoom_optimized_prepare_density(int mode, int *typelist rhogrid = (fft_real *)Mem.mymalloc_clear("rhogrid", maxfftsize * sizeof(fft_real)); /* exchange data and add contributions to the local mesh-path */ - MPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, Communicator); + myMPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, Communicator); for(level = 0; level < (1 << PTask); 
level++) /* note: for level=0, target is the same task */ { @@ -735,7 +735,7 @@ void pm_periodic::pmforce_uniform_optimized_prepare_density(int mode, int *typel if(rep == 0) { - MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, Communicator); + myMPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, Communicator); nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0; for(int j = 0; j < NTask; j++) @@ -1196,7 +1196,7 @@ void pm_periodic::pmforce_uniform_optimized_readout_forces_or_potential_xz(fft_r MyIntPosType IntPos[3]; }; - partbuf *partin, *partout; + partbuf *partin = NULL, *partout = NULL; size_t nimport = 0, nexport = 0; particle_data *P = Sp->P; @@ -1300,7 +1300,7 @@ void pm_periodic::pmforce_uniform_optimized_readout_forces_or_potential_xz(fft_r if(rep == 0) { - MPI_Alltoall(send_count, sizeof(size_t), MPI_BYTE, recv_count, sizeof(size_t), MPI_BYTE, Communicator); + myMPI_Alltoall(send_count, sizeof(size_t), MPI_BYTE, recv_count, sizeof(size_t), MPI_BYTE, Communicator); nimport = 0, nexport = 0; recv_offset[0] = send_offset[0] = 0; @@ -1508,7 +1508,7 @@ void pm_periodic::pmforce_uniform_optimized_readout_forces_or_potential_zy(fft_r MyIntPosType IntPos[3]; }; - partbuf *partin, *partout; + partbuf *partin = NULL, *partout = NULL; size_t nimport = 0, nexport = 0; particle_data *P = Sp->P; @@ -1612,7 +1612,7 @@ void pm_periodic::pmforce_uniform_optimized_readout_forces_or_potential_zy(fft_r if(rep == 0) { - MPI_Alltoall(send_count, sizeof(size_t), MPI_BYTE, recv_count, sizeof(size_t), MPI_BYTE, Communicator); + myMPI_Alltoall(send_count, sizeof(size_t), MPI_BYTE, recv_count, sizeof(size_t), MPI_BYTE, Communicator); nimport = 0, nexport = 0; recv_offset[0] = send_offset[0] = 0; @@ -2348,9 +2348,6 @@ double pm_periodic::pmperiodic_tallbox_long_range_potential(double x, double y, double r = sqrt(x * x + y * y + z * z); - if(r == 0) - return 0; - double xx, yy, zz; 
switch(GRAVITY_TALLBOX) { @@ -2390,11 +2387,24 @@ double pm_periodic::pmperiodic_tallbox_long_range_potential(double x, double y, for(int nx = -qxmax; nx <= qxmax; nx++) for(int ny = -qymax; ny <= qymax; ny++) { - double dx = x - nx * BOXX; - double dy = y - ny * BOXY; - double r = sqrt(dx * dx + dy * dy + z * z); - if(r > 0) - sum1 += erfc(alpha * r) / r; + if(nx != 0 || ny != 0) + { + double dx = x - nx * BOXX; + double dy = y - ny * BOXY; + double r = sqrt(dx * dx + dy * dy + z * z); + if(r > 0) + sum1 += erfc(alpha * r) / r; + } + else + { + // in the nx/ny=0 case, correct for the short range force + double alpha_star = 0.5 / (((double)ASMTH) / PMGRID); + double u = alpha_star * r; + if(r > 0) + sum1 += (erfc(alpha * r) - erfc(u)) / r; + else + sum1 += 2.0 / sqrt(M_PI) * (alpha_star - alpha); + } } double alpha2 = alpha * alpha; @@ -2472,14 +2482,14 @@ void pm_periodic::calculate_power_spectra(int num) if(ThisTask == 0) { char buf[MAXLEN_PATH_EXTRA]; - sprintf(buf, "%s/powerspecs", All.OutputDir); + snprintf(buf, MAXLEN_PATH_EXTRA, "%s/powerspecs", All.OutputDir); mkdir(buf, 02755); } #ifdef ALT_NAMING - sprintf(power_spec_fname, "%s/powerspecs/powerspec_%06d.txt", All.OutputDir, num); + snprintf(power_spec_fname, MAXLEN_PATH_EXTRA, "%s/powerspecs/powerspec_%06d.txt", All.OutputDir, num); #else - sprintf(power_spec_fname, "%s/powerspecs/powerspec_%03d.txt", All.OutputDir, num); + snprintf(power_spec_fname, MAXLEN_PATH_EXTRA, "%s/powerspecs/powerspec_%03d.txt", All.OutputDir, num); #endif pmforce_do_powerspec(typeflag); /* calculate power spectrum for all particle types */ @@ -2501,9 +2511,9 @@ void pm_periodic::calculate_power_spectra(int num) typeflag[i] = 1; #ifdef ALT_NAMING - sprintf(power_spec_fname, "%s/powerspecs/powerspec_type%d_%06d.txt", All.OutputDir, i, num); + snprintf(power_spec_fname, MAXLEN_PATH_EXTRA, "%s/powerspecs/powerspec_type%d_%06d.txt", All.OutputDir, i, num); #else - sprintf(power_spec_fname, "%s/powerspecs/powerspec_type%d_%03d.txt", 
All.OutputDir, i, num); + snprintf(power_spec_fname, MAXLEN_PATH_EXTRA, "%s/powerspecs/powerspec_type%d_%03d.txt", All.OutputDir, i, num); #endif pmforce_do_powerspec(typeflag); /* calculate power spectrum for type i */ diff --git a/src/sort/cxxsort.h b/src/sort/cxxsort.h index 13d352ec429d347d32e0482e5d89c4d0dc02fae7..6ec8fb65d221b4202738a1ec9f1f3d0b68ae8cb8 100644 --- a/src/sort/cxxsort.h +++ b/src/sort/cxxsort.h @@ -11,6 +11,8 @@ #ifndef GADGET4_CXXSORT_H #define GADGET4_CXXSORT_H +#include "gadgetconfig.h" + #include <algorithm> #include "../data/allvars.h" diff --git a/src/sort/parallel_sort.h b/src/sort/parallel_sort.h index 7869b4174c136b15d1cefa7b86e6c34077ef8219..ba3ed87e77b5c5827b43fce9ecaf3ce2b6722776 100644 --- a/src/sort/parallel_sort.h +++ b/src/sort/parallel_sort.h @@ -12,9 +12,10 @@ #ifndef PARALLEL_SORT_H #define PARALLEL_SORT_H -#include "cxxsort.h" +#include "gadgetconfig.h" #include "../data/mymalloc.h" +#include "../sort/cxxsort.h" //#define CHECK_LOCAL_RANK @@ -312,7 +313,7 @@ inline double mycxxsort_parallel(T *begin, T *end, Comp comp, MPI_Comm comm) /* now compute the global ranks by summing the local ranks */ /* Note: the last element in current_loc_rank is not defined. 
It will be summed by the last processor, and stored in the last * element of current_glob_rank */ - MPI_Alltoall(current_loc_rank, sizeof(size_t), MPI_BYTE, list, sizeof(size_t), MPI_BYTE, MPI_CommLocal); + myMPI_Alltoall(current_loc_rank, sizeof(size_t), MPI_BYTE, list, sizeof(size_t), MPI_BYTE, MPI_CommLocal); rank = 0; for(int j = 0; j < Local_NTask; j++) rank += list[j]; @@ -357,10 +358,11 @@ inline double mycxxsort_parallel(T *begin, T *end, Comp comp, MPI_Comm comm) } } - MPI_Alltoall(source_range_len_list, sizeof(long long), MPI_BYTE, range_len_list, sizeof(long long), MPI_BYTE, MPI_CommLocal); - MPI_Alltoall(source_median_element_list, size, MPI_BYTE, median_element_list, size, MPI_BYTE, MPI_CommLocal); - MPI_Alltoall(source_tie_breaking_rank_list, sizeof(size_t), MPI_BYTE, tie_breaking_rank_list, sizeof(size_t), MPI_BYTE, - MPI_CommLocal); + myMPI_Alltoall(source_range_len_list, sizeof(long long), MPI_BYTE, range_len_list, sizeof(long long), MPI_BYTE, + MPI_CommLocal); + myMPI_Alltoall(source_median_element_list, size, MPI_BYTE, median_element_list, size, MPI_BYTE, MPI_CommLocal); + myMPI_Alltoall(source_tie_breaking_rank_list, sizeof(size_t), MPI_BYTE, tie_breaking_rank_list, sizeof(size_t), MPI_BYTE, + MPI_CommLocal); if(Local_ThisTask < Local_NTask - 1) { @@ -456,16 +458,10 @@ inline double mycxxsort_parallel(T *begin, T *end, Comp comp, MPI_Comm comm) /* At this point we have found all the elements corresponding to the desired split points */ /* we can now go ahead and determine how many elements of the local CPU have to go to each other CPU */ - if(nmemb * size > (1LL << 31)) - Terminate("currently, local data must be smaller than 2 GB"); - /* note: to restrict this limitation, the send/recv count arrays have to made 64-bit, - * and the MPI data exchange though MPI_Alltoall has to be modified such that buffers > 2 GB become possible - */ - - int *send_count = (int *)Mem.mymalloc("send_count", Local_NTask * sizeof(int)); - int *recv_count = (int 
*)Mem.mymalloc("recv_count", Local_NTask * sizeof(int)); - int *send_offset = (int *)Mem.mymalloc("send_offset", Local_NTask * sizeof(int)); - int *recv_offset = (int *)Mem.mymalloc("recv_offset", Local_NTask * sizeof(int)); + size_t *send_count = (size_t *)Mem.mymalloc("send_count", Local_NTask * sizeof(size_t)); + size_t *recv_count = (size_t *)Mem.mymalloc("recv_count", Local_NTask * sizeof(size_t)); + size_t *send_offset = (size_t *)Mem.mymalloc("send_offset", Local_NTask * sizeof(size_t)); + size_t *recv_offset = (size_t *)Mem.mymalloc("recv_offset", Local_NTask * sizeof(size_t)); for(int i = 0; i < Local_NTask; i++) send_count[i] = 0; @@ -492,7 +488,7 @@ inline double mycxxsort_parallel(T *begin, T *end, Comp comp, MPI_Comm comm) send_count[target]++; } - MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_CommLocal); + myMPI_Alltoall(send_count, sizeof(size_t), MPI_BYTE, recv_count, sizeof(size_t), MPI_BYTE, MPI_CommLocal); size_t nimport = 0; @@ -524,7 +520,7 @@ inline double mycxxsort_parallel(T *begin, T *end, Comp comp, MPI_Comm comm) T *basetmp = (T *)Mem.mymalloc("basetmp", nmemb * size); /* exchange the data */ - MPI_Alltoallv(begin, send_count, send_offset, MPI_BYTE, basetmp, recv_count, recv_offset, MPI_BYTE, MPI_CommLocal); + myMPI_Alltoallv(begin, send_count, send_offset, basetmp, recv_count, recv_offset, sizeof(char), 1, MPI_CommLocal); memcpy(static_cast<void *>(begin), static_cast<void *>(basetmp), nmemb * size); Mem.myfree(basetmp); diff --git a/src/sph/density.cc b/src/sph/density.cc index d152b6779009830362b434bc16c8f77a06c12aea..5cd6380014717f052cfb38f41d2140aa0960ed75 100644 --- a/src/sph/density.cc +++ b/src/sph/density.cc @@ -551,12 +551,6 @@ void sph::density(int *list, int ntarget) } #endif -#ifdef TIMEDEP_ART_VISC - double dt = (Tp->P[target].getTimeBinHydro() ? 
(((integertime)1) << Tp->P[target].getTimeBinHydro()) : 0) * - All.Timebase_interval; - double dtime = All.cf_atime * dt / All.cf_atime_hubble_a; - SphP[target].set_viscosity_coefficient(dtime); -#endif #ifdef ADAPTIVE_HYDRO_SOFTENING Tp->P[target].setSofteningClass(Tp->get_softeningtype_for_hydro_particle(target)); #endif @@ -669,6 +663,17 @@ void sph::density(int *list, int ntarget) } while(ndensities > 0); +#ifdef TIMEDEP_ART_VISC + for(int i = 0; i < ntarget; i++) + { + int target = list[i]; + double dt = + (Tp->P[target].getTimeBinHydro() ? (((integertime)1) << Tp->P[target].getTimeBinHydro()) : 0) * All.Timebase_interval; + double dtime = All.cf_atime * dt / All.cf_atime_hubble_a; + Tp->SphP[target].set_viscosity_coefficient(dtime); + } +#endif + TIMER_START(CPU_DENSIMBALANCE); MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX, D->Communicator); diff --git a/src/sph/sph.h b/src/sph/sph.h index ac76000e33ba8893e680ef1b01aeb82d5575b2ab..18196572141ac3be9b98e059de13a02e6fbe81c8 100644 --- a/src/sph/sph.h +++ b/src/sph/sph.h @@ -12,6 +12,8 @@ #ifndef SPH_H #define SPH_H +#include "gadgetconfig.h" + #include "../mpi_utils/shared_mem_handler.h" #include "../ngbtree/ngbtree.h" diff --git a/src/subfind/subfind.cc b/src/subfind/subfind.cc index be21714bde8f4b0ffb11c4d05473767f04bb7ec6..1b8e42f2edc88afaf2b23d900ce094866b04f904 100644 --- a/src/subfind/subfind.cc +++ b/src/subfind/subfind.cc @@ -162,7 +162,7 @@ void fof<partset>::subfind_find_subhalos(int num, const char *basename, const ch for(int task = 1; task < NTask; task++) byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; - MPI_Allgatherv(locProcAssign, bytecounts[ThisTask], MPI_BYTE, ProcAssign, bytecounts, byteoffset, MPI_BYTE, Communicator); + myMPI_Allgatherv(locProcAssign, bytecounts[ThisTask], MPI_BYTE, ProcAssign, bytecounts, byteoffset, MPI_BYTE, Communicator); Mem.myfree(byteoffset); Mem.myfree(bytecounts); @@ -402,6 +402,7 @@ void fof<partset>::subfind_find_subhalos(int num, 
const char *basename, const ch else subfind_processing(&SubDomain, SERIAL_SUBFIND); /* we have several groups in full to be done by the local CPU */ } + MPI_Barrier(Communicator); double ti1 = Logs.second(); mpi_printf("SUBFIND: Processing overall took (total time=%g sec)\n", Logs.timediff(ti0, ti1)); @@ -687,7 +688,7 @@ void fof<partset>::subfind_assign_subhalo_offsettype(void) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -710,7 +711,7 @@ void fof<partset>::subfind_assign_subhalo_offsettype(void) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, + myMPI_Sendrecv(&export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, &import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } @@ -726,7 +727,7 @@ void fof<partset>::subfind_assign_subhalo_offsettype(void) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, + myMPI_Sendrecv(&import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, &export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } @@ -817,7 +818,7 @@ void fof<partset>::subfind_redetermine_groupnr(void) if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + 
myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -840,7 +841,7 @@ void fof<partset>::subfind_redetermine_groupnr(void) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, + myMPI_Sendrecv(&export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, &import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } @@ -876,7 +877,7 @@ void fof<partset>::subfind_redetermine_groupnr(void) int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, + myMPI_Sendrecv(&import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, &export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE); } diff --git a/src/subfind/subfind_density.cc b/src/subfind/subfind_density.cc index 4bee2aa8649526515dd88636be84c977b7cfa7be..6fafad8c40dc831fc5b5234e01fd4a28c86e677e 100644 --- a/src/subfind/subfind_density.cc +++ b/src/subfind/subfind_density.cc @@ -524,7 +524,7 @@ void fof<partset>::subfind_density_hsml_guess(void) /* set the initial guess for if(hsml_prev) Tp->PS[i].v.DM_Hsml = hsml_prev; else - Tp->PS[i].v.DM_Hsml = All.SofteningTable[Tp->P[i].getType()]; + Tp->PS[i].v.DM_Hsml = All.SofteningTable[All.SofteningClassOfPartType[Tp->P[i].getType()]]; } } } diff --git a/src/subfind/subfind_distribute.cc b/src/subfind/subfind_distribute.cc index 
9314bd00182a7edb0d4065ad74aa3183ce9859f2..f4c3a3e52e98fbf3abbbb865115cac2c9f660ffe 100644 --- a/src/subfind/subfind_distribute.cc +++ b/src/subfind/subfind_distribute.cc @@ -57,7 +57,7 @@ void fof<partset>::subfind_distribute_groups(void) Send_count[target]++; } - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; int nexport = 0, nimport = 0; @@ -110,7 +110,7 @@ void fof<partset>::subfind_distribute_groups(void) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the group info */ - MPI_Sendrecv(&send_Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_properties), MPI_BYTE, recvTask, + myMPI_Sendrecv(&send_Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_properties), MPI_BYTE, recvTask, TAG_DENS_A, &Group[Ngroups + Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_properties), MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE); } @@ -163,7 +163,7 @@ void fof<partset>::subfind_distribute_particles(MPI_Comm Communicator) } } - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); int nimport = 0, nexport = 0; Recv_offset[0] = 0, Send_offset[0] = 0; @@ -231,7 +231,7 @@ void fof<partset>::subfind_distribute_particles(MPI_Comm Communicator) { if(Recv_count[target] > 0) { - MPI_Irecv(Tp->P + Recv_offset[target], Recv_count[target] * sizeof(particle_data), MPI_BYTE, target, TAG_PDATA, + MPI_Irecv(Tp->P + Recv_offset[target], Recv_count[target] * sizeof(typename partset::pdata), MPI_BYTE, target, TAG_PDATA, Communicator, &requests[n_requests++]); MPI_Irecv(Tp->PS + Recv_offset[target], Recv_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY, Communicator, &requests[n_requests++]); @@ -247,8 +247,8 @@ void 
fof<partset>::subfind_distribute_particles(MPI_Comm Communicator) { if(Send_count[target] > 0) { - MPI_Issend(partBuf + Send_offset[target], Send_count[target] * sizeof(particle_data), MPI_BYTE, target, TAG_PDATA, - Communicator, &requests[n_requests++]); + MPI_Issend(partBuf + Send_offset[target], Send_count[target] * sizeof(typename partset::pdata), MPI_BYTE, target, + TAG_PDATA, Communicator, &requests[n_requests++]); MPI_Issend(subBuf + Send_offset[target], Send_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY, Communicator, &requests[n_requests++]); } @@ -267,13 +267,13 @@ void fof<partset>::subfind_distribute_particles(MPI_Comm Communicator) { if(Send_count[target] > 0 || Recv_count[target] > 0) { - MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(particle_data), MPI_BYTE, target, TAG_PDATA, - Tp->P + Recv_offset[target], Recv_count[target] * sizeof(particle_data), MPI_BYTE, target, TAG_PDATA, - Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(typename partset::pdata), MPI_BYTE, target, + TAG_PDATA, Tp->P + Recv_offset[target], Recv_count[target] * sizeof(typename partset::pdata), MPI_BYTE, + target, TAG_PDATA, Communicator, MPI_STATUS_IGNORE); - MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY, - Tp->PS + Recv_offset[target], Recv_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY, - Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY, + Tp->PS + Recv_offset[target], Recv_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY, + Communicator, MPI_STATUS_IGNORE); } } } diff --git a/src/subfind/subfind_history.cc b/src/subfind/subfind_history.cc index b709a2489027ec033b02da42bbde46105732f23d..45abc8ba0a121708a4b47499a7e47598a534ad03 100644 --- a/src/subfind/subfind_history.cc +++ 
b/src/subfind/subfind_history.cc @@ -167,7 +167,7 @@ void fof<partset>::subfind_hbt_single_group(domain<partset> *SubDomain, domain<p for(int i = 1; i < SubNTask; i++) offset[i] = offset[i - 1] + countlist[i - 1]; - MPI_Allgatherv(loc_candidates, count, MPI_BYTE, all_candidates, countlist, offset, MPI_BYTE, SubComm); + myMPI_Allgatherv(loc_candidates, count, MPI_BYTE, all_candidates, countlist, offset, MPI_BYTE, SubComm); /* sort the candidates by subhalonr */ mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr); @@ -696,7 +696,7 @@ void fof<partset>::subfind_hbt_single_group(domain<partset> *SubDomain, domain<p hbt_subhalo_t *all_subhalo_list = (hbt_subhalo_t *)Mem.mymalloc("all_subhalo_list", countall * sizeof(hbt_subhalo_t)); - MPI_Allgatherv(subhalo_list, sizelocsubhalolist, MPI_BYTE, all_subhalo_list, countlist, offset, MPI_BYTE, SubComm); + myMPI_Allgatherv(subhalo_list, sizelocsubhalolist, MPI_BYTE, all_subhalo_list, countlist, offset, MPI_BYTE, SubComm); /* sort locally */ mycxxsort(all_subhalo_list, all_subhalo_list + countall, subfind_hbt_compare_subhalolist_prevsubhalonr); diff --git a/src/subfind/subfind_orphanids.cc b/src/subfind/subfind_orphanids.cc index 3ae109ccf516d89e915346a23566686b9111fe0b..4acd73cab6166cf20f6806459237442974a593ce 100644 --- a/src/subfind/subfind_orphanids.cc +++ b/src/subfind/subfind_orphanids.cc @@ -93,7 +93,7 @@ void fof<simparticles>::subfind_match_ids_of_previously_most_bound_ids(simpartic if(mode == 0) { - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); Recv_offset[0] = Send_offset[0] = 0; for(int j = 0; j < NTask; j++) { @@ -116,9 +116,9 @@ void fof<simparticles>::subfind_match_ids_of_previously_most_bound_ids(simpartic int recvTask = ThisTask ^ ngrp; if(recvTask < NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_data[Send_offset[recvTask]], 
Send_count[recvTask] * sizeof(MyIDType), MPI_BYTE, recvTask, TAG_DENS_B, - &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(MyIDType), MPI_BYTE, recvTask, TAG_DENS_B, - Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(MyIDType), MPI_BYTE, recvTask, TAG_DENS_B, + &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(MyIDType), MPI_BYTE, recvTask, TAG_DENS_B, + Communicator, MPI_STATUS_IGNORE); } /* incoming data should already be sorted, so now do the match */ diff --git a/src/subfind/subfind_readid_io.cc b/src/subfind/subfind_readid_io.cc index bf6847c2d28dbe114be5ab03f4fdd9ca7fdb1a2d..ce4c226bbaf70556740cff1f7f807e0787a70f27 100644 --- a/src/subfind/subfind_readid_io.cc +++ b/src/subfind/subfind_readid_io.cc @@ -48,7 +48,7 @@ subreadid_io::subreadid_io(idstoredata *IdStore_ptr, MPI_Comm comm, int format) this->header_size = sizeof(header); this->header_buf = &header; this->type_of_file = FILE_IS_SNAPSHOT; - sprintf(this->info, "MERGERTREE: reading snapshot IDs"); + snprintf(this->info, MAXLEN_PATH, "MERGERTREE: reading snapshot IDs"); init_field("ID ", "ParticleIDs", MEM_MY_ID_TYPE, FILE_MY_ID_TYPE, READ_IF_PRESENT, 1, A_IDS, IdStore->ID, NULL, ALL_TYPES, 0, 0, 0, 0, 0, 0, 0); @@ -82,11 +82,13 @@ void subreadid_io::previously_bound_read_snap_ids(int num) char fname[MAXLEN_PATH_EXTRA], fname_multiple[MAXLEN_PATH_EXTRA]; #ifdef ALT_NAMING - sprintf(fname_multiple, "%s/snapdir_%06d/%s-prevmostboundonly_%06d", All.OutputDir, num, All.SnapshotFileBase, num); - sprintf(fname, "%s%s_%06d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/snapdir_%06d/%s-prevmostboundonly_%06d", All.OutputDir, num, All.SnapshotFileBase, + num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%06d", All.OutputDir, All.SnapshotFileBase, num); #else - sprintf(fname_multiple, "%s/snapdir_%03d/%s-prevmostboundonly_%03d", All.OutputDir, num, 
All.SnapshotFileBase, num); - sprintf(fname, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num); + snprintf(fname_multiple, MAXLEN_PATH_EXTRA, "%s/snapdir_%03d/%s-prevmostboundonly_%03d", All.OutputDir, num, All.SnapshotFileBase, + num); + snprintf(fname, MAXLEN_PATH_EXTRA, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num); #endif TIMER_START(CPU_SNAPSHOT); @@ -239,7 +241,7 @@ void subreadid_io::set_filenr_in_header(int numfiles) { header.num_files = numfi void subreadid_io::read_increase_numbers(int type, int n_for_this_task) { IdStore->NumPart += n_for_this_task; } -void subreadid_io::get_datagroup_name(int type, char *buf) { sprintf(buf, "/PartType%d", type); } +void subreadid_io::get_datagroup_name(int type, char *buf) { snprintf(buf, MAXLEN_PATH, "/PartType%d", type); } int subreadid_io::get_type_of_element(int index) { return 0; /* empty */ } diff --git a/src/subfind/subfind_treepotential.cc b/src/subfind/subfind_treepotential.cc index f478c749b3554bfba358caf26b2e2a371e8e4348..c3d4c2d7c9e547efeda2ba1bcbfbd5a076cf7ded 100644 --- a/src/subfind/subfind_treepotential.cc +++ b/src/subfind/subfind_treepotential.cc @@ -161,7 +161,7 @@ class potdata_comm : public generic_comm<potdata_in, potdata_out, T_tree, T_doma nop = Tree->get_nodep(no, shmrank); - if(nop->level == 0) + if(nop->level <= LEVEL_ALWAYS_OPEN) { /* we always open the root node (its full node length couldn't be stored in the integer type */ no = nop->nextnode; diff --git a/src/system/pinning.h b/src/system/pinning.h index 27cbce0fbd052534793cf906bb876ff945204299..77fe441567205dcf81f2f43c456573f703bf8dc9 100644 --- a/src/system/pinning.h +++ b/src/system/pinning.h @@ -12,6 +12,8 @@ #ifndef PINNING_H #define PINNING_H +#include "gadgetconfig.h" + #include <gsl/gsl_rng.h> #include <math.h> #include <mpi.h> @@ -24,7 +26,6 @@ #include "../main/main.h" #include "../mpi_utils/setcomm.h" #include "../system/system.h" -#include "gadgetconfig.h" /*! 
\file pinning.c * \brief examines cpu topology and binds processes and threads to cores diff --git a/src/system/system.cc b/src/system/system.cc index 8530d80952db8eca18e095deda04acef842b36f9..484b8ce86cae5afc6661f58c06817d0798ea7a46 100644 --- a/src/system/system.cc +++ b/src/system/system.cc @@ -218,7 +218,7 @@ void sim::mpi_report_comittable_memory(void) double avgsize[7]; int i, imem, mintask[7], maxtask[7]; long long Mem[7]; - char label[512]; + char label[MAXLEN_PATH]; Mem[0] = report_comittable_memory(&Mem[1], &Mem[2], &Mem[3], &Mem[4]); Mem[5] = Mem[1] - Mem[0]; @@ -262,25 +262,25 @@ void sim::mpi_report_comittable_memory(void) switch(imem) { case 0: - sprintf(label, "AvailMem"); + snprintf(label, MAXLEN_PATH, "AvailMem"); break; case 1: - sprintf(label, "Total Mem"); + snprintf(label, MAXLEN_PATH, "Total Mem"); break; case 2: - sprintf(label, "Committed_AS"); + snprintf(label, MAXLEN_PATH, "Committed_AS"); break; case 3: - sprintf(label, "SwapTotal"); + snprintf(label, MAXLEN_PATH, "SwapTotal"); break; case 4: - sprintf(label, "SwapFree"); + snprintf(label, MAXLEN_PATH, "SwapFree"); break; case 5: - sprintf(label, "AllocMem"); + snprintf(label, MAXLEN_PATH, "AllocMem"); break; case 6: - sprintf(label, "avail /dev/shm"); + snprintf(label, MAXLEN_PATH, "avail /dev/shm"); break; } printf("%s:\t Largest = %10.2f Mb (on task=%4d), Smallest = %10.2f Mb (on task=%4d), Average = %10.2f Mb\n", label, diff --git a/src/system/system.h b/src/system/system.h index 882325ee481044ac23fbe32807e0885619d1eced..f452e2411375a0e69cdfa70de0205ba7d51e4f0a 100644 --- a/src/system/system.h +++ b/src/system/system.h @@ -12,6 +12,8 @@ #ifndef SYSTEM_H #define SYSTEM_H +#include "gadgetconfig.h" + #include <gsl/gsl_rng.h> #include <stdio.h> diff --git a/src/time_integration/driftfac.cc b/src/time_integration/driftfac.cc index 0197eed6b2396e1dfde657d892efdf5a68b46417..dab37b4c58079e281814055193f89602bf2e5450 100644 --- a/src/time_integration/driftfac.cc +++ 
b/src/time_integration/driftfac.cc @@ -13,6 +13,7 @@ #include <gsl/gsl_integration.h> #include <gsl/gsl_math.h> +#include <gsl/gsl_odeiv2.h> #include <math.h> #include <mpi.h> #include <stdio.h> @@ -266,3 +267,63 @@ integertime driftfac::get_gravkick_factor_inverse(double fac) return time0; } + +double driftfac::linear_growth_factor(const double astart, const double aend) +{ + // if we have a simple cosmology without radiation, with matter, a cosmological constant, and flat space, + // we can apply the simple integral solution: + + /// return linear_growth_simple(aend) / linear_growth_simple(astart); + + // but in the more general case, we need to integrate the ODE directly to find the correct + // growth factor. + + return linear_growth_ode(aend) / linear_growth_ode(astart); +} + +double driftfac::linear_growth_ode(const double a) +{ + gsl_odeiv2_system sys = {growth_ode_int, NULL, 2, NULL}; + gsl_odeiv2_driver *d = gsl_odeiv2_driver_alloc_y_new(&sys, gsl_odeiv2_step_rk8pd, 1e-6, 1e-6, 0.0); + + double amin = 0.00001; // start time + + double epsilon = 3 + 2 * amin * E_of_a_diff(amin) / E_of_a(amin); + double w = 1 - get_OmegaMatter_a(amin); + double n = 0.25 * (-1 - epsilon + sqrt(pow(1 + epsilon, 2) + 24 * (1 - w))); + + // ICs + double D[2] = {1.0, n / amin}; + double acurrent = amin; + + int status = gsl_odeiv2_driver_apply(d, &acurrent, a, D); + + if(status != GSL_SUCCESS) + Terminate("error, ODE return value=%d\n", status); + + gsl_odeiv2_driver_free(d); + + return D[0]; +} + +double driftfac::linear_growth_simple(const double a) +{ + /* this simple integral solution is only correct for LCDM variants, i.e. 
+ * no radiation, flat space, and simple cosmological constant + */ + const int worksize = WORKSIZE; + + double result, abserr; + gsl_function F; + + gsl_integration_workspace *workspace = gsl_integration_workspace_alloc(worksize); + F.function = &growth_simple_int; + + gsl_integration_qag(&F, 0, a, 0, 1.0e-8, worksize, GSL_INTEG_GAUSS41, workspace, &result, &abserr); + + gsl_integration_workspace_free(workspace); + + const double hubble_a = hubble_function(a) / All.Hubble; + + return hubble_a * result; +} diff --git a/src/time_integration/driftfac.h b/src/time_integration/driftfac.h index 8e53e59006593ae428ce6dcc53f502fc63481d1c..1cd0027f5778b07f2e2403fcfd626adb82790cbc 100644 --- a/src/time_integration/driftfac.h +++ b/src/time_integration/driftfac.h @@ -12,6 +12,8 @@ #ifndef DRIFTFAC_H #define DRIFTFAC_H +#include "gadgetconfig.h" + #include <gsl/gsl_integration.h> #include <gsl/gsl_math.h> #include <math.h> @@ -22,7 +24,6 @@ #include "../data/dtypes.h" #include "../main/main.h" -#include "gadgetconfig.h" class driftfac { @@ -36,23 +37,41 @@ class driftfac double get_scalefactor_for_comoving_distance(double dist); integertime get_gravkick_factor_inverse(double fac); - static double hubble_function(double a) + static double E_of_a(double a) { #if defined(RADIATION) && !defined(SMOOTHMATTER) - double hubble_a = All.Omega0 / (a * a * a) + (1 - All.Omega0 - All.OmegaLambda - All.OmegaR) / (a * a) + All.OmegaLambda + All.OmegaR / (a * a * a * a); + return sqrt(All.Omega0 / pow(a, 3) + All.OmegaCurvature / pow(a, 2) + All.OmegaLambda + All.OmegaR / pow(a, 4)); #elif !defined(RADIATION) && defined(SMOOTHMATTER) - double hubble_a = (All.Omega0 + All.OmegaSmooth) / (a * a * a) + (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth) / (a * a) + All.OmegaLambda; + return sqrt((All.Omega0 + All.OmegaSmooth) / pow(a, 3) + All.OmegaCurvature / pow(a, 2) + All.OmegaLambda); #elif defined(RADIATION) && defined(SMOOTHMATTER) - double hubble_a = (All.Omega0 + All.OmegaSmooth) / (a *
a * a) + (1 - All.Omega0 - All.OmegaLambda - All.OmegaSmooth - All.OmegaR) / (a * a) + All.OmegaLambda + All.OmegaR / (a * a * a * a); + return sqrt((All.Omega0 + All.OmegaSmooth) / pow(a, 3) + All.OmegaCurvature / pow(a, 2) + All.OmegaLambda + All.OmegaR / pow(a, 4)); #else - double hubble_a = All.Omega0 / (a * a * a) + (1 - All.Omega0 - All.OmegaLambda) / (a * a) + All.OmegaLambda; + return sqrt(All.Omega0 / pow(a, 3) + All.OmegaCurvature / pow(a, 2) + All.OmegaLambda); #endif + } - hubble_a = All.Hubble * sqrt(hubble_a); - - return hubble_a; + static double E_of_a_diff(double a) + { +#if defined(RADIATION) && !defined(SMOOTHMATTER) + return (-3 * All.Omega0 / pow(a, 4) + -2 * All.OmegaCurvature / pow(a, 3) + -4 * All.OmegaR / pow(a, 5)) / (2 * E_of_a(a)); +#elif !defined(RADIATION) && defined(SMOOTHMATTER) + return (-3 * (All.Omega0 + All.OmegaSmooth) / pow(a, 4) + -2 * All.OmegaCurvature / pow(a, 3)) / (2 * E_of_a(a)); +#elif defined(RADIATION) && defined(SMOOTHMATTER) + return (-3 * (All.Omega0 + All.OmegaSmooth) / pow(a, 4) + -2 * All.OmegaCurvature / pow(a, 3) + -4 * All.OmegaR / pow(a, 5)) / (2 * E_of_a(a)); +#else + return (-3 * All.Omega0 / pow(a, 4) + -2 * All.OmegaCurvature / pow(a, 3)) / (2 * E_of_a(a)); +#endif } + double linear_growth_factor(double astart, double aend); + + static double get_Omega0_a(double a) { return All.Omega0 / pow(a, 3) * pow(All.Hubble / hubble_function(a), 2); } + static double get_OmegaLambda_a(double a) { return All.OmegaLambda * pow(All.Hubble / hubble_function(a), 2); } + static double get_OmegaCurvature_a(double a) { return All.OmegaCurvature / pow(a, 2) * pow(All.Hubble / hubble_function(a), 2); } + static double get_OmegaMatter_a(double a) { return get_Omega0_a(a); } + + static double hubble_function(double a) { return All.Hubble * E_of_a(a); } + private: #define DRIFT_TABLE_LENGTH 1000 @@ -68,6 +87,9 @@ class driftfac double logTimeBegin; double logTimeMax; + double linear_growth_simple(double a); + double 
linear_growth_ode(double a); + static double drift_integ(double a, void *param) { double h = hubble_function(a); @@ -88,6 +110,21 @@ class driftfac return 1 / (h * pow(a, 3 * GAMMA_MINUS1) * a); } + + static double growth_simple_int(const double a, void *param) + { + if(a == 0) + return 0; + else + return 1.0 / pow(a * hubble_function(a) / All.Hubble, 3); + } + + static int growth_ode_int(double a, const double y[], double dyda[], void *params) + { + dyda[0] = y[1]; + dyda[1] = -(3.0 / a + E_of_a_diff(a) / E_of_a(a)) * y[1] + 1.5 * get_OmegaMatter_a(a) / pow(a, 2) * y[0]; + return GSL_SUCCESS; + } }; extern driftfac Driftfac; diff --git a/src/time_integration/kicks.cc b/src/time_integration/kicks.cc index 1d552ae959efad629c2f5178e941caf61873a140..5f12f475d66078ac111c8f368623b3b5e164c644 100644 --- a/src/time_integration/kicks.cc +++ b/src/time_integration/kicks.cc @@ -278,7 +278,7 @@ void sim::do_gravity_step_second_half(void) char fullmark[8]; if(All.HighestActiveTimeBin == All.HighestOccupiedTimeBin) - sprintf(fullmark, "(*)"); + snprintf(fullmark, 8, "(*)"); else fullmark[0] = 0; diff --git a/src/time_integration/timestep.cc b/src/time_integration/timestep.cc index c6087b717e80eb6b47db56436ee602e345350de9..970e2b109b55c89116d53abcc9c19ab56cb5e710 100644 --- a/src/time_integration/timestep.cc +++ b/src/time_integration/timestep.cc @@ -323,7 +323,8 @@ integertime simparticles::get_timestep_hydro(int p /*!< particle index */) { if(SphP[p].Sfr > 0) { - double dt_sfr = 0.1 * P[p].getMass() / SphP[p].Sfr; + double dt_sfr = + 0.1 * P[p].getMass() / (SphP[p].Sfr / ((All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR))); if(dt_sfr < dt) dt = dt_sfr; } diff --git a/src/tree/tree.cc b/src/tree/tree.cc index a487a55824f9a367f59b4875d28516ca8ce38086..7e06f7a12df2e7bfe307a8e162799d8a8cf30833 100644 --- a/src/tree/tree.cc +++ b/src/tree/tree.cc @@ -290,7 +290,7 @@ int tree<node, partset, point_data, foreign_point_data>::treebuild_construct(voi 
Send_count[task]++; } - MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, D->Communicator); + myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, D->Communicator); NumPartImported = 0; NumPartExported = 0; @@ -359,9 +359,9 @@ int tree<node, partset, point_data, foreign_point_data>::treebuild_construct(voi int recvTask = D->ThisTask ^ ngrp; if(recvTask < D->NTask) if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) - MPI_Sendrecv(&export_Points[Send_offset[recvTask]], Send_count[recvTask] * sizeof(point_data), MPI_BYTE, recvTask, - TAG_DENS_A, &Points[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(point_data), MPI_BYTE, recvTask, - TAG_DENS_A, D->Communicator, MPI_STATUS_IGNORE); + myMPI_Sendrecv(&export_Points[Send_offset[recvTask]], Send_count[recvTask] * sizeof(point_data), MPI_BYTE, recvTask, + TAG_DENS_A, &Points[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(point_data), MPI_BYTE, recvTask, + TAG_DENS_A, D->Communicator, MPI_STATUS_IGNORE); } Mem.myfree(export_Points); @@ -703,9 +703,10 @@ int tree<node, partset, point_data, foreign_point_data>::create_empty_nodes( { if(All.TreeAllocFactor > MAX_TREE_ALLOC_FACTOR) { - char buf[MAXLEN_PATH]; - sprintf(buf, "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", D->ThisTask, - D->NTopnodes); + char buf[MAXLEN_PATH_EXTRA]; + snprintf(buf, MAXLEN_PATH_EXTRA, + "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", D->ThisTask, + D->NTopnodes); Tp->dump_particles(); Terminate(buf); } @@ -957,6 +958,10 @@ void tree<node, partset, point_data, foreign_point_data>::prepare_shared_memory_ Shmem.inform_offset_table(Foreign_Nodes); Shmem.inform_offset_table(Foreign_Points); + MPI_Barrier(Shmem.SharedMemComm); // this barrier is in principle superfluous, but on some systems, + // the MPI_Gather in prepare_offset_table() can return prematurely + // on the target rank before all data has arrived + /* the following is 
needed to make sure that the shared memory handler on different nodes is already properly initialized */ MPI_Barrier(D->Communicator); } diff --git a/src/tree/tree.h b/src/tree/tree.h index 5385a880d2f82817226262434623fcce535306d6..873c52e93f38c86a1af97fb6a453ca78fa340f54 100644 --- a/src/tree/tree.h +++ b/src/tree/tree.h @@ -23,6 +23,8 @@ #define TREE_MAX_ITER 100 +#include "gadgetconfig.h" + #include <mpi.h> #include "../domain/domain.h" @@ -159,8 +161,8 @@ class tree int NextFreeNode; MPI_Comm TreeSharedMemComm; - int TreeSharedMem_ThisTask; - int TreeSharedMem_NTask; + int TreeSharedMem_ThisTask = 0; + int TreeSharedMem_NTask = 0; int TreeInfoHandle; diff --git a/src/vectorclass/instrset.h b/src/vectorclass/instrset.h index c025df3f3a4f460c5f72434711200bf46be62e76..a0c3d7cf470bfa84a768225ac717da17b2c39f11 100644 --- a/src/vectorclass/instrset.h +++ b/src/vectorclass/instrset.h @@ -20,6 +20,8 @@ #ifndef INSTRSET_H #define INSTRSET_H 125 +#include "gadgetconfig.h" + // Detect 64 bit mode #if(defined(_M_AMD64) || defined(_M_X64) || defined(__amd64)) && !defined(__x86_64__) #define __x86_64__ 1 // There are many different macros for this, decide on only one diff --git a/src/vectorclass/instrset_detect.cpp b/src/vectorclass/instrset_detect.cpp index a0cf8dfd8a50e0d4e93e519194e52dce43275c97..760762ae653fcdfca74ef9accdf7efc90d4c35a8 100644 --- a/src/vectorclass/instrset_detect.cpp +++ b/src/vectorclass/instrset_detect.cpp @@ -10,6 +10,8 @@ * (c) Copyright 2012-2017 GNU General Public License http://www.gnu.org/licenses \*****************************************************************************/ +#include "gadgetconfig.h" + #include "instrset.h" #ifdef VCL_NAMESPACE