p2p_distr_mpi.hpp 50 KB
Newer Older
1
2
3
4
/******************************************************************************
*                                                                             *
*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
*                                                                             *
Cristian Lalescu's avatar
Cristian Lalescu committed
5
*  This file is part of TurTLE.                                               *
6
*                                                                             *
Cristian Lalescu's avatar
Cristian Lalescu committed
7
*  TurTLE is free software: you can redistribute it and/or modify             *
8
9
10
11
*  it under the terms of the GNU General Public License as published          *
*  by the Free Software Foundation, either version 3 of the License,          *
*  or (at your option) any later version.                                     *
*                                                                             *
Cristian Lalescu's avatar
Cristian Lalescu committed
12
*  TurTLE is distributed in the hope that it will be useful,                  *
13
14
15
16
17
*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
*  GNU General Public License for more details.                               *
*                                                                             *
*  You should have received a copy of the GNU General Public License          *
Cristian Lalescu's avatar
Cristian Lalescu committed
18
*  along with TurTLE.  If not, see <http://www.gnu.org/licenses/>             *
19
20
21
22
23
24
25
*                                                                             *
* Contact: Cristian.Lalescu@ds.mpg.de                                         *
*                                                                             *
******************************************************************************/



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#ifndef P2P_DISTR_MPI_HPP
#define P2P_DISTR_MPI_HPP

#include <mpi.h>

#include <vector>
#include <memory>
#include <cassert>

#include <type_traits>
#include <omp.h>
#include <algorithm>

#include "scope_timer.hpp"
#include "particles_utils.hpp"
#include "p2p_tree.hpp"
42
#include "lock_free_bool_array.hpp"
43
44
45
46

template <class partsize_t, class real_number>
class p2p_distr_mpi {
protected:
Cristian Lalescu's avatar
Cristian Lalescu committed
47
    static const int MaxNbRhs = 10;
48
49
50
51

    enum MpiTag{
        TAG_NB_PARTICLES,
        TAG_POSITION_PARTICLES,
52
        TAG_INDEX_PARTICLES,
53
54
55
56
57
58
59
60
        TAG_RESULT_PARTICLES,
    };

    struct NeighborDescriptor{
        partsize_t nbParticlesToExchange;
        int destProc;
        int nbLevelsToExchange;
        bool isRecv;
61
        int nbReceived;
62
63
64
65

        std::unique_ptr<real_number[]> toRecvAndMerge;
        std::unique_ptr<real_number[]> toCompute;
        std::unique_ptr<real_number[]> results;
66
        std::unique_ptr<partsize_t[]> indexes;
67
68
69
    };

    enum Action{
70
        NOTHING_TODO = 512,
71
72
73
        RECV_PARTICLES,
        COMPUTE_PARTICLES,
        RELEASE_BUFFER_PARTICLES,
74
        MERGE_PARTICLES
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
    };

    MPI_Comm current_com;

    int my_rank;
    int nb_processes;
    int nb_processes_involved;

    const std::pair<int,int> current_partition_interval;
    const int current_partition_size;
    const std::array<size_t,3> field_grid_dim;

    std::unique_ptr<int[]> partition_interval_size_per_proc;
    std::unique_ptr<int[]> partition_interval_offset_per_proc;

    std::unique_ptr<partsize_t[]> current_offset_particles_for_partition;

    std::vector<std::pair<Action,int>> whatNext;
    std::vector<MPI_Request> mpiRequests;
    std::vector<NeighborDescriptor> neigDescriptors;

    std::array<real_number,3> spatial_box_width;
    std::array<real_number,3> spatial_box_offset;

99
    const real_number cutoff_radius_compute;
100
    const int nb_cells_factor;
101
102
103
    const real_number cutoff_radius;
    std::array<long int,3> nb_cell_levels;

104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
    template <class DataType, int sizeElement>
    static void permute_copy(const partsize_t offsetIdx, const partsize_t nbElements,
                             const std::pair<long int,partsize_t> permutation[],
                             DataType data[], std::vector<unsigned char>* buffer){
        buffer->resize(nbElements*sizeof(DataType)*sizeElement);
        DataType* dataBuffer = reinterpret_cast<DataType*>(buffer->data());

        // Permute
        for(partsize_t idxPart = 0 ; idxPart < nbElements ; ++idxPart){
            const partsize_t srcData = permutation[idxPart].second;
            const partsize_t destData = idxPart;
            for(int idxVal = 0 ; idxVal < sizeElement ; ++idxVal){
                dataBuffer[destData*sizeElement + idxVal]
                        = data[srcData*sizeElement + idxVal];
            }
        }

        // Copy back
        for(partsize_t idxPart = 0 ; idxPart < nbElements ; ++idxPart){
            const partsize_t srcData = idxPart;
            const partsize_t destData = idxPart+offsetIdx;
            for(int idxVal = 0 ; idxVal < sizeElement ; ++idxVal){
                data[destData*sizeElement + idxVal]
                        = dataBuffer[srcData*sizeElement + idxVal];
            }
        }
    }

132
    static int foundGridFactor(const real_number in_cutoff_radius, const std::array<real_number,3>& in_spatial_box_width){
133
        int idx_factor = 1;
134
        while(in_cutoff_radius <= in_spatial_box_width[IDXC_Z]/real_number(idx_factor+1)){
135
136
            idx_factor += 1;
        }
137
        return idx_factor;
138
139
    }

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
public:
    ////////////////////////////////////////////////////////////////////////////

    p2p_distr_mpi(MPI_Comm in_current_com,
                     const std::pair<int,int>& in_current_partitions,
                     const std::array<size_t,3>& in_field_grid_dim,
                     const std::array<real_number,3>& in_spatial_box_width,
                     const std::array<real_number,3>& in_spatial_box_offset,
                     const real_number in_cutoff_radius)
        : current_com(in_current_com),
            my_rank(-1), nb_processes(-1),nb_processes_involved(-1),
            current_partition_interval(in_current_partitions),
            current_partition_size(current_partition_interval.second-current_partition_interval.first),
            field_grid_dim(in_field_grid_dim),
            spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset),
155
            cutoff_radius_compute(in_cutoff_radius),
156
            nb_cells_factor(foundGridFactor(in_cutoff_radius, in_spatial_box_width)),
157
            cutoff_radius(in_spatial_box_width[IDXC_Z]/real_number(nb_cells_factor)){
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184

        AssertMpi(MPI_Comm_rank(current_com, &my_rank));
        AssertMpi(MPI_Comm_size(current_com, &nb_processes));

        partition_interval_size_per_proc.reset(new int[nb_processes]);
        AssertMpi( MPI_Allgather( const_cast<int*>(&current_partition_size), 1, MPI_INT,
                                  partition_interval_size_per_proc.get(), 1, MPI_INT,
                                  current_com) );
        assert(partition_interval_size_per_proc[my_rank] == current_partition_size);

        partition_interval_offset_per_proc.reset(new int[nb_processes+1]);
        partition_interval_offset_per_proc[0] = 0;
        for(int idxProc = 0 ; idxProc < nb_processes ; ++idxProc){
            partition_interval_offset_per_proc[idxProc+1] = partition_interval_offset_per_proc[idxProc] + partition_interval_size_per_proc[idxProc];
        }

        current_offset_particles_for_partition.reset(new partsize_t[current_partition_size+1]);

        nb_processes_involved = nb_processes;
        while(nb_processes_involved != 0 && partition_interval_size_per_proc[nb_processes_involved-1] == 0){
            nb_processes_involved -= 1;
        }
        assert(nb_processes_involved != 0);
        for(int idx_proc_involved = 0 ; idx_proc_involved < nb_processes_involved ; ++idx_proc_involved){
            assert(partition_interval_size_per_proc[idx_proc_involved] != 0);
        }

185
        assert(int(field_grid_dim[IDXC_Z]) == partition_interval_offset_per_proc[nb_processes_involved]);
186

187
188
189
        nb_cell_levels[IDXC_X] = nb_cells_factor;
        nb_cell_levels[IDXC_Y] = nb_cells_factor;
        nb_cell_levels[IDXC_Z] = nb_cells_factor;
190
191
192
193
194
195
    }

    virtual ~p2p_distr_mpi(){}

    ////////////////////////////////////////////////////////////////////////////

196
197
198
199
200
201
202
203
    int getGridFactor() const{
        return nb_cells_factor;
    }

    real_number getGridCutoff() const{
        return cutoff_radius;
    }

204
    long int get_cell_coord_x_from_index(const long int index) const{
205
        return index % nb_cell_levels[IDXC_X];
206
207
208
    }

    long int get_cell_coord_y_from_index(const long int index) const{
209
210
        return (index % (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y]))
                / nb_cell_levels[IDXC_X];
211
212
213
    }

    long int get_cell_coord_z_from_index(const long int index) const{
214
        return index / (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y]);
215
216
217
    }

    long int first_cell_level_proc(const int dest_proc) const{
218
        const real_number field_section_width_z = spatial_box_width[IDXC_Z]/real_number(field_grid_dim[IDXC_Z]);
219
220
221
222
        return static_cast<long int>((field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc]))/cutoff_radius);
    }

    long int last_cell_level_proc(const int dest_proc) const{
223
        const real_number field_section_width_z = spatial_box_width[IDXC_Z]/real_number(field_grid_dim[IDXC_Z]);
224
        const long int limite = static_cast<long int>((field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc+1])
225
                                     - std::numeric_limits<real_number>::epsilon())/cutoff_radius);
226
227
228
229
230
        if(static_cast<real_number>(limite)*cutoff_radius
                == field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc+1])){
            return limite-1;
        }
        return limite;
231
232
    }

Cristian Lalescu's avatar
Cristian Lalescu committed
233
    real_number apply_pbc(real_number pos, IDX_COMPONENT_3D dim) const{
234
235
236
237
238
239
240
241
242
        while( pos < spatial_box_offset[dim] ){
            pos += spatial_box_width[dim];
        }
        while( spatial_box_width[dim]+spatial_box_offset[dim] <= pos){
            pos -= spatial_box_width[dim];
        }
        return pos;
    }

243
244
    std::array<long int,3> get_cell_coordinate(const real_number pos_x, const real_number pos_y,
                                               const real_number pos_z) const {
245
246
247
        const real_number diff_x = apply_pbc(pos_x,IDXC_X) - spatial_box_offset[IDXC_X];
        const real_number diff_y = apply_pbc(pos_y,IDXC_Y) - spatial_box_offset[IDXC_Y];
        const real_number diff_z = apply_pbc(pos_z,IDXC_Z) - spatial_box_offset[IDXC_Z];
248
        std::array<long int,3> coord;
249
250
251
        coord[IDXC_X] = static_cast<long int>(diff_x/cutoff_radius);
        coord[IDXC_Y] = static_cast<long int>(diff_y/cutoff_radius);
        coord[IDXC_Z] = static_cast<long int>(diff_z/cutoff_radius);
252
253
254
255
256
257
        return coord;
    }

    long int get_cell_idx(const real_number pos_x, const real_number pos_y,
                          const real_number pos_z) const {
        std::array<long int,3> coord = get_cell_coordinate(pos_x, pos_y, pos_z);
258
        return ((coord[IDXC_Z]*nb_cell_levels[IDXC_Y])+coord[IDXC_Y])*nb_cell_levels[IDXC_X]+coord[IDXC_X];
259
260
261
    }

    real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1,
Berenger Bramas's avatar
Berenger Bramas committed
262
263
                                    const real_number x2, const real_number y2, const real_number z2,
                                    const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const {
264
        real_number diff_x = std::abs(apply_pbc(x1,IDXC_X)-apply_pbc(x2,IDXC_X)+xshift_coef*spatial_box_width[IDXC_X]);
Berenger Bramas's avatar
Berenger Bramas committed
265
        assert(diff_x <= 2*cutoff_radius);
266

267
        real_number diff_y = std::abs(apply_pbc(y1,IDXC_X)-apply_pbc(y2,IDXC_X)+yshift_coef*spatial_box_width[IDXC_Y]);
Berenger Bramas's avatar
Berenger Bramas committed
268
        assert(diff_y <= 2*cutoff_radius);
269

270
        real_number diff_z = std::abs(apply_pbc(z1,IDXC_X)-apply_pbc(z2,IDXC_X)+zshift_coef*spatial_box_width[IDXC_Z]);
Berenger Bramas's avatar
Berenger Bramas committed
271
        assert(diff_z <= 2*cutoff_radius);
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
272
273
274

        return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z);
    }
275

276
    template <class computer_class, int size_particle_positions, int size_particle_rhs>
277
278
279
    void compute_distr(computer_class& in_computer,
                       const partsize_t current_my_nb_particles_per_partition[],
                       real_number particles_positions[],
280
                       std::unique_ptr<real_number[]> particles_current_rhs[], const int nb_rhs,
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
                       partsize_t inout_index_particles[]){
        TIMEZONE("compute_distr");

        // Some processes might not be involved
        if(nb_processes_involved <= my_rank){
            return;
        }

        const long int my_top_z_cell_level = last_cell_level_proc(my_rank);
        const long int my_down_z_cell_level = first_cell_level_proc(my_rank);
        const long int my_nb_cell_levels = 1+my_top_z_cell_level-my_down_z_cell_level;

        current_offset_particles_for_partition[0] = 0;
        partsize_t myTotalNbParticles = 0;
        for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
            myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition];
            current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition];
        }

        // Compute box idx for each particle
        std::unique_ptr<long int[]> particles_coord(new long int[current_offset_particles_for_partition[current_partition_size]]);

        {
            for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
                #pragma omp parallel for schedule(static)
                for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] ; ++idxPart ){
307
308
309
                    particles_coord[idxPart] = get_cell_idx(particles_positions[(idxPart)*size_particle_positions + IDXC_X],
                                                                              particles_positions[(idxPart)*size_particle_positions + IDXC_Y],
                                                                              particles_positions[(idxPart)*size_particle_positions + IDXC_Z]);
310
311
312
313
314
                    assert(my_down_z_cell_level <= get_cell_coord_z_from_index(particles_coord[idxPart]));
                    assert(get_cell_coord_z_from_index(particles_coord[idxPart]) <= my_top_z_cell_level);
                }
            }

315
            std::vector<std::pair<long int,partsize_t>> part_to_sort;
316
317
318
319
320
321
322

            // Sort each partition in cells
            for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
                part_to_sort.clear();

                for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] ; ++idxPart ){
                    part_to_sort.emplace_back();
323
324
                    part_to_sort.back().first = particles_coord[idxPart];
                    part_to_sort.back().second = idxPart;
325
326
                }

327
                assert(partsize_t(part_to_sort.size()) == (current_my_nb_particles_per_partition[idxPartition]));
328
329

                std::sort(part_to_sort.begin(), part_to_sort.end(),
330
331
332
                          [](const std::pair<long int,partsize_t>& p1,
                             const std::pair<long int,partsize_t>& p2){
                    return p1.first < p2.first;
333
                });
334
335

                // Permute array using buffer
336
		// permute 4th function parameter using buffer, based on information in first 3 parameters
337
                std::vector<unsigned char> buffer;
338
339
340
341
342
343
                permute_copy<real_number, size_particle_positions>(
				current_offset_particles_for_partition[idxPartition],
                                current_my_nb_particles_per_partition[idxPartition],
                                part_to_sort.data(),
			       	particles_positions,
			       	&buffer);
344
345
346
347
348
349
350
351
                for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
                    permute_copy<real_number, size_particle_rhs>(
                                    current_offset_particles_for_partition[idxPartition],
                                    current_my_nb_particles_per_partition[idxPartition],
                                    part_to_sort.data(),
                                    particles_current_rhs[idx_rhs].get(),
                                    &buffer);
                }
352
353
354
355
356
357
358
359
360
361
362
363
                permute_copy<partsize_t, 1>(
				current_offset_particles_for_partition[idxPartition],
                                current_my_nb_particles_per_partition[idxPartition],
                                part_to_sort.data(),
			       	inout_index_particles,
			       	&buffer);
                permute_copy<long int, 1>(
				current_offset_particles_for_partition[idxPartition],
                                current_my_nb_particles_per_partition[idxPartition],
                                part_to_sort.data(),
			       	particles_coord.get(),
			       	&buffer);
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
            }
        }

        // Build the tree
        p2p_tree<std::vector<std::pair<partsize_t,partsize_t>>> my_tree(nb_cell_levels);

        for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
            long int current_cell_idx = -1;
            partsize_t current_nb_particles_in_cell = 0;
            partsize_t current_cell_offset = 0;

            for(partsize_t idx_part = current_offset_particles_for_partition[idxPartition] ;
                            idx_part != current_offset_particles_for_partition[idxPartition+1]; ++idx_part){
                if(particles_coord[idx_part] != current_cell_idx){
                    if(current_nb_particles_in_cell){
                        my_tree.getCell(current_cell_idx).emplace_back(current_cell_offset,current_nb_particles_in_cell);
                    }
                    current_cell_idx = particles_coord[idx_part];
                    current_nb_particles_in_cell = 1;
                    current_cell_offset = idx_part;
384
385
386
                }
                else{
                    current_nb_particles_in_cell += 1;
387
388
389
390
391
392
393
394
395
                }
            }
            if(current_nb_particles_in_cell){
                my_tree.getCell(current_cell_idx).emplace_back(current_cell_offset,current_nb_particles_in_cell);

            }
        }

        // Offset per cell layers
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
396
        long int previous_index = 0;
397
        variable_used_only_in_assert(previous_index);
398
399
400
401
        std::unique_ptr<partsize_t[]> particles_offset_layers(new partsize_t[my_nb_cell_levels+1]());
        for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
            for(partsize_t idx_part = current_offset_particles_for_partition[idxPartition] ;
                            idx_part != current_offset_particles_for_partition[idxPartition+1]; ++idx_part){
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
402
403
404
405
406
407
                const long int part_box_z_index = get_cell_coord_z_from_index(particles_coord[idx_part]);
                assert(my_down_z_cell_level <= part_box_z_index);
                assert(part_box_z_index <= my_top_z_cell_level);
                particles_offset_layers[part_box_z_index+1-my_down_z_cell_level] += 1;
                assert(previous_index <= part_box_z_index);
                previous_index = part_box_z_index;
408
409
            }
        }
410
        for(long int idx_layer = 0 ; idx_layer < my_nb_cell_levels ; ++idx_layer){
411
412
413
414
415
416
417
418
419
420
421
422
423
            particles_offset_layers[idx_layer+1] += particles_offset_layers[idx_layer];
        }

        // Reset vectors
        assert(whatNext.size() == 0);
        assert(mpiRequests.size() == 0);
        neigDescriptors.clear();

        // Find process with at least one neighbor
        {
            int dest_proc = (my_rank+1)%nb_processes_involved;
            while(dest_proc != my_rank
                  && (my_top_z_cell_level == first_cell_level_proc(dest_proc)
424
                      || (my_top_z_cell_level+1)%nb_cell_levels[IDXC_Z] == first_cell_level_proc(dest_proc))){
425
426
427
                // Find if we have to send 1 or 2 cell levels
                int nb_levels_to_send = 1;
                if(my_nb_cell_levels > 1 // I have more than one level
428
                        && (my_top_z_cell_level-1+2)%nb_cell_levels[IDXC_Z] <= last_cell_level_proc(dest_proc)){
429
430
431
432
433
434
435
436
                    nb_levels_to_send += 1;
                }

                NeighborDescriptor descriptor;
                descriptor.destProc = dest_proc;
                descriptor.nbLevelsToExchange = nb_levels_to_send;
                descriptor.nbParticlesToExchange = particles_offset_layers[my_nb_cell_levels] - particles_offset_layers[my_nb_cell_levels-nb_levels_to_send];
                descriptor.isRecv = false;
437
                descriptor.nbReceived = 0;
438
439
440
441
442
443
444
445
446

                neigDescriptors.emplace_back(std::move(descriptor));

                dest_proc = (dest_proc+1)%nb_processes_involved;
            }

            int src_proc = (my_rank-1+nb_processes_involved)%nb_processes_involved;
            while(src_proc != my_rank
                  && (last_cell_level_proc(src_proc) == my_down_z_cell_level
447
                      || (last_cell_level_proc(src_proc)+1)%nb_cell_levels[IDXC_Z] == my_down_z_cell_level)){
448
449
450
                // Find if we have to send 1 or 2 cell levels
                int nb_levels_to_recv = 1;
                if(my_nb_cell_levels > 1 // I have more than one level
451
                        && first_cell_level_proc(src_proc) <= (my_down_z_cell_level-1+2)%nb_cell_levels[IDXC_Z]){
452
453
454
455
456
457
458
459
                    nb_levels_to_recv += 1;
                }

                NeighborDescriptor descriptor;
                descriptor.destProc = src_proc;
                descriptor.nbLevelsToExchange = nb_levels_to_recv;
                descriptor.nbParticlesToExchange = -1;
                descriptor.isRecv = true;
460
                descriptor.nbReceived = 0;
461
462
463
464
465
466
467
468
469
470
471
472
473
474

                neigDescriptors.emplace_back(std::move(descriptor));

                src_proc = (src_proc-1+nb_processes_involved)%nb_processes_involved;
            }
        }

        //////////////////////////////////////////////////////////////////////
        /// Exchange the number of particles in each partition
        /// Could involve only here but I do not think it will be a problem
        //////////////////////////////////////////////////////////////////////

        assert(whatNext.size() == 0);
        assert(mpiRequests.size() == 0);
475
476
477
478
#ifndef NDEBUG // Just for assertion
        std::vector<int> willsend(nb_processes_involved, 0);
        std::vector<int> willrecv(nb_processes_involved, 0);
#endif
479
480
481
482
483
484
485
486
487
488
489

        for(int idxDescr = 0 ; idxDescr < int(neigDescriptors.size()) ; ++idxDescr){
            NeighborDescriptor& descriptor = neigDescriptors[idxDescr];

            if(descriptor.isRecv == false){
                whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
                mpiRequests.emplace_back();
                AssertMpi(MPI_Isend(const_cast<partsize_t*>(&descriptor.nbParticlesToExchange),
                                    1, particles_utils::GetMpiType(partsize_t()),
                                    descriptor.destProc, TAG_NB_PARTICLES,
                                    current_com, &mpiRequests.back()));
490
491
492
#ifndef NDEBUG // Just for assertion
                willsend[descriptor.destProc] += 1;
#endif
493
494
495
496
                if(descriptor.nbParticlesToExchange){
                    whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
                    mpiRequests.emplace_back();
                    assert(descriptor.nbParticlesToExchange*size_particle_positions < std::numeric_limits<int>::max());
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
497
                    AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_positions]),
498
499
500
501
                              int(descriptor.nbParticlesToExchange*size_particle_positions), particles_utils::GetMpiType(real_number()),
                              descriptor.destProc, TAG_POSITION_PARTICLES,
                              current_com, &mpiRequests.back()));

502
503
504
505
506
507
508
509
                    whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
                    mpiRequests.emplace_back();
                    assert(descriptor.nbParticlesToExchange*size_particle_positions < std::numeric_limits<int>::max());
                    AssertMpi(MPI_Isend(const_cast<partsize_t*>(&inout_index_particles[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]]),
                              int(descriptor.nbParticlesToExchange), particles_utils::GetMpiType(partsize_t()),
                              descriptor.destProc, TAG_INDEX_PARTICLES,
                              current_com, &mpiRequests.back()));

510
511
                    assert(descriptor.toRecvAndMerge == nullptr);
                    descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToExchange*size_particle_rhs]);
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
512
                    whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr});
513
514
515
516
517
518
519
520
                    mpiRequests.emplace_back();
                    assert(descriptor.nbParticlesToExchange*size_particle_rhs < std::numeric_limits<int>::max());
                    AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), int(descriptor.nbParticlesToExchange*size_particle_rhs),
                                        particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_RESULT_PARTICLES,
                                        current_com, &mpiRequests.back()));
                }
            }
            else{
521
522
523
#ifndef NDEBUG // Just for assertion
                willrecv[descriptor.destProc] += 1;
#endif
524
525
526
527
528
529
530
531
                whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr});
                mpiRequests.emplace_back();
                AssertMpi(MPI_Irecv(&descriptor.nbParticlesToExchange,
                      1, particles_utils::GetMpiType(partsize_t()), descriptor.destProc, TAG_NB_PARTICLES,
                      current_com, &mpiRequests.back()));
            }
        }

532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
#ifndef NDEBUG // Just for assertion
        {
            if(myrank == 0){
                std::vector<int> willsendall(nb_processes_involved*nb_processes_involved, 0);// TODO debug
                std::vector<int> willrecvall(nb_processes_involved*nb_processes_involved, 0);// TODO debug

                MPI_Gather(willrecv.data(), nb_processes_involved, MPI_INT, willrecvall.data(),
                            nb_processes_involved, MPI_INT, 0, MPI_COMM_WORLD);
                MPI_Gather(willsend.data(), nb_processes_involved, MPI_INT, willsendall.data(),
                            nb_processes_involved, MPI_INT, 0, MPI_COMM_WORLD);

                for(int idxproc = 0 ; idxproc < nb_processes_involved ; ++idxproc){
                    for(int idxtest = 0 ; idxtest < nb_processes_involved ; ++idxtest){
                        assert(willsendall[idxproc*nb_processes_involved + idxtest]
                                == willrecvall[idxtest*nb_processes_involved + idxproc]);
                    }
                }
            }
            else{
                MPI_Gather(willrecv.data(), nb_processes_involved, MPI_INT, nullptr,
                            0, MPI_INT, 0, MPI_COMM_WORLD);
                MPI_Gather(willsend.data(), nb_processes_involved, MPI_INT, nullptr,
                            0, MPI_INT, 0, MPI_COMM_WORLD);
            }
        }
#endif

559
560
        lock_free_bool_array cells_locker(512);

561
562
563
564
565
        std::vector<std::unique_ptr<computer_class>> computer_for_all_threads(omp_get_num_threads()-1);
        for(int idxThread = 1 ; idxThread < omp_get_num_threads() ; ++idxThread){
            computer_for_all_threads[idxThread-1].reset(new computer_class(in_computer));
        }

566
567
568
        TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads())
        #pragma omp parallel default(shared)
        {
569
            computer_class& computer_thread = (omp_get_thread_num() == 0 ? in_computer : *computer_for_all_threads[omp_get_thread_num()-1]);
570
571
572
            #pragma omp master
            {
                while(mpiRequests.size()){
Berenger Bramas's avatar
Berenger Bramas committed
573
                    TIMEZONE("wait-loop");
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
                    assert(mpiRequests.size() == whatNext.size());

                    int idxDone = int(mpiRequests.size());
                    {
                        TIMEZONE("wait");
                        AssertMpi(MPI_Waitany(int(mpiRequests.size()), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE));
                    }
                    const std::pair<Action, int> releasedAction = whatNext[idxDone];
                    std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]);
                    std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]);
                    mpiRequests.pop_back();
                    whatNext.pop_back();

                    //////////////////////////////////////////////////////////////////////
                    /// Data to exchange particles
                    //////////////////////////////////////////////////////////////////////
                    if(releasedAction.first == RECV_PARTICLES){
Berenger Bramas's avatar
Berenger Bramas committed
591
                        TIMEZONE("post-recv-particles");
592
                        NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
593
                        assert(descriptor.isRecv);
594
595
596
597
                        const int destProc = descriptor.destProc;
                        const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange;
                        assert(NbParticlesToReceive != -1);
                        assert(descriptor.toCompute == nullptr);
598
                        assert(descriptor.indexes == nullptr);
599
600
601

                        if(NbParticlesToReceive){
                            descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]);
602
                            whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second});
603
604
605
606
607
                            mpiRequests.emplace_back();
                            assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max());
                            AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions),
                                                particles_utils::GetMpiType(real_number()), destProc, TAG_POSITION_PARTICLES,
                                                current_com, &mpiRequests.back()));
608

609
                            descriptor.indexes.reset(new partsize_t[NbParticlesToReceive]);
610
611
612
613
614
615
                            whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second});
                            mpiRequests.emplace_back();
                            assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max());
                            AssertMpi(MPI_Irecv(descriptor.indexes.get(), int(NbParticlesToReceive),
                                                particles_utils::GetMpiType(partsize_t()), destProc, TAG_INDEX_PARTICLES,
                                                current_com, &mpiRequests.back()));
616
617
618
619
620
621
622
623
                        }
                    }

                    //////////////////////////////////////////////////////////////////////
                    /// Computation
                    //////////////////////////////////////////////////////////////////////
                    if(releasedAction.first == COMPUTE_PARTICLES){
                        NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
624
625
626
627
628
629
630
631
632
633
                        descriptor.nbReceived += 1;
                        assert(descriptor.nbReceived <= 2);

                        if(descriptor.nbReceived == 2){
                            TIMEZONE("compute-particles");
                            NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
                            assert(descriptor.isRecv);
                            const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange;

                            assert(descriptor.toCompute != nullptr);
634
                            assert(descriptor.indexes != nullptr);
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
                            descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]);
                            computer_thread.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive);

                            // Compute
                            partsize_t idxPart = 0;
                            while(idxPart != NbParticlesToReceive){
                                const long int current_cell_idx = get_cell_idx(descriptor.toCompute[idxPart*size_particle_positions + IDXC_X],
                                                                               descriptor.toCompute[idxPart*size_particle_positions + IDXC_Y],
                                                                               descriptor.toCompute[idxPart*size_particle_positions + IDXC_Z]);
                                partsize_t nb_parts_in_cell = 1;
                                while(idxPart+nb_parts_in_cell != NbParticlesToReceive
                                      && current_cell_idx == get_cell_idx(descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_X],
                                                                         descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Y],
                                                                         descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Z])){
                                    nb_parts_in_cell += 1;
                                }
651

652
653
654
655
656
                                #pragma omp task default(shared) firstprivate(idxPart, nb_parts_in_cell, current_cell_idx)
                                {
                                    const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
                                    long int neighbors_indexes[27];
                                    std::array<real_number,3> shift[27];
657
658
659
660
661
662
                                    const int nbNeighbors = my_tree.getNeighbors(
						    current_cell_idx,
						    neighbors,
						    neighbors_indexes,
						    shift,
						    true);
663
664
665
666
667
668
669
670

                                    // with other interval
                                    for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
                                        cells_locker.lock(neighbors_indexes[idx_neighbor]);

                                        for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
                                            for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){
                                                for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
671
672
673
674
675
676
677
678
                                                    const real_number dist_r2 = compute_distance_r2(
								    descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_X],
                                                                    descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Y],
                                                                    descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Z],
                                                                    particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
                                                                    particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
                                                                    particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
                                                                    shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
679
680
681
682
683
684
685
                                                    if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                                                        computer_thread.template compute_interaction<size_particle_positions, size_particle_rhs>(
                                                                            descriptor.indexes[(idxPart+idx_p1)],
                                                                            &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions],
                                                                            &descriptor.results[(idxPart+idx_p1)*size_particle_rhs],
                                                                            inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
                                                                            &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
686
                                                                            &particles_current_rhs[0][((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
687
688
689
690
691
                                                                            dist_r2,
									    cutoff_radius_compute,
									    shift[idx_neighbor][IDXC_X],
									    shift[idx_neighbor][IDXC_Y],
									    shift[idx_neighbor][IDXC_Z]);
692
                                                    }
693
                                                }
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
694
                                            }
695
                                        }
696

697
698
                                        cells_locker.unlock(neighbors_indexes[idx_neighbor]);
                                    }
699
700
                                }

701
702
                                idxPart += nb_parts_in_cell;
                            }
703

704
                            #pragma omp taskwait
705

706
707
708
709
710
711
712
713
714
                            // Send back
                            const int destProc = descriptor.destProc;
                            whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second});
                            mpiRequests.emplace_back();
                            assert(NbParticlesToReceive*size_particle_rhs < std::numeric_limits<int>::max());
                            AssertMpi(MPI_Isend(descriptor.results.get(), int(NbParticlesToReceive*size_particle_rhs),
                                                particles_utils::GetMpiType(real_number()), destProc, TAG_RESULT_PARTICLES,
                                                current_com, &mpiRequests.back()));
                            delete[] descriptor.toCompute.release();
715
                            delete[] descriptor.indexes.release();
716
                        }
717
718
                    }
                    //////////////////////////////////////////////////////////////////////
719
                    /// Release memory that was sent back
720
                    //////////////////////////////////////////////////////////////////////
721
722
                    if(releasedAction.first == RELEASE_BUFFER_PARTICLES){
                        NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
723
                        assert(descriptor.results != nullptr);
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
724
                        assert(descriptor.isRecv);
Cristian Lalescu's avatar
Cristian Lalescu committed
725
                        delete[] descriptor.results.release();
726
727
728
729
                    }
                    //////////////////////////////////////////////////////////////////////
                    /// Merge
                    //////////////////////////////////////////////////////////////////////
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
730
                    if(releasedAction.first == MERGE_PARTICLES){
Berenger Bramas's avatar
Berenger Bramas committed
731
                        TIMEZONE("merge");
732
                        NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
733
                        assert(descriptor.isRecv == false);
734
                        assert(descriptor.toRecvAndMerge != nullptr);
735
                        computer_thread.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[0][particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_rhs],
Berenger Bramas's avatar
Debug    
Berenger Bramas committed
736
                                descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToExchange);
Cristian Lalescu's avatar
Cristian Lalescu committed
737
                        delete[] descriptor.toRecvAndMerge.release();
738
739
740
741
742
743
744
745
                    }
                }
            }
        }

        assert(whatNext.size() == 0);
        assert(mpiRequests.size() == 0);

746
747
748
749
750
751
752
        {
            computer_class& computer_thread = (omp_get_thread_num() == 0 ? in_computer : *computer_for_all_threads[omp_get_thread_num()-1]);
            // Compute self data
            for(const auto& iter_cell : my_tree){
                TIMEZONE("proceed-leaf");
                const long int currenct_cell_idx = iter_cell.first;
                const std::vector<std::pair<partsize_t,partsize_t>>* intervals_ptr = &iter_cell.second;
753

754
755
756
757
758
    #pragma omp task default(shared) firstprivate(currenct_cell_idx, intervals_ptr)
                {
                    const std::vector<std::pair<partsize_t,partsize_t>>& intervals = (*intervals_ptr);

                    cells_locker.lock(currenct_cell_idx);
759

760
761
                    for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
                        // self interval
762
                        for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
763
                            for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){
764
765
766
                                const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
                                                                                particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
                                                                                particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
767
768
769
                                                                                particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_X],
                                                                                particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Y],
                                                                                particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Z],
770
771
                                                                                0, 0, 0);
                                if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
772
                                    computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>(
773
                                                        inout_index_particles[(intervals[idx_1].first+idx_p1)],
774
                                                        &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
775
                                                        &particles_current_rhs[0][(intervals[idx_1].first+idx_p1)*size_particle_rhs],
776
                                                        inout_index_particles[(intervals[idx_1].first+idx_p2)],
777
                                                        &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions],
778
                                                        &particles_current_rhs[0][(intervals[idx_1].first+idx_p2)*size_particle_rhs],
779
                                                        dist_r2, cutoff_radius_compute, 0, 0, 0);
780
781
                                }
                            }
782
                        }
783
784
785
786
787
788
789
790
791
792
793
794
795
796

                        // with other interval
                        for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){
                            for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
                                for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){
                                    const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
                                                                                    particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
                                                                                    particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
                                                                                    particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
                                                                                    particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
                                                                                    particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
                                                                                    0, 0, 0);
                                    if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                                        computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>(
797
                                                            inout_index_particles[(intervals[idx_1].first+idx_p1)],
798
                                                            &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
799
                                                            &particles_current_rhs[0][(intervals[idx_1].first+idx_p1)*size_particle_rhs],
800
                                                            inout_index_particles[(intervals[idx_2].first+idx_p2)],
801
                                                            &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions],
802
                                                            &particles_current_rhs[0][(intervals[idx_2].first+idx_p2)*size_particle_rhs],
803
804
805
806
807
                                                            dist_r2, cutoff_radius_compute, 0, 0, 0);
                                    }
                                }
                            }
                        }
808
809
                    }

810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
                    const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
                    long int neighbors_indexes[27];
                    std::array<real_number,3> shift[27];
                    const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false);

                    for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
                        // with other interval
                        for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
                            if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){
                                cells_locker.lock(neighbors_indexes[idx_neighbor]);

                                for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
                                    for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
                                        for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
                                            const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
                                                                                            particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
                                                                                            particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
                                                                                            particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
                                                                                            particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
                                                                                            particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
                                                                                            shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
                                            if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                                                computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>(
833
                                                                    inout_index_particles[(intervals[idx_1].first+idx_p1)],
834
                                                                    &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
835
                                                                    &particles_current_rhs[0][(intervals[idx_1].first+idx_p1)*size_particle_rhs],
836
                                                                    inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
837
                                                                    &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
838
                                                                    &particles_current_rhs[0][((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
839
840
                                                                    dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
                                            }
841
                                        }
842
                                    }
843
                                }
844
                                cells_locker.unlock(neighbors_indexes[idx_neighbor]);
845
846
847
                            }
                        }
                    }
848

849
850
                    cells_locker.unlock(currenct_cell_idx);
                }
851
852
            }
        }
853
854
855
856

        for(int idxThread = 1 ; idxThread < omp_get_num_threads() ; ++idxThread){
            in_computer.merge(*computer_for_all_threads[idxThread-1]);
        }
857
858
859
860
    }
};

#endif