distributed_particles.cpp 17.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/**********************************************************************
*                                                                     *
*  Copyright 2015 Max Planck Institute                                *
*                 for Dynamics and Self-Organization                  *
*                                                                     *
*  This file is part of bfps.                                         *
*                                                                     *
*  bfps is free software: you can redistribute it and/or modify       *
*  it under the terms of the GNU General Public License as published  *
*  by the Free Software Foundation, either version 3 of the License,  *
*  or (at your option) any later version.                             *
*                                                                     *
*  bfps is distributed in the hope that it will be useful,            *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
*  GNU General Public License for more details.                       *
*                                                                     *
*  You should have received a copy of the GNU General Public License  *
*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
*                                                                     *
* Contact: Cristian.Lalescu@ds.mpg.de                                 *
*                                                                     *
**********************************************************************/



Cristian Lalescu's avatar
Cristian Lalescu committed
27
#define NDEBUG
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

#include <cmath>
#include <cassert>
#include <cstring>
#include <string>
#include <sstream>

#include "base.hpp"
#include "distributed_particles.hpp"
#include "fftw_tools.hpp"


extern int myrank, nprocs;

template <int particle_type, class rnumber, int interp_neighbours>
distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles(
        const char *NAME,
Cristian Lalescu's avatar
Cristian Lalescu committed
45
        const hid_t data_file_id,
46
47
        interpolator<rnumber, interp_neighbours> *FIELD,
        const int TRAJ_SKIP,
Cristian Lalescu's avatar
Cristian Lalescu committed
48
49
50
51
52
        const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
            NAME,
            TRAJ_SKIP,
            data_file_id,
            FIELD->descriptor->comm)
53
54
55
56
{
    assert((INTEGRATION_STEPS <= 6) &&
           (INTEGRATION_STEPS >= 1));
    this->vel = FIELD;
57
    this->rhs.resize(INTEGRATION_STEPS);
58
59
60
61
62
63
64
65
    this->integration_steps = INTEGRATION_STEPS;
}

template <int particle_type, class rnumber, int interp_neighbours>
distributed_particles<particle_type, rnumber, interp_neighbours>::~distributed_particles()
{
}

66
67
template <int particle_type, class rnumber, int interp_neighbours>
void distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
68
        interpolator<rnumber, interp_neighbours> *field,
69
        std::unordered_map<int, single_particle_state<POINT3D>> &y)
70
{
71
    double *yy = new double[3];
72
73
    y.clear();
    for (auto &pp: this->state)
74
    {
75
        (*field)(pp.second.data, yy);
76
        y[pp.first] = yy;
77
    }
78
    delete[] yy;
79
80
}

81
82
83
84
85
template <int particle_type, class rnumber, int interp_neighbours>
void distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs(
        const std::unordered_map<int, single_particle_state<particle_type>> &x,
        std::unordered_map<int, single_particle_state<particle_type>> &y)
{
86
    double *yy = new double[this->ncomponents];
87
88
89
    y.clear();
    for (auto &pp: x)
    {
90
        (*this->vel)(pp.second.data, yy);
91
92
        y[pp.first] = yy;
    }
93
    delete[] yy;
94
95
}

96
97
98
99
100
template <int particle_type, class rnumber, int interp_neighbours>
void distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
        interpolator<rnumber, interp_neighbours> *field,
        const char *dset_name)
{
101
    std::unordered_map<int, single_particle_state<POINT3D>> y;
102
    this->sample(field, y);
103
    this->write(dset_name, y);
104
105
}

106
107
108
109
110
111
112
template <int particle_type, class rnumber, int interp_neighbours>
void distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
{
    for (int i=this->integration_steps-2; i>=0; i--)
        rhs[i+1] = rhs[i];
}

113
114
115
116
117
template <int particle_type, class rnumber, int interp_neighbours>
void distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute(
        std::unordered_map<int, single_particle_state<particle_type>> &x,
        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals)
{
118
    //DEBUG_MSG("entered redistribute\n");
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
    /* neighbouring rank offsets */
    int ro[2];
    ro[0] = -1;
    ro[1] = 1;
    /* neighbouring ranks */
    int nr[2];
    nr[0] = MOD(this->myrank+ro[0], this->nprocs);
    nr[1] = MOD(this->myrank+ro[1], this->nprocs);
    /* particles to send, particles to receive */
    std::vector<int> ps[2], pr[2];
    /* number of particles to send, number of particles to receive */
    int nps[2], npr[2];
    int rsrc, rdst;
    /* get list of id-s to send */
    for (auto &pp: x)
        for (int i=0; i<2; i++)
            if (this->vel->get_rank(pp.second.data[2]) == nr[i])
                ps[i].push_back(pp.first);
137
    /* prepare data for send recv */
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
    for (int i=0; i<2; i++)
        nps[i] = ps[i].size();
    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
        for (int i=0; i<2; i++)
        {
            rdst = MOD(rsrc+ro[i], this->nprocs);
            if (this->myrank == rsrc)
                MPI_Send(
                        nps+i,
                        1,
                        MPI_INTEGER,
                        rdst,
                        2*(rsrc*this->nprocs + rdst)+i,
                        this->comm);
            if (this->myrank == rdst)
                MPI_Recv(
                        npr+1-i,
                        1,
                        MPI_INTEGER,
                        rsrc,
                        2*(rsrc*this->nprocs + rdst)+i,
                        this->comm,
                        MPI_STATUS_IGNORE);
        }
162
163
    //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]);
    //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]);
164
165
166
    for (int i=0; i<2; i++)
        pr[i].resize(npr[i]);

Cristian Lalescu's avatar
Cristian Lalescu committed
167
168
169
    int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1];
    buffer_size = (buffer_size > npr[0])? buffer_size : npr[0];
    buffer_size = (buffer_size > npr[1])? buffer_size : npr[1];
170
    //DEBUG_MSG("buffer size is %d\n", buffer_size);
Cristian Lalescu's avatar
Cristian Lalescu committed
171
    double *buffer = new double[buffer_size*this->ncomponents*(1+vals.size())];
172
173
174
175
176
177
178
179
180
181
182
183
184
    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
        for (int i=0; i<2; i++)
        {
            rdst = MOD(rsrc+ro[i], this->nprocs);
            if (this->myrank == rsrc && nps[i] > 0)
            {
                MPI_Send(
                        &ps[i].front(),
                        nps[i],
                        MPI_INTEGER,
                        rdst,
                        2*(rsrc*this->nprocs + rdst),
                        this->comm);
185
                int pcounter = 0;
186
187
188
189
                for (int p: ps[i])
                {
                    std::copy(x[p].data,
                              x[p].data + this->ncomponents,
190
                              buffer + pcounter*(1+vals.size())*this->ncomponents);
191
192
193
194
195
                    x.erase(p);
                    for (int tindex=0; tindex<vals.size(); tindex++)
                    {
                        std::copy(vals[tindex][p].data,
                                  vals[tindex][p].data + this->ncomponents,
196
                                  buffer + (pcounter*(1+vals.size()) + tindex+1)*this->ncomponents);
197
198
                        vals[tindex].erase(p);
                    }
199
                    pcounter++;
200
201
202
                }
                MPI_Send(
                        buffer,
203
                        nps[i]*(1+vals.size())*this->ncomponents,
204
205
                        MPI_DOUBLE,
                        rdst,
Cristian Lalescu's avatar
Cristian Lalescu committed
206
                        2*(rsrc*this->nprocs + rdst)+1,
207
208
209
                        this->comm);
            }
            if (this->myrank == rdst && npr[1-i] > 0)
210
            {
211
212
213
214
215
216
217
218
219
220
                MPI_Recv(
                        &pr[1-i].front(),
                        npr[1-i],
                        MPI_INTEGER,
                        rsrc,
                        2*(rsrc*this->nprocs + rdst),
                        this->comm,
                        MPI_STATUS_IGNORE);
                MPI_Recv(
                        buffer,
221
                        npr[1-i]*(1+vals.size())*this->ncomponents,
222
223
                        MPI_DOUBLE,
                        rsrc,
Cristian Lalescu's avatar
Cristian Lalescu committed
224
                        2*(rsrc*this->nprocs + rdst)+1,
225
226
                        this->comm,
                        MPI_STATUS_IGNORE);
227
                int pcounter = 0;
228
229
                for (int p: pr[1-i])
                {
230
                    x[p] = buffer + (pcounter*(1+vals.size()))*this->ncomponents;
231
232
                    for (int tindex=0; tindex<vals.size(); tindex++)
                    {
233
                        vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*this->ncomponents;
234
                    }
235
                    pcounter++;
236
                }
237
            }
238
        }
Cristian Lalescu's avatar
Cristian Lalescu committed
239
    delete[] buffer;
240

241
242
243

#ifndef NDEBUG
    /* check that all particles at x are local */
244
    for (auto &pp: x)
245
246
247
248
249
250
251
        if (this->vel->get_rank(pp.second.data[2]) != this->myrank)
        {
            DEBUG_MSG("found particle %d with rank %d\n",
                    pp.first,
                    this->vel->get_rank(pp.second.data[2]));
            assert(false);
        }
252
#endif
253
    //DEBUG_MSG("exiting redistribute\n");
254
255
}

256
257
258
259
260
261


template <int particle_type, class rnumber, int interp_neighbours>
void distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
        const int nsteps)
{
262
    this->get_rhs(this->state, this->rhs[0]);
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
    for (auto &pp: this->state)
        for (int i=0; i<this->ncomponents; i++)
            switch(nsteps)
            {
                case 1:
                    pp.second[i] += this->dt*this->rhs[0][pp.first][i];
                    break;
                case 2:
                    pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i]
                                            -   this->rhs[1][pp.first][i])/2;
                    break;
                case 3:
                    pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i]
                                            - 16*this->rhs[1][pp.first][i]
                                            +  5*this->rhs[2][pp.first][i])/12;
                    break;
                case 4:
                    pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i]
                                            - 59*this->rhs[1][pp.first][i]
                                            + 37*this->rhs[2][pp.first][i]
283
                                            -  9*this->rhs[3][pp.first][i])/24;
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
                    break;
                case 5:
                    pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i]
                                            - 2774*this->rhs[1][pp.first][i]
                                            + 2616*this->rhs[2][pp.first][i]
                                            - 1274*this->rhs[3][pp.first][i]
                                            +  251*this->rhs[4][pp.first][i])/720;
                    break;
                case 6:
                    pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i]
                                            - 7923*this->rhs[1][pp.first][i]
                                            + 9982*this->rhs[2][pp.first][i]
                                            - 7298*this->rhs[3][pp.first][i]
                                            + 2877*this->rhs[4][pp.first][i]
                                            -  475*this->rhs[5][pp.first][i])/1440;
                    break;
300
301
            }
    this->redistribute(this->state, this->rhs);
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
    this->roll_rhs();
}


template <int particle_type, class rnumber, int interp_neighbours>
void distributed_particles<particle_type, rnumber, interp_neighbours>::step()
{
    this->AdamsBashforth((this->iteration < this->integration_steps) ?
                            this->iteration+1 :
                            this->integration_steps);
    this->iteration++;
}


template <int particle_type, class rnumber, int interp_neighbours>
317
void distributed_particles<particle_type, rnumber, interp_neighbours>::read()
318
{
Cristian Lalescu's avatar
Cristian Lalescu committed
319
320
    double *temp = new double[this->chunk_size*this->ncomponents];
    for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
Cristian Lalescu's avatar
Cristian Lalescu committed
321
    {
Cristian Lalescu's avatar
Cristian Lalescu committed
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
        //read state
        if (this->myrank == 0)
            this->read_state_chunk(cindex, temp);
        MPI_Bcast(
                temp,
                this->chunk_size*this->ncomponents,
                MPI_DOUBLE,
                0,
                this->comm);
        for (int p=0; p<this->chunk_size; p++)
        {
            if (this->vel->get_rank(temp[this->ncomponents*p+2]) == this->myrank)
                this->state[p+cindex*this->chunk_size] = temp + this->ncomponents*p;
        }
        //read rhs
        if (this->iteration > 0)
            for (int i=0; i<this->integration_steps; i++)
            {
                if (this->myrank == 0)
                    this->read_rhs_chunk(cindex, i, temp);
                MPI_Bcast(
                        temp,
                        this->chunk_size*this->ncomponents,
                        MPI_DOUBLE,
                        0,
                        this->comm);
                for (int p=0; p<this->chunk_size; p++)
                {
                    auto pp = this->state.find(p+cindex*this->chunk_size);
                    if (pp != this->state.end())
                        this->rhs[i][p+cindex*this->chunk_size] = temp + this->ncomponents*p;
                }
            }
Cristian Lalescu's avatar
Cristian Lalescu committed
355
    }
Cristian Lalescu's avatar
Cristian Lalescu committed
356
    DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size());
357
358
359
    delete[] temp;
}

360
361
362
363
364
365
366
template <int particle_type, class rnumber, int interp_neighbours>
void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
        const char *dset_name,
        std::unordered_map<int, single_particle_state<POINT3D>> &y)
{
    double *data = new double[this->nparticles*3];
    double *yy = new double[this->nparticles*3];
367
    for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
368
    {
369
        std::fill_n(yy, this->chunk_size*3, 0);
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
        for (int p=0; p<this->chunk_size; p++)
        {
            auto pp = y.find(p+cindex*this->chunk_size);
            if (pp != y.end())
                std::copy(pp->second.data,
                          pp->second.data + 3,
                          yy + pp->first*3);
        }
        MPI_Allreduce(
                yy,
                data,
                3*this->nparticles,
                MPI_DOUBLE,
                MPI_SUM,
                this->comm);
        if (this->myrank == 0)
            this->write_point3D_chunk(dset_name, cindex, data);
387
388
389
390
391
    }
    delete[] yy;
    delete[] data;
}

392
393
394
395
template <int particle_type, class rnumber, int interp_neighbours>
void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
        const bool write_rhs)
{
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
    double *temp0 = new double[this->chunk_size*this->ncomponents];
    double *temp1 = new double[this->chunk_size*this->ncomponents];
    for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
    {
        //write state
        std::fill_n(temp0, this->ncomponents*this->chunk_size, 0);
        for (int p=0; p<this->chunk_size; p++)
        {
            auto pp = this->state.find(p + cindex*this->chunk_size);
            if (pp != this->state.end())
                std::copy(pp->second.data,
                          pp->second.data + this->ncomponents,
                          temp0 + pp->first*this->ncomponents);
        }
        MPI_Allreduce(
                temp0,
                temp1,
                this->ncomponents*this->chunk_size,
                MPI_DOUBLE,
                MPI_SUM,
                this->comm);
        if (this->myrank == 0)
            this->write_state_chunk(cindex, temp1);
        //write rhs
420
        if (write_rhs)
421
422
            for (int i=0; i<this->integration_steps; i++)
            {
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
                std::fill_n(temp0, this->ncomponents*this->chunk_size, 0);
                for (int p=0; p<this->chunk_size; p++)
                {
                    auto pp = this->rhs[i].find(p + cindex*this->chunk_size);
                    if (pp != this->rhs[i].end())
                        std::copy(pp->second.data,
                                  pp->second.data + this->ncomponents,
                                  temp0 + pp->first*this->ncomponents);
                }
                MPI_Allreduce(
                        temp0,
                        temp1,
                        this->ncomponents*this->chunk_size,
                        MPI_DOUBLE,
                        MPI_SUM,
                        this->comm);
                if (this->myrank == 0)
                    this->write_rhs_chunk(cindex, i, temp1);
441
442
443
444
            }
    }
    delete[] temp0;
    delete[] temp1;
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
}


/*****************************************************************************/
template class distributed_particles<VELOCITY_TRACER, float, 1>;
template class distributed_particles<VELOCITY_TRACER, float, 2>;
template class distributed_particles<VELOCITY_TRACER, float, 3>;
template class distributed_particles<VELOCITY_TRACER, float, 4>;
template class distributed_particles<VELOCITY_TRACER, float, 5>;
template class distributed_particles<VELOCITY_TRACER, float, 6>;
template class distributed_particles<VELOCITY_TRACER, double, 1>;
template class distributed_particles<VELOCITY_TRACER, double, 2>;
template class distributed_particles<VELOCITY_TRACER, double, 3>;
template class distributed_particles<VELOCITY_TRACER, double, 4>;
template class distributed_particles<VELOCITY_TRACER, double, 5>;
template class distributed_particles<VELOCITY_TRACER, double, 6>;
/*****************************************************************************/