Skip to content
Snippets Groups Projects
Commit a91473a4 authored by Berenger Bramas's avatar Berenger Bramas
Browse files

Avoid Optimization bug with GCC 6.2 for particles_adams_bashforth

parent 3033a090
No related branches found
No related tags found
1 merge request!21Bugfix/nansampling
...@@ -9,10 +9,13 @@ ...@@ -9,10 +9,13 @@
template <class real_number, int size_particle_positions = 3, int size_particle_rhs = 3> template <class real_number, int size_particle_positions = 3, int size_particle_rhs = 3>
class particles_adams_bashforth { class particles_adams_bashforth {
static_assert(size_particle_positions == size_particle_rhs,
"Not having the same dimension for positions and rhs looks like a bug,"
"otherwise comment this assertion.");
public: public:
static const int Max_steps = 6; static const int Max_steps = 6;
void move_particles(real_number particles_positions[], void move_particles(real_number*__restrict__ particles_positions,
const int nb_particles, const int nb_particles,
const std::unique_ptr<real_number[]> particles_rhs[], const std::unique_ptr<real_number[]> particles_rhs[],
const int nb_rhs, const real_number dt) const{ const int nb_rhs, const real_number dt) const{
...@@ -30,76 +33,98 @@ public: ...@@ -30,76 +33,98 @@ public:
particles_utils::IntervalSplitter<int> interval(nb_particles, particles_utils::IntervalSplitter<int> interval(nb_particles,
omp_get_num_threads(), omp_get_num_threads(),
omp_get_thread_num()); omp_get_thread_num());
const int last_idx = interval.getMyOffset()+interval.getMySize();
const int value_start = interval.getMyOffset()*size_particle_positions;
const int value_end = (interval.getMyOffset()+interval.getMySize())*size_particle_positions;
// TODO full unroll + blocking // TODO full unroll + blocking
switch (nb_rhs){ switch (nb_rhs){
case 1: case 1:
for(int idx_part = interval.getMyOffset() ; idx_part < last_idx ; ++idx_part){ {
for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){
// dt × [0] // dt × [0]
particles_positions[idx_part*size_particle_positions + idx_dim] particles_positions[idx_value] += dt * rhs_0[idx_value];
+= dt * particles_rhs[0][idx_part*size_particle_rhs + idx_dim];
} }
} }
break; break;
case 2: case 2:
for(int idx_part = interval.getMyOffset() ; idx_part < last_idx ; ++idx_part){ {
for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){
// dt × (3[0] - [1])/2 // dt × (3[0] - [1])/2
particles_positions[idx_part*size_particle_positions + idx_dim] particles_positions[idx_value]
+= dt * (3.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] += dt * (3.*rhs_0[idx_value]
- particles_rhs[1][idx_part*size_particle_rhs + idx_dim])/2.; - rhs_1[idx_value])/2.;
} }
} }
break; break;
case 3: case 3:
for(int idx_part = interval.getMyOffset() ; idx_part < last_idx ; ++idx_part){ {
for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
const real_number* __restrict__ rhs_2 = particles_rhs[2].get();
for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){
// dt × (23[0] - 16[1] + 5[2])/12 // dt × (23[0] - 16[1] + 5[2])/12
particles_positions[idx_part*size_particle_positions + idx_dim] particles_positions[idx_value]
+= dt * (23.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] += dt * (23.*rhs_0[idx_value]
- 16.*particles_rhs[1][idx_part*size_particle_rhs + idx_dim] - 16.*rhs_1[idx_value]
+ 5.*particles_rhs[2][idx_part*size_particle_rhs + idx_dim])/12.; + 5.*rhs_2[idx_value])/12.;
} }
} }
break; break;
case 4: case 4:
for(int idx_part = interval.getMyOffset() ; idx_part < last_idx ; ++idx_part){ {
for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
const real_number* __restrict__ rhs_2 = particles_rhs[2].get();
const real_number* __restrict__ rhs_3 = particles_rhs[3].get();
for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){
// dt × (55[0] - 59[1] + 37[2] - 9[3])/24 // dt × (55[0] - 59[1] + 37[2] - 9[3])/24
particles_positions[idx_part*size_particle_positions + idx_dim] particles_positions[idx_value]
+= dt * (55.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] += dt * (55.*rhs_0[idx_value]
- 59.*particles_rhs[1][idx_part*size_particle_rhs + idx_dim] - 59.*rhs_1[idx_value]
+ 37.*particles_rhs[2][idx_part*size_particle_rhs + idx_dim] + 37.*rhs_2[idx_value]
- 9.*particles_rhs[3][idx_part*size_particle_rhs + idx_dim])/24.; - 9.*rhs_3[idx_value])/24.;
} }
} }
break; break;
case 5: case 5:
for(int idx_part = interval.getMyOffset() ; idx_part < last_idx ; ++idx_part){ {
for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
const real_number* __restrict__ rhs_2 = particles_rhs[2].get();
const real_number* __restrict__ rhs_3 = particles_rhs[3].get();
const real_number* __restrict__ rhs_4 = particles_rhs[4].get();
for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){
// dt × (1901[0] - 2774[1] + 2616[2] - 1274[3] + 251[4])/720 // dt × (1901[0] - 2774[1] + 2616[2] - 1274[3] + 251[4])/720
particles_positions[idx_part*size_particle_positions + idx_dim] particles_positions[idx_value]
+= dt * (1901.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] += dt * (1901.*rhs_0[idx_value]
- 2774.*particles_rhs[1][idx_part*size_particle_rhs + idx_dim] - 2774.*rhs_1[idx_value]
+ 2616.*particles_rhs[2][idx_part*size_particle_rhs + idx_dim] + 2616.*rhs_2[idx_value]
- 1274.*particles_rhs[3][idx_part*size_particle_rhs + idx_dim] - 1274.*rhs_3[idx_value]
+ 251.*particles_rhs[4][idx_part*size_particle_rhs + idx_dim])/720.; + 251.*rhs_4[idx_value])/720.;
} }
} }
break; break;
case 6: case 6:
for(int idx_part = interval.getMyOffset() ; idx_part < last_idx ; ++idx_part){ {
for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ const real_number* __restrict__ rhs_0 = particles_rhs[0].get();
const real_number* __restrict__ rhs_1 = particles_rhs[1].get();
const real_number* __restrict__ rhs_2 = particles_rhs[2].get();
const real_number* __restrict__ rhs_3 = particles_rhs[3].get();
const real_number* __restrict__ rhs_4 = particles_rhs[4].get();
const real_number* __restrict__ rhs_5 = particles_rhs[5].get();
for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){
// dt × (4277[0] - 7923[1] + 9982[2] - 7298[3] + 2877[4] - 475[5])/1440 // dt × (4277[0] - 7923[1] + 9982[2] - 7298[3] + 2877[4] - 475[5])/1440
particles_positions[idx_part*size_particle_positions + idx_dim] particles_positions[idx_value]
+= dt * (4277.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] += dt * (4277.*rhs_0[idx_value]
- 7923.*particles_rhs[1][idx_part*size_particle_rhs + idx_dim] - 7923.*rhs_1[idx_value]
+ 9982.*particles_rhs[2][idx_part*size_particle_rhs + idx_dim] + 9982.*rhs_2[idx_value]
- 7298.*particles_rhs[3][idx_part*size_particle_rhs + idx_dim] - 7298.*rhs_3[idx_value]
+ 2877.*particles_rhs[4][idx_part*size_particle_rhs + idx_dim] + 2877.*rhs_4[idx_value]
- 475.*particles_rhs[5][idx_part*size_particle_rhs + idx_dim])/1440.; - 475.*rhs_5[idx_value])/1440.;
} }
} }
break; break;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment