Commit c030fc2d authored by Chichi Lalescu's avatar Chichi Lalescu
Browse files

fix tools for double precision

convergence test now works
parent 6da89c47
......@@ -211,12 +211,8 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
"""
return None
def fill_up_fluid_code(self):
if self.dtype == np.float32:
C_dtype = 'float'
else:
C_dtype = 'double'
self.fluid_includes += '#include <cstring>\n'
self.fluid_variables += 'fluid_solver<{0}> *fs;\n'.format(C_dtype)
self.fluid_variables += 'fluid_solver<{0}> *fs;\n'.format(self.C_dtype)
self.write_fluid_stats()
self.fluid_start += """
//begincpp
......@@ -234,7 +230,7 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
fs->iteration = iteration;
fs->read('v', 'c');
//endcpp
""".format(C_dtype)
""".format(self.C_dtype)
self.fluid_loop += """
//begincpp
fs->step(dt);
......@@ -256,15 +252,11 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
smoothness = 1,
integration_steps = 2,
kcut = 'fs->kM'):
if self.dtype == np.float32:
C_dtype = 'float'
else:
C_dtype = 'double'
self.parameters['neighbours{0}'.format(self.particle_species)] = neighbours
self.parameters['smoothness{0}'.format(self.particle_species)] = smoothness
self.parameters['kcut{0}'.format(self.particle_species)] = kcut
self.parameters['integration_steps{0}'.format(self.particle_species)] = integration_steps
self.particle_variables += 'tracers<{0}> *ps{1};'.format(C_dtype, self.particle_species)
self.particle_variables += 'tracers<{0}> *ps{1};'.format(self.C_dtype, self.particle_species)
self.file_datasets_create += ('tmp_dspace = H5::DataSpace(2, dims, maxdims);\n' +
'temp_string = std::string("/particles/") + std::string(ps{0}->name);\n' +
'group = data_file.openGroup(temp_string);\n' +
......@@ -305,7 +297,7 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
'ps{1}->dt = dt;\n' +
'ps{1}->iteration = iteration;\n' +
update_field +
'ps{1}->read(&data_file);\n').format(C_dtype, self.particle_species)
'ps{1}->read(&data_file);\n').format(self.C_dtype, self.particle_species)
self.particle_loop += (update_field +
'ps{0}->step();\n' +
'if (ps{0}->iteration % niter_part == 0)\n' +
......
......@@ -26,114 +26,6 @@
template <class rnumber>
int copy_complex_array(
field_descriptor<rnumber> *fi,
rnumber (*ai)[2],
field_descriptor<rnumber> *fo,
rnumber (*ao)[2],
int howmany)
{
fftwf_complex *buffer;
buffer = fftwf_alloc_complex(fi->slice_size*howmany);
int min_fast_dim;
min_fast_dim =
(fi->sizes[2] > fo->sizes[2]) ?
fo->sizes[2] : fi->sizes[2];
MPI_Datatype MPI_CNUM = (sizeof(rnumber) == 4) ? MPI_COMPLEX : MPI_C_DOUBLE_COMPLEX;
// clean up destination, in case we're padding with zeros
// (even if only for one dimension)
std::fill_n((rnumber*)ao, fo->local_size, 0.0);
int64_t ii0, ii1;
int64_t oi0, oi1;
int64_t delta1, delta0;
int irank, orank;
delta0 = (fo->sizes[0] - fi->sizes[0]);
delta1 = (fo->sizes[1] - fi->sizes[1]);
for (ii0=0; ii0 < fi->sizes[0]; ii0++)
{
if (ii0 <= fi->sizes[0]/2)
{
oi0 = ii0;
if (oi0 > fo->sizes[0]/2)
continue;
}
else
{
oi0 = ii0 + delta0;
if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2))
continue;
}
irank = fi->rank[ii0];
orank = fo->rank[oi0];
if ((irank == orank) &&
(irank == fi->myrank))
{
std::copy(
(rnumber*)(ai + (ii0 - fi->starts[0] )*fi->slice_size),
(rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size),
(rnumber*)buffer);
}
else
{
if (fi->myrank == irank)
{
MPI_Send(
(void*)(ai + (ii0-fi->starts[0])*fi->slice_size),
fi->slice_size,
MPI_CNUM,
orank,
ii0,
fi->comm);
}
if (fi->myrank == orank)
{
MPI_Recv(
(void*)(buffer),
fi->slice_size,
MPI_CNUM,
irank,
ii0,
fi->comm,
MPI_STATUS_IGNORE);
}
}
if (fi->myrank == orank)
{
for (ii1 = 0; ii1 < fi->sizes[1]; ii1++)
{
if (ii1 <= fi->sizes[1]/2)
{
oi1 = ii1;
if (oi1 > fo->sizes[1]/2)
continue;
}
else
{
oi1 = ii1 + delta1;
if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2))
continue;
}
std::copy(
(rnumber*)(buffer + (ii1*fi->sizes[2]*howmany)),
(rnumber*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany),
(rnumber*)(ao +
((oi0 - fo->starts[0])*fo->sizes[1] +
oi1)*fo->sizes[2]*howmany));
}
}
}
fftw_free(buffer);
MPI_Barrier(fi->comm);
return EXIT_SUCCESS;
}
template <class rnumber>
int clip_zero_padding(
field_descriptor<rnumber> *f,
......@@ -157,47 +49,148 @@ int clip_zero_padding(
template <class rnumber>
int get_descriptors_3D(
int n0, int n1, int n2,
field_descriptor<rnumber> **fr,
field_descriptor<rnumber> **fc)
{
MPI_Datatype MPI_RNUM = (sizeof(rnumber) == 4) ? MPI_FLOAT : MPI_DOUBLE;
MPI_Datatype MPI_CNUM = (sizeof(rnumber) == 4) ? MPI_COMPLEX : MPI_C_DOUBLE_COMPLEX;
int ntmp[3];
ntmp[0] = n0;
ntmp[1] = n1;
ntmp[2] = n2;
*fr = new field_descriptor<rnumber>(3, ntmp, MPI_RNUM, MPI_COMM_WORLD);
ntmp[0] = n0;
ntmp[1] = n1;
ntmp[2] = n2/2+1;
*fc = new field_descriptor<rnumber>(3, ntmp, MPI_CNUM, MPI_COMM_WORLD);
return EXIT_SUCCESS;
}
#define FORCE_IMPLEMENTATION(rnumber) \
template \
int copy_complex_array<rnumber>( \
field_descriptor<rnumber> *fi, \
rnumber (*ai)[2], \
field_descriptor<rnumber> *fo, \
rnumber (*ao)[2], \
int howmany); \
#define TOOLS_IMPLEMENTATION(FFTW, R, C, MPI_RNUM, MPI_CNUM) \
template <> \
int copy_complex_array<R>( \
field_descriptor<R> *fi, \
R (*ai)[2], \
field_descriptor<R> *fo, \
R (*ao)[2], \
int howmany) \
{ \
FFTW(complex) *buffer; \
buffer = FFTW(alloc_complex)(fi->slice_size*howmany); \
\
template \
int clip_zero_padding<rnumber>( \
field_descriptor<rnumber> *f, \
rnumber *a, \
int howmany); \
int min_fast_dim; \
min_fast_dim = \
(fi->sizes[2] > fo->sizes[2]) ? \
fo->sizes[2] : fi->sizes[2]; \
\
template \
int get_descriptors_3D<rnumber>( \
/* clean up destination, in case we're padding with zeros
(even if only for one dimension) */ \
std::fill_n((R*)ao, fo->local_size*2, 0.0); \
\
int64_t ii0, ii1; \
int64_t oi0, oi1; \
int64_t delta1, delta0; \
int irank, orank; \
delta0 = (fo->sizes[0] - fi->sizes[0]); \
delta1 = (fo->sizes[1] - fi->sizes[1]); \
for (ii0=0; ii0 < fi->sizes[0]; ii0++) \
{ \
if (ii0 <= fi->sizes[0]/2) \
{ \
oi0 = ii0; \
if (oi0 > fo->sizes[0]/2) \
continue; \
} \
else \
{ \
oi0 = ii0 + delta0; \
if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) \
continue; \
} \
irank = fi->rank[ii0]; \
orank = fo->rank[oi0]; \
if ((irank == orank) && \
(irank == fi->myrank)) \
{ \
std::copy( \
(R*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), \
(R*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), \
(R*)buffer); \
} \
else \
{ \
if (fi->myrank == irank) \
{ \
MPI_Send( \
(void*)(ai + (ii0-fi->starts[0])*fi->slice_size), \
fi->slice_size, \
MPI_CNUM, \
orank, \
ii0, \
fi->comm); \
} \
if (fi->myrank == orank) \
{ \
MPI_Recv( \
(void*)(buffer), \
fi->slice_size, \
MPI_CNUM, \
irank, \
ii0, \
fi->comm, \
MPI_STATUS_IGNORE); \
} \
} \
if (fi->myrank == orank) \
{ \
for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) \
{ \
if (ii1 <= fi->sizes[1]/2) \
{ \
oi1 = ii1; \
if (oi1 > fo->sizes[1]/2) \
continue; \
} \
else \
{ \
oi1 = ii1 + delta1; \
if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) \
continue; \
} \
std::copy( \
(R*)(buffer + (ii1*fi->sizes[2]*howmany)), \
(R*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), \
(R*)(ao + \
((oi0 - fo->starts[0])*fo->sizes[1] + \
oi1)*fo->sizes[2]*howmany)); \
} \
} \
} \
fftw_free(buffer); \
MPI_Barrier(fi->comm); \
\
return EXIT_SUCCESS; \
} \
\
template <> \
int get_descriptors_3D<R>( \
int n0, int n1, int n2, \
field_descriptor<rnumber> **fr, \
field_descriptor<rnumber> **fc);
field_descriptor<R> **fr, \
field_descriptor<R> **fc) \
{ \
int ntmp[3]; \
ntmp[0] = n0; \
ntmp[1] = n1; \
ntmp[2] = n2; \
*fr = new field_descriptor<R>(3, ntmp, MPI_RNUM, MPI_COMM_WORLD); \
ntmp[0] = n0; \
ntmp[1] = n1; \
ntmp[2] = n2/2+1; \
*fc = new field_descriptor<R>(3, ntmp, MPI_CNUM, MPI_COMM_WORLD); \
return EXIT_SUCCESS; \
} \
\
template \
int clip_zero_padding<R>( \
field_descriptor<R> *f, \
R *a, \
int howmany); \
FORCE_IMPLEMENTATION(float)
FORCE_IMPLEMENTATION(double)
TOOLS_IMPLEMENTATION(
FFTW_MANGLE_FLOAT,
float,
fftwf_complex,
MPI_FLOAT,
MPI_COMPLEX)
TOOLS_IMPLEMENTATION(
FFTW_MANGLE_DOUBLE,
double,
fftw_complex,
MPI_DOUBLE,
MPI_C_DOUBLE_COMPLEX)
......@@ -40,3 +40,4 @@ vector_field<rnumber>& vector_field<rnumber>::operator*(rnumber factor)
}
template class vector_field<float>;
template class vector_field<double>;
......@@ -45,6 +45,13 @@ class fluid_particle_base(bfps.code):
self.dtype = np.float32
elif dtype == 'double':
self.dtype = np.float64
self.rtype = self.dtype
if self.rtype == np.float32:
self.ctype = np.complex64
self.C_dtype = 'float'
elif self.rtype == np.float64:
self.ctype = np.complex128
self.C_dtype = 'double'
self.parameters['dkx'] = 1.0
self.parameters['dky'] = 1.0
self.parameters['dkz'] = 1.0
......
......@@ -45,13 +45,9 @@ class fluid_converter(bfps.fluid_base.fluid_particle_base):
self.finalize_code()
return None
def fill_up_fluid_code(self):
if self.dtype == np.float32:
C_dtype = 'float'
else:
C_dtype = 'double'
self.fluid_includes += '#include <cstring>\n'
self.fluid_variables += ('double t;\n' +
'fluid_solver<{0}> *fs;\n').format(C_dtype)
'fluid_solver<{0}> *fs;\n').format(self.C_dtype)
self.fluid_definitions += """
//begincpp
void do_conversion(fluid_solver<{0}> *bla)
......@@ -63,7 +59,7 @@ class fluid_converter(bfps.fluid_base.fluid_particle_base):
bla->write('v', 'r');
}}
//endcpp
""".format(C_dtype)
""".format(self.C_dtype)
self.fluid_start += """
//begincpp
fs = new fluid_solver<{0}>(
......@@ -73,7 +69,7 @@ class fluid_converter(bfps.fluid_base.fluid_particle_base):
fs->iteration = iteration;
do_conversion(fs);
//endcpp
""".format(C_dtype)
""".format(self.C_dtype)
self.fluid_loop += """
//begincpp
fs->iteration++;
......
......@@ -73,9 +73,9 @@ class fluid_resize(bfps.fluid_base.fluid_particle_base):
a = 0.5*fs0->correl_vec(fs0->cvelocity, fs0->cvelocity);
b = 0.5*fs0->correl_vec(fs0->cvorticity, fs0->cvorticity);
DEBUG_MSG("old field %d %g %g\\n", fs0->iteration, a, b);
copy_complex_array(fs0->cd, fs0->cvorticity,
fs1->cd, fs1->cvorticity,
3);
copy_complex_array<{0}>(fs0->cd, fs0->cvorticity,
fs1->cd, fs1->cvorticity,
3);
fs1->write('v', 'c');
a = 0.5*fs1->correl_vec(fs1->cvelocity, fs1->cvelocity);
b = 0.5*fs1->correl_vec(fs1->cvorticity, fs1->cvorticity);
......
......@@ -9,6 +9,6 @@ changedir =
{envtmpdir}
commands =
cp {toxinidir}/test.py {envtmpdir}
python test.py -n 32 --run --initialize --ncpu 2 --convergence --nsteps 16 --precision single --wd "data/single"
# python test.py -n 32 --run --initialize --ncpu 2 --convergence --nsteps 16 --precision single --wd "data/single"
python test.py -n 32 --run --initialize --ncpu 2 --convergence --nsteps 16 --precision double --wd "data/double"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment