diff --git a/bfps/_code.py b/bfps/_code.py index e49d14c5cef09d8d93a202e73022078babd6a33a..3fa4ba2e6b207c5914b232f2dd0fda0123ca0236 100644 --- a/bfps/_code.py +++ b/bfps/_code.py @@ -70,11 +70,19 @@ class _code(_base): //begincpp int main(int argc, char *argv[]) { - MPI_Init(&argc, &argv); + int mpiprovided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &mpiprovided); + assert(mpiprovided >= MPI_THREAD_FUNNELED); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + fftw_init_threads(); + fftwf_init_threads(); fftw_mpi_init(); fftwf_mpi_init(); + const int nbThreads = (getenv("FFTW_NUM_THREADS")?atoi(getenv("FFTW_NUM_THREADS")):32); + DEBUG_MSG("Number of threads for the FFTW = %d\\n", nbThreads); + std::cout << "There are " << nprocs << " processes and " << nbThreads << " threads" << std::endl; + fftw_plan_with_nthreads(nbThreads); if (argc != 2) { std::cerr << "Wrong number of command line arguments. Stopping." << std::endl; @@ -154,6 +162,8 @@ class _code(_base): '{0}\n'.format(bfps.dist_loc)) libraries = ['bfps'] libraries += bfps.install_info['libraries'] + libraries += ['fftw3_threads'] + libraries += ['fftw3f_threads'] command_strings = [bfps.install_info['compiler']] command_strings += [self.name + '.cpp', '-o', self.name] @@ -465,8 +475,8 @@ class _code(_base): assert(nprocesses % self.host_info['deltanprocs'] == 0) tasks_per_node = self.host_info['deltanprocs'] script_file.write('#SBATCH --nodes={0}\n'.format(nodes)) - script_file.write('#SBATCH --ntasks-per-node={0}\n'.format(tasks_per_node)) - script_file.write('#SBATCH --ntasks-per-core=1\n') + script_file.write('#SBATCH --ntasks-per-node={0}\n'.format(1)) # tasks_per_node + script_file.write('#SBATCH --cpus-per-task={0}\n'.format(self.host_info['deltanprocs'])) script_file.write('#SBATCH --mail-type=none\n') script_file.write('#SBATCH --time={0}:{1:0>2d}:00\n'.format(hours, minutes)) script_file.write('LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:' +