diff --git a/src/Morton_shuffler.cpp b/src/Morton_shuffler.cpp index 11f40c595c222fe87eb84e32275e92b099669423..7de49c5029d46735e798a7a83c75fe0d9f3f951a 100644 --- a/src/Morton_shuffler.cpp +++ b/src/Morton_shuffler.cpp @@ -8,7 +8,8 @@ Morton_shuffler::Morton_shuffler( int nfiles) { this->d = d; - if (nprocs % nfiles != 0) + if ((nprocs % nfiles != 0) && + (nfiles % nprocs != 0)) { std::cerr << "Number of output files incompatible with number of processes.\n" @@ -44,6 +45,13 @@ Morton_shuffler::Morton_shuffler( //set up output file descriptor int out_rank, out_nprocs; out_nprocs = nprocs/nfiles; + if (out_nprocs == 0) + { + out_nprocs = 1; + this->files_per_proc = nfiles / nprocs; + } + else + this->files_per_proc = 1; this->out_group = myrank / out_nprocs; out_rank = myrank % out_nprocs; n[0] = ((N0/8) * (N1/8) * (N2/8)) / nfiles; @@ -128,11 +136,16 @@ int Morton_shuffler::shuffle( fftwf_free(rz); char temp_char[200]; - sprintf(temp_char, - "%s_z%.7x", - base_fname, - this->out_group*this->doutput->sizes[0]); - this->doutput->write(temp_char, rtmp); + for (int fcounter = 0; fcounter < this->files_per_proc; fcounter++) + { + sprintf(temp_char, + "%s_z%.7x", + base_fname, + (this->files_per_proc*this->out_group + fcounter)*this->doutput->sizes[0]); + this->doutput->write( + temp_char, + rtmp + fcounter*this->doutput->local_size); + } fftwf_free(rtmp); return EXIT_SUCCESS; } diff --git a/src/Morton_shuffler.hpp b/src/Morton_shuffler.hpp index 913f0d847c26bd910d3d13a9656807b7eef00ae8..08825cbd3f3267dae910556d75b7251fc219e5f8 100644 --- a/src/Morton_shuffler.hpp +++ b/src/Morton_shuffler.hpp @@ -61,7 +61,7 @@ class Morton_shuffler // communicator to use for output MPI_Comm out_communicator; - int out_group; + int out_group, files_per_proc; /* methods */ Morton_shuffler( diff --git a/test3.ipynb b/test3.ipynb index 5fa0f637dcd6778535c67ea6cdf3de16f3877d7a..23ff5b95bbfb2db00076617ccefd8cdc65aa4857 100644 --- a/test3.ipynb +++ b/test3.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:e744ea7dc72564d6f526a0a73b97b6f43c1ccd7d5c6c2767d8b6c5a5ec7f4487" + "signature": "sha256:bb9dddd64d9a5ac46de7f7d5ba2abecb7dd4bb75c76a6845ace376759bd57f78" }, "nbformat": 3, "nbformat_minor": 0, @@ -63,8 +63,16 @@ ], "language": "python", "metadata": {}, - "outputs": [], - "prompt_number": 14 + "outputs": [ + { + "output_type": "stream", + "stream": "stderr", + "text": [ + "-c:15: RuntimeWarning: divide by zero encountered in true_divide\n" + ] + } + ], + "prompt_number": 2 }, { "cell_type": "code", @@ -148,13 +156,15 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 15 + "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ - "def compute_cpp_data(branch = None):\n", + "def compute_cpp_data(\n", + " branch = None,\n", + " nfiles = 16):\n", " if not (type(branch) == type(None)):\n", " subprocess.call(['git', 'checkout', branch])\n", " if subprocess.call(['make', 'full.elf']) == 0:\n", @@ -164,39 +174,41 @@ " 'time',\n", " 'mpirun.mpich',\n", " '-np',\n", - " '32',\n", + " '8',\n", " './full.elf',\n", " '{0}'.format(n),\n", " '{0}'.format(N),\n", - " '2',\n", + " '{0}'.format(nfiles),\n", " '3'])\n", " else:\n", " print ('compilation error')\n", " return None\n", " \n", - "def get_cpp_data(branch = None, run = True):\n", + "def get_cpp_data(\n", + " branch = None,\n", + " run = True,\n", + " nfiles = 16):\n", " if run:\n", " subprocess.call(['rm',\n", " 'Rdata_z{0:0>7x}'.format(0),\n", " 'Rdata_z{0:0>7x}'.format(Rdata_py.shape[0]//2)])\n", - " compute_cpp_data(branch)\n", - " Rdata0 = np.fromfile(\n", - " 'Rdata_z{0:0>7x}'.format(0),\n", - " dtype = np.float32).reshape(-1, 8, 8, 8, 3)\n", - " Rdata1 = np.fromfile(\n", - " 'Rdata_z{0:0>7x}'.format(Rdata_py.shape[0]//2),\n", - " dtype = np.float32).reshape(-1, 8, 8, 8, 3)\n", - " return np.concatenate([Rdata0, Rdata1])\n", + " compute_cpp_data(branch, nfiles = nfiles)\n", + " Rdata = []\n", + " for nf in range(nfiles):\n", + " Rdata.append(np.fromfile(\n", + " 'Rdata_z{0:0>7x}'.format(nf*Rdata_py.shape[0]//nfiles),\n", + " dtype = np.float32).reshape(-1, 8, 8, 8, 3))\n", + " return np.concatenate(Rdata)\n", "\n", "#Rdata = get_cpp_data(branch = 'develop')\n", "# develop says 30 secs, inplace fft is 28 secs\n", "#Rdata = get_cpp_data(branch = 'feature-inplace_fft')\n", - "Rdata = get_cpp_data(run = True)" + "Rdata = get_cpp_data(run = True, nfiles = 8)" ], "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 16 + "prompt_number": 8 }, { "cell_type": "code", @@ -237,7 +249,7 @@ ] } ], - "prompt_number": 17 + "prompt_number": 9 }, { "cell_type": "code",