Per-thread dealias/Fourier filter maps, HDF5 symlinks in the generated job
scripts, and an HTML timing report for scope_timer.

Reviewed revision of the patch. Only added lines were changed:
  * bfps/_code.py: the second job-script hunk now writes 'cp -s ../*.h5 ./'
    after the 'cd' line, in the same order as the first hunk.
  * field.cpp, fluid_solver_base.cpp: the merge loops bind the map entries
    by const reference instead of copying each pair.
  * scope_timer.hpp: showHtml gained a doc comment, emits numeric character
    references instead of raw non-ASCII glyphs (the page declares no
    charset), and reports a failure to open the output file.

Note: the indentation below was reconstructed from a whitespace-collapsed
copy, and the index hashes are those of the original revision; apply with
whitespace-insensitive matching.

diff --git a/bfps/_code.py b/bfps/_code.py
index ec1557d2f68ff7bad58abbf629fcdd7a21d8f341..aff52a6e82a5bce403ee0168f89d976d11467d1f 100644
--- a/bfps/_code.py
+++ b/bfps/_code.py
@@ -353,6 +353,7 @@ class _code(_base):
                 '\n')
         script_file.write('echo "Start time is `date`"\n')
         script_file.write('cd ' + self.work_dir + '\n')
+        script_file.write('cp -s ../*.h5 ./\n')
         script_file.write('poe ' +
                 os.path.join(
                     self.work_dir,
@@ -423,6 +424,7 @@ class _code(_base):
                 '\n')
         script_file.write('echo "This is step $LOADL_STEP_ID out of {0}"\n'.format(njobs))
         script_file.write('echo "Start time is `date`"\n')
         script_file.write('cd ' + self.work_dir + '\n')
+        script_file.write('cp -s ../*.h5 ./\n')
         script_file.write('poe ' +
                 os.path.join(
diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp
index aef3cb840c86b419cf834c76f4d917b2d87df60e..0d065cede5ba61519b3cfa4ddb854e9bfb3c95ad 100644
--- a/bfps/cpp/field.cpp
+++ b/bfps/cpp/field.cpp
@@ -774,6 +774,8 @@ kspace<be, dt>::kspace(
         std::fill_n(nshell_local, this->nshells, 0);
     });
 
+    std::vector<std::unordered_map<int, double>> dealias_filter_threaded(omp_get_max_threads());
+
     KSPACE_CLOOP_K2_NXMODES(
             this,[&](ptrdiff_t /*cindex*/, hsize_t /*yindex*/, hsize_t /*zindex*/, int nxmodes, hsize_t /*xindex*/, double k2){
             if (k2 < this->kM2)
@@ -784,10 +786,16 @@ kspace<be, dt>::kspace(
             }
             if (dt == TWO_THIRDS){
                 // Should not be any race condition here it is a "write"
-                this->dealias_filter[int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));
+                dealias_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));
             }
     });
 
+    for(int idxMerge = 0 ; idxMerge < int(dealias_filter_threaded.size()) ; ++idxMerge){
+        for(const auto& kv : dealias_filter_threaded[idxMerge]){
+            this->dealias_filter[kv.first] = kv.second;
+        }
+    }
+
     nshell_local_threaded.mergeParallel();
     kshell_local_threaded.mergeParallel();
 
diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp
index b50c7c8ce2d1be4949e3307a44566cc4b09c63cd..393f086d02676c0faf3d047cc938b4befea64b93 100644
--- a/bfps/cpp/fluid_solver_base.cpp
+++ b/bfps/cpp/fluid_solver_base.cpp
@@ -513,6 +513,8 @@ fluid_solver_base<rnumber>::fluid_solver_base(
     int64_t *nshell_local = new int64_t[this->nshells];
     std::fill_n(nshell_local, this->nshells, 0.0);
 
+    std::vector<std::unordered_map<int, double>> Fourier_filter_threaded(omp_get_max_threads());
+
     CLOOP_K2_NXMODES(
             this,
 
@@ -524,9 +526,16 @@ fluid_solver_base<rnumber>::fluid_solver_base(
                 nshell_local[int(knorm/this->dk)] += nxmodes;
                 kshell_local[int(knorm/this->dk)] += nxmodes*knorm;
             }
-            this->Fourier_filter[int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));}
+            Fourier_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));}
             );
 
+    // Merge results
+    for(int idxMerge = 0 ; idxMerge < int(Fourier_filter_threaded.size()) ; ++idxMerge){
+        for(const auto& kv : Fourier_filter_threaded[idxMerge]){
+            this->Fourier_filter[kv.first] = kv.second;
+        }
+    }
+
     MPI_Allreduce(
             (void*)(nshell_local),
             (void*)(this->nshell),
diff --git a/bfps/cpp/scope_timer.hpp b/bfps/cpp/scope_timer.hpp
index 5fd6578c9e9f593191a44049c4f425448a53b21a..76fc69f16f1772aca3242756fcbaf58acf0eed29 100644
--- a/bfps/cpp/scope_timer.hpp
+++ b/bfps/cpp/scope_timer.hpp
@@ -37,7 +37,7 @@
 #include <mpi.h>
 #include <cstring>
 #include <stdexcept>
-
+#include <fstream>
 
 #include "base.hpp"
 #include "bfps_timer.hpp"
@@ -185,10 +185,10 @@ public:
         retMpi = MPI_Comm_size( inComm, &nbProcess);
         assert(retMpi == MPI_SUCCESS);
 
-        if((&outputStream == &std::cout || &outputStream == &std::clog) && myrank != nbProcess-1){
+        if((&outputStream == &std::cout || &outputStream == &std::clog) && myRank != nbProcess-1){
             // Print in reverse order
             char tmp;
-            retMpi = MPI_Recv(&tmp, 1, MPI_BYTE, myrank+1, 99, inComm, MPI_STATUS_IGNORE);
+            retMpi = MPI_Recv(&tmp, 1, MPI_BYTE, myRank+1, 99, inComm, MPI_STATUS_IGNORE);
             assert(retMpi == MPI_SUCCESS);
         }
         outputStream.flush();
@@ -230,10 +230,10 @@ public:
         }
         outputStream.flush();
 
-        if((&outputStream == &std::cout || &outputStream == &std::clog) && myrank != 0){
+        if((&outputStream == &std::cout || &outputStream == &std::clog) && myRank != 0){
             // Print in reverse order
             char tmp;
-            retMpi = MPI_Send(&tmp, 1, MPI_BYTE, myrank-1, 99, inComm);
+            retMpi = MPI_Send(&tmp, 1, MPI_BYTE, myRank-1, 99, inComm);
             assert(retMpi == MPI_SUCCESS);
         }
     }
@@ -283,7 +283,7 @@ public:
             }
         }
 
-        if(myrank != 0){
+        if(myRank != 0){
             const std::string strOutput = myResults.str();
             int sizeOutput = strOutput.length();
             retMpi = MPI_Send(&sizeOutput, 1, MPI_INT, 0, 99, inComm);
@@ -444,6 +444,164 @@ public:
         outputStream.flush();
     }
 
+    /**
+     * Write the timing tree of every process of inComm into one HTML file.
+     * Every rank renders its own events as nested lists (collapsible through
+     * the checkbox/CSS rules below); ranks other than 0 send their fragment
+     * to rank 0, which writes the file named by the HTMLOUTPUT environment
+     * variable ("timings.html" when it is unset).
+     */
+    void showHtml(const MPI_Comm inComm) const {
+        int myRank, nbProcess;
+        int retMpi = MPI_Comm_rank( inComm, &myRank);
+        assert(retMpi == MPI_SUCCESS);
+        retMpi = MPI_Comm_size( inComm, &nbProcess);
+        assert(retMpi == MPI_SUCCESS);
+
+        std::stringstream myResults;
+
+        std::stack<std::pair<int, const std::shared_ptr<CoreEvent>>> events;
+
+        for (int idx = static_cast<int>(root->getChildren().size()) - 1; idx >= 0; --idx) {
+            events.push({0, root->getChildren()[idx]});
+        }
+
+        myResults << "<h1>Process : " << myRank << "</h1>\n";
+
+        double totalDuration = 0;
+        for (int idx =
+             static_cast<int>(root->getChildren().size()) - 1;
+             idx >= 0; --idx) {
+            totalDuration += root->getChildren()[idx]->getDuration();
+        }
+
+        myResults << "<h2> " << root->getName() << " (" << totalDuration << "s)</h2>\n";
+        myResults << "<ul>\n";
+        int idxBox = myRank*100000;
+
+        while (events.size()) {
+            const std::pair<int, const std::shared_ptr<CoreEvent>> eventToShow =
+                    events.top();
+            events.pop();
+
+            if(eventToShow.first == -1){
+                myResults << "</ul>\n";
+                myResults << "</li>\n";
+            }
+            else if(eventToShow.second->getChildren().size() == 0){
+                myResults << "<li>&#9679; <span title=\"";
+                if (eventToShow.second->getOccurrence() != 1) {
+                    myResults << "Min = " << eventToShow.second->getMin() << "s ; Max = " << eventToShow.second->getMax()
+                              << "s ; Average = " << eventToShow.second->getAverage() << "s ; Occurrence = "
+                              << eventToShow.second->getOccurrence();
+                }
+                myResults << "\">" << eventToShow.second->getName();
+                myResults << " (" << 100*eventToShow.second->getDuration()/totalDuration << "% -- " ;
+                myResults << eventToShow.second->getDuration() <<"s)</span></li>\n";
+            }
+            else{
+                myResults << "<li><input type=\"checkbox\" id=\"c" << idxBox << "\" />\n";
+                myResults << " <i class=\"fa fa-angle-double-right\">&#8594; </i>\n";
+                myResults << " <i class=\"fa fa-angle-double-down\">&#8595; </i>\n";
+                myResults << " <label for=\"c" << idxBox++ << "\"><span title=\"";
+                if (eventToShow.second->getOccurrence() != 1) {
+                    myResults << "Min = " << eventToShow.second->getMin() << "s ; Max = " << eventToShow.second->getMax()
+                              << "s ; Average = " << eventToShow.second->getAverage() << "s ; Occurrence = "
+                              << eventToShow.second->getOccurrence();
+                }
+                myResults << "\">" << eventToShow.second->getName();
+                myResults << " (" << 100*eventToShow.second->getDuration()/totalDuration << "% -- " ;
+                myResults << eventToShow.second->getDuration() <<"s)</span></label>\n";
+                myResults << "<ul>\n";
+                events.push({-1, std::shared_ptr<CoreEvent>()});
+
+                for (int idx =
+                     static_cast<int>(eventToShow.second->getChildren().size()) - 1;
+                     idx >= 0; --idx) {
+                    events.push(
+                        {eventToShow.first + 1, eventToShow.second->getChildren()[idx]});
+                }
+            }
+        }
+
+        myResults << "</ul>\n";
+
+        if(myRank != 0){
+            const std::string strOutput = myResults.str();
+            int sizeOutput = strOutput.length();
+            retMpi = MPI_Send(&sizeOutput, 1, MPI_INT, 0, 99, inComm);
+            assert(retMpi == MPI_SUCCESS);
+            retMpi = MPI_Send((void*)strOutput.data(), sizeOutput, MPI_CHAR, 0, 100, inComm);
+            assert(retMpi == MPI_SUCCESS);
+        }
+        else{
+            const std::string htmlOutput = (getenv("HTMLOUTPUT")?getenv("HTMLOUTPUT"):"timings.html");
+
+            std::cout << "Timing output html set to : " << htmlOutput << std::endl;
+
+            std::ofstream htmlfile(htmlOutput);
+            if(!htmlfile.is_open()){
+                // Only warn: the receives below must still run, the other ranks' MPI_Send calls may otherwise never complete.
+                std::cerr << "Cannot open timing output file : " << htmlOutput << std::endl;
+            }
+
+            htmlfile << "<html>\
+            <head>\
+            <style>\
+            input {\
+              display: none;\
+            }\
+            input ~ ul {\
+              display: none;\
+            }\
+            input:checked ~ ul {\
+              display: block;\
+            }\
+            input ~ .fa-angle-double-down {\
+              display: none;\
+            }\
+            input:checked ~ .fa-angle-double-right {\
+              display: none;\
+            }\
+            input:checked ~ .fa-angle-double-down {\
+              display: inline;\
+            }\
+            li {\
+              display: block;\
+              font-family: 'Arial';\
+              font-size: 15px;\
+              padding: 0.2em;\
+              border: 1px solid transparent;\
+            }\
+            li:hover {\
+              border: 1px solid grey;\
+              border-radius: 3px;\
+              background-color: lightgrey;\
+            }\
+            span:hover {\
+              color: blue;\
+            }\
+            </style>\
+            </head>\
+            <body>";
+
+            std::vector<char> buffer;
+            for(int idxProc = nbProcess-1 ; idxProc > 0 ; --idxProc){
+                int sizeRecv;
+                retMpi = MPI_Recv(&sizeRecv, 1, MPI_INT, idxProc, 99, inComm, MPI_STATUS_IGNORE);
+                assert(retMpi == MPI_SUCCESS);
+                buffer.resize(sizeRecv+1);
+                retMpi = MPI_Recv(buffer.data(), sizeRecv, MPI_CHAR, idxProc, 100, inComm, MPI_STATUS_IGNORE);
+                assert(retMpi == MPI_SUCCESS);
+                buffer[sizeRecv]='\0';
+                htmlfile << buffer.data();
+            }
+            htmlfile << myResults.str();
+            htmlfile << "</body>\
+            </html>";
+        }
+    }
+
     friend scope_timer;
 };
 