Skip to content
Snippets Groups Projects
Commit 8d1ccf1e authored by Berenger Bramas
Browse files

update the parallel loops to split the second dimension

parent 54c34e5f
No related branches found
No related tags found
2 merge requests: !21 Bugfix/nansampling, !3 Bugfix/event manager show html
......@@ -204,11 +204,11 @@ class field
case FFTW:
#pragma omp parallel
{
const hsize_t start = OmpUtils::ForIntervalStart(this->rlayout->subsizes[0]);
const hsize_t end = OmpUtils::ForIntervalEnd(this->rlayout->subsizes[0]);
const hsize_t start = OmpUtils::ForIntervalStart(this->rlayout->subsizes[1]);
const hsize_t end = OmpUtils::ForIntervalEnd(this->rlayout->subsizes[1]);
for (hsize_t zindex = start; zindex < end; zindex++)
for (hsize_t yindex = 0; yindex < this->rlayout->subsizes[1]; yindex++)
for (hsize_t zindex = 0; zindex < this->rlayout->subsizes[0]; zindex++)
for (hsize_t yindex = start; yindex < end; yindex++)
{
ptrdiff_t rindex = (
zindex * this->rlayout->subsizes[1] + yindex)*(
......
......@@ -163,12 +163,13 @@ void CLOOP_NXMODES(ObjectType* obj, FuncType expression)
TIMEZONE("CLOOP_NXMODES");
#pragma omp parallel
{
const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[0]);
const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[0]);
for (ptrdiff_t yindex = start; yindex < end; yindex++){
ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2];
for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++)
const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]);
const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]);
for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){
for (ptrdiff_t zindex = start; zindex < end; zindex++)
{
ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]
+ zindex*obj->cd->subsizes[2];
int nxmodes = 1;
ptrdiff_t xindex = 0;
expression();
......@@ -191,11 +192,12 @@ void CLOOP_K2(ObjectType* obj, FuncType expression)
TIMEZONE("CLOOP_K2");
#pragma omp parallel
{
const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[0]);
const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[0]);
for (ptrdiff_t yindex = start; yindex < end; yindex++){
ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2];
for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++)
const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]);
const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]);
for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){
for (ptrdiff_t zindex = start; zindex < end; zindex++){
ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]
+ zindex*obj->cd->subsizes[2];
for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++)
{
double k2 = (obj->kx[xindex]*obj->kx[xindex] +
......@@ -207,6 +209,7 @@ void CLOOP_K2(ObjectType* obj, FuncType expression)
}
}
}
}
template <class ObjectType, class FuncType>
......@@ -214,12 +217,13 @@ void CLOOP_K2_NXMODES(ObjectType* obj, FuncType expression)
{
#pragma omp parallel
{
const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[0]);
const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[0]);
for (ptrdiff_t yindex = start; yindex < end; yindex++){
ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2];
for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++)
const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]);
const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]);
for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){
for (ptrdiff_t zindex = start; zindex < end; zindex++)
{
ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]
+ zindex*obj->cd->subsizes[2];
int nxmodes = 1;
ptrdiff_t xindex = 0;
double k2 = (obj->kx[xindex]*obj->kx[xindex] +
......@@ -247,10 +251,10 @@ void RLOOP(ObjectType* obj, FuncType expression)
{
#pragma omp parallel
{
const hsize_t start = OmpUtils::ForIntervalStart(obj->rd->subsizes[0]);
const hsize_t end = OmpUtils::ForIntervalEnd(obj->rd->subsizes[0]);
for (int zindex = start; zindex < end ; zindex++)
for (int yindex = 0; yindex < obj->rd->subsizes[1]; yindex++)
const hsize_t start = OmpUtils::ForIntervalStart(obj->rd->subsizes[1]);
const hsize_t end = OmpUtils::ForIntervalEnd(obj->rd->subsizes[1]);
for (int zindex = 0; zindex < obj->rd->subsizes[0] ; zindex++)
for (int yindex = start; yindex < end; yindex++)
{
ptrdiff_t rindex = (zindex * obj->rd->subsizes[1] + yindex)*(obj->rd->subsizes[2]+2);
for (int xindex = 0; xindex < obj->rd->subsizes[2]; xindex++)
......
......@@ -91,12 +91,13 @@ class kspace
{
#pragma omp parallel
{
const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[0]);
const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[0]);
const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
for (hsize_t yindex = start; yindex < end; yindex++){
ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2];
for (hsize_t zindex = 0; zindex < this->layout->subsizes[1]; zindex++){
for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
for (hsize_t zindex = start; zindex < end; zindex++){
ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2]
+ zindex*this->layout->subsizes[2];
for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
{
expression(cindex, xindex, yindex, zindex);
......@@ -111,15 +112,13 @@ class kspace
{
#pragma omp parallel
{
const double chunk = double(this->layout->subsizes[0])/double(omp_get_num_threads());
const hsize_t start = hsize_t(chunk*double(omp_get_thread_num()));
const hsize_t end = (omp_get_thread_num() == omp_get_num_threads()-1) ?
this->layout->subsizes[0]:
hsize_t(chunk*double(omp_get_thread_num()+1));
for (hsize_t yindex = start; yindex < end; yindex++){
ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2];
for (hsize_t zindex = 0; zindex < this->layout->subsizes[1]; zindex++){
const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
for (hsize_t zindex = start; zindex < end; zindex++){
ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2]
+ zindex*this->layout->subsizes[2];
for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
{
double k2 = (this->kx[xindex]*this->kx[xindex] +
......@@ -137,15 +136,13 @@ class kspace
{
#pragma omp parallel
{
const double chunk = double(this->layout->subsizes[0])/double(omp_get_num_threads());
const hsize_t start = hsize_t(chunk*double(omp_get_thread_num()));
const hsize_t end = (omp_get_thread_num() == omp_get_num_threads()-1) ?
this->layout->subsizes[0]:
hsize_t(chunk*double(omp_get_thread_num()+1));
for (hsize_t yindex = start; yindex < end; yindex++){
ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2];
for (hsize_t zindex = 0; zindex < this->layout->subsizes[1]; zindex++){
const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
for (hsize_t zindex = start; zindex < end; zindex++){
ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2]
+ zindex*this->layout->subsizes[2];
hsize_t xindex = 0;
double k2 = (
this->kx[xindex]*this->kx[xindex] +
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment