diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 9eeef6324f598352c3beb80e418a2e1cbcaaa8cf..172cc54e36408715a149811152fd38a471681bf3 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -915,19 +915,19 @@ template void pass8 (size_t ido, size_t l1, for (size_t k=0; k(a6); ROTX90(a7); - PMINPLACE(a0,a2); PMINPLACE(a1,a3); - PMINPLACE(a4,a6); + ROTX90(a3); PMINPLACE(a5,a7); ROTX45(a5); - ROTX90(a3); ROTX135(a7); + PMC(a0,a4,CC(0,0,k),CC(0,4,k)); + PMC(a2,a6,CC(0,2,k),CC(0,6,k)); + ROTX90(a6); + PMINPLACE(a0,a2); + PMINPLACE(a4,a6); PMC(CH(0,k,0),CH(0,k,4),a0,a1); PMC(CH(0,k,1),CH(0,k,5),a4,a5); PMC(CH(0,k,2),CH(0,k,6),a2,a3); @@ -937,19 +937,19 @@ template void pass8 (size_t ido, size_t l1, for (size_t k=0; k(a6); ROTX90(a7); - PMINPLACE(a0,a2); PMINPLACE(a1,a3); - PMINPLACE(a4,a6); + ROTX90(a3); PMINPLACE(a5,a7); ROTX45(a5); - ROTX90(a3); ROTX135(a7); + PMC(a0,a4,CC(0,0,k),CC(0,4,k)); + PMC(a2,a6,CC(0,2,k),CC(0,6,k)); + ROTX90(a6); + PMINPLACE(a0,a2); + PMINPLACE(a4,a6); PMC(CH(0,k,0),CH(0,k,4),a0,a1); PMC(CH(0,k,1),CH(0,k,5),a4,a5); PMC(CH(0,k,2),CH(0,k,6),a2,a3); @@ -958,30 +958,30 @@ template void pass8 (size_t ido, size_t l1, for (size_t i=1; i(a6); ROTX90(a7); - PMINPLACE(a0,a2); PMINPLACE(a1,a3); - PMINPLACE(a4,a6); + ROTX90(a3); PMINPLACE(a5,a7); ROTX45(a5); - ROTX90(a3); ROTX135(a7); + PMC(a0,a4,CC(i,0,k),CC(i,4,k)); + PMC(a2,a6,CC(i,2,k),CC(i,6,k)); + ROTX90(a6); + PMINPLACE(a0,a2); + PMINPLACE(a4,a6); PMINPLACE(a0,a1); + CH(i,k,0) = a0; + CH(i,k,4) = a1.template special_mul(WA(3,i)); PMINPLACE(a2,a3); + CH(i,k,2) = a2.template special_mul(WA(1,i)); + CH(i,k,6) = a3.template special_mul(WA(5,i)); PMINPLACE(a4,a5); - PMINPLACE(a6,a7); - CH(i,k,0) = a0; CH(i,k,1) = a4.template special_mul(WA(0,i)); - CH(i,k,2) = a2.template special_mul(WA(1,i)); - CH(i,k,3) = a6.template special_mul(WA(2,i)); - CH(i,k,4) = a1.template special_mul(WA(3,i)); CH(i,k,5) = a5.template special_mul(WA(4,i)); - CH(i,k,6) = a3.template special_mul(WA(5,i)); + PMINPLACE(a6,a7); + CH(i,k,3) = a6.template special_mul(WA(2,i)); CH(i,k,7) = a7.template special_mul(WA(6,i)); } }