Commit 5d50990d authored by Martin Reinecke's avatar Martin Reinecke
Browse files

some transpose and 4-step FFT experiments

parent 04d17c37
Pipeline #79483 passed with stages
in 17 minutes and 2 seconds
......@@ -203,7 +203,21 @@ py::array py_transpose(const py::array &in, py::array &out)
MR_fail("unsupported datatype");
}
py::array twiddle(py::array &in, bool forward)
{
auto in2 = to_mav<complex<double>,2>(in, true);
size_t s1=in2.shape(0), s2 = in2.shape(1);
UnityRoots<double,complex<double>> roots(s1*s2);
if (forward)
for (size_t i=0; i<s1; ++i)
for (size_t j=0; j<s2; ++j)
in2.v(i,j) *= conj(roots[i*j]);
else
for (size_t i=0; i<s1; ++i)
for (size_t j=0; j<s2; ++j)
in2.v(i,j) *= roots[i*j];
return in;
}
const char *misc_DS = R"""(
Various unsorted utilities
)""";
......@@ -225,6 +239,7 @@ void add_misc(py::module &msup)
m.def("ascontiguousarray",&py_ascontiguousarray, "in"_a);
m.def("transpose",&py_transpose, "in"_a, "out"_a);
m.def("twiddle",&twiddle, "in"_a, "forward"_a);
}
}
......
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2020 Max-Planck-Society
import ducc0.misc
import numpy as np
import pytest
import time
from numpy.testing import assert_
transpose = ducc0.misc.transpose
pmp = pytest.mark.parametrize
def transpose_classic(arr, axes):
return np.ascontiguousarray(np.transpose(arr, axes))
def transpose_new(arr, axes):
axinv = np.argsort(axes)
b = np.empty(np.array(arr.shape)[axes],dtype=arr.dtype)
b = np.transpose(b, axinv)
return np.transpose(transpose(arr,b),axes)
def test1():
rng = np.random.default_rng(42)
for i in range(1000):
ndim = rng.integers(2, 3)
axlen = max(500,int((2**20)**(1./ndim)))
shape = rng.integers(2, axlen, ndim)
axes = np.arange(ndim)
rng.shuffle(axes)
if np.all(axes == np.arange(ndim)):
continue
a = rng.random(shape)-0.5
t0 = time.time()
c = transpose_classic(a, axes)
ta = time.time()-t0
t0 = time.time()
b = transpose_new(a,axes)
tb = time.time()-t0
assert(c.shape==b.shape)
assert(c.strides==b.strides)
assert(np.all(b==c))
if __name__ == "__main__":
test1()
......@@ -122,11 +122,12 @@ template<typename T, typename Func> void sthelper2(const T * DUCC0_RESTRICT in,
return;
}
// OK, we have to do a real transpose
// select blockig sizes depending on critical strides
// select blocking sizes depending on critical strides
bool crit0 = critical(sizeof(T)*sti0) || critical(sizeof(T)*sto0);
bool crit1 = critical(sizeof(T)*sti1) || critical(sizeof(T)*sto1);
size_t bs0 = crit0 ? 8 : 8;
size_t bs1 = crit1 ? 8 : 8;
constexpr size_t bsnorm=64, bscrit=8;
size_t bs0 = crit0 ? bscrit : bsnorm;
size_t bs1 = crit1 ? bscrit : bsnorm;
// make sure that the smallest absolute stride goes in the innermost loop
if (min(abs(sti0),abs(sto0))<min(abs(sti1),abs(sto1)))
{
......@@ -142,11 +143,38 @@ template<typename T, typename Func> void sthelper2(const T * DUCC0_RESTRICT in,
{
size_t ii1e = min(s1, ii1+bs1);
for (size_t i0=ii0; i0<ii0e; ++i0)
{
if (ii1e==ii1+bsnorm)
{
if (sto1==1)
for (size_t i1=ii1; i1<ii1+bsnorm; ++i1)
func(in[i0*sti0+i1*sti1], out[i0*sto0+i1]);
else if (sti1==1)
for (size_t i1=ii1; i1<ii1+bsnorm; ++i1)
func(in[i0*sti0+i1], out[i0*sto0+i1*sto1]);
else
for (size_t i1=ii1; i1<ii1+bsnorm; ++i1)
func(in[i0*sti0+i1*sti1], out[i0*sto0+i1*sto1]);
}
else if (ii1e==ii1+bscrit)
{
if (sto1==1)
for (size_t i1=ii1; i1<ii1+bscrit; ++i1)
func(in[i0*sti0+i1*sti1], out[i0*sto0+i1]);
else if (sti1==1)
for (size_t i1=ii1; i1<ii1+bscrit; ++i1)
func(in[i0*sti0+i1], out[i0*sto0+i1*sto1]);
else
for (size_t i1=ii1; i1<ii1+bscrit; ++i1)
func(in[i0*sti0+i1*sti1], out[i0*sto0+i1*sto1]);
}
else
for (size_t i1=ii1; i1<ii1e; ++i1)
func(in[i0*sti0+i1*sti1], out[i0*sto0+i1*sto1]);
}
}
}
}
template<typename T, typename Func> void iter(const fmav<T> &in,
fmav<T> &out, size_t dim, ptrdiff_t idxin, ptrdiff_t idxout, Func func)
......
import ducc0.fft
import ducc0.misc
import numpy as np
import time
def _l2error(a, b):
return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))
def reference(arr):
return ducc0.fft.c2c(arr)
#def get_tw(f1,f2):
# a1 =
# works
def fourstep_1w(arr, f1):
assert(arr.ndim==1)
f2 = arr.shape[0] // f1
assert(arr.shape[0] == f1*f2)
t1 = arr.copy().reshape((f1, f2))
tx = time.time()
t1 = ducc0.fft.c2c(t1, out=t1, axes=(0,))
print("xx",time.time()-tx)
tx = time.time()
t1 = ducc0.misc.twiddle(t1,True)
print("xx",time.time()-tx)
# rngj = np.arange(f2).astype(np.float64)
# tw0 = -2*np.pi*1j/(f1*f2)
# for i in range(f1):
# t1[i] *= np.exp((tw0*i)*rngj)
tx = time.time()
t1 = t1.T
t1 = ducc0.fft.c2c(t1, axes=(0,))
print("xx",time.time()-tx)
return t1.reshape((-1,))
def fourstep_1(arr, f1):
assert(arr.ndim==1)
f2 = arr.shape[0] // f1
assert(arr.shape[0] == f1*f2)
t1 = arr.reshape((f1, f2))
tx = time.time()
t1 = ducc0.fft.c2c(t1, axes=(0,))
print("xx",time.time()-tx)
tx = time.time()
t1 = ducc0.misc.twiddle(t1,True)
print("xx",time.time()-tx)
# rngj = np.arange(f2).astype(np.float64)
# tw0 = -2*np.pi*1j/(f1*f2)
# for i in range(f1):
# t1[i] *= np.exp((tw0*i)*rngj)
tx = time.time()
t2 = np.empty((f2,f1),dtype=arr.dtype)
t2 = ducc0.fft.c2c(t1, out=t2.T, axes=(1,))
print("xx",time.time()-tx)
print(t2.strides)
return t2.T.reshape((-1,))
def fourstep_2(arr, f1):
assert(arr.ndim==1)
f2 = arr.shape[0] // f1
assert(arr.shape[0] == f1*f2)
t1 = arr.copy().reshape((f1, f2))
t1 = ducc0.fft.c2c(t1, axes=(0,))
t1 = ducc0.misc.twiddle(t1,True)
t1 = ducc0.fft.c2c(t1, axes=(1,))
t1 = t1.T
return t1.reshape((-1,))
def fourstep_3(arr, f1):
assert(arr.ndim==1)
f2 = arr.shape[0] // f1
assert(arr.shape[0] == f1*f2)
t1 = ducc0.fft.c2c(arr.reshape((f1, f2)), axes=(0,))
rngj = np.arange(f2).astype(np.float64)
tw0 = -2*np.pi*1j/(f1*f2)
for i in range(f1):
t1[i] *= np.exp((tw0*i)*rngj)
t1 = ducc0.fft.c2c(t1.T, axes=(0,))
return t1.reshape((-1,))
def fourstep_4(arr, f1):
assert(arr.ndim==1)
f2 = arr.shape[0] // f1
assert(arr.shape[0] == f1*f2)
t1 = ducc0.fft.c2c(arr.reshape((f1, f2)).T, axes=(1,))
tx = time.time()
rngj = np.arange(f1).astype(np.float64)
tw0 = -2*np.pi*1j/(f1*f2)
for i in range(f2):
t1[i] *= np.exp((tw0*i)*rngj)
print("xx",time.time()-tx)
t1 = ducc0.fft.c2c(t1, out=t1, axes=(0,))
return t1.reshape((-1,))
def sixstep_1(arr, f1):
assert(arr.ndim==1)
f2 = arr.shape[0] // f1
assert(arr.shape[0] == f1*f2)
t1 = arr.copy().reshape((f1, f2))
t1 = t1.T
t1 = ducc0.fft.c2c(t1, axes=(1,))
t1 = t1.T
rngj = np.arange(f2).astype(np.float64)
tw0 = -2*np.pi*1j/(f1*f2)
for i in range(f1):
t1[i] *= np.exp((tw0*i)*rngj)
t1 = ducc0.fft.c2c(t1, axes=(1,))
t1 = t1.T
return t1.reshape((-1,))
f1, f2 = 3000, 3000
rng = np.random.default_rng(42)
arr = rng.random(f1*f2) + 1j*rng.random(f1*f2) - 0.5 -0.5j
t0 = time.time()
ref = reference(arr)
print(time.time()-t0)
t0 = time.time()
blub = fourstep_1(arr, f1)
print(time.time()-t0)
print(_l2error(ref, blub))
t0 = time.time()
blub = fourstep_2(arr, f1)
print(time.time()-t0)
print(np.max(np.abs(ref-blub)))
t0 = time.time()
blub = fourstep_3(arr, f1)
print(time.time()-t0)
print(np.max(np.abs(ref-blub)))
t0 = time.time()
blub = fourstep_4(arr, f1)
print(time.time()-t0)
print(np.max(np.abs(ref-blub)))
t0 = time.time()
blub = sixstep_1(arr, f1)
print(time.time()-t0)
print(np.max(np.abs(ref-blub)))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment