Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Martin Reinecke
ducc
Commits
9abe011e
Commit
9abe011e
authored
Jul 01, 2020
by
Martin Reinecke
Browse files
evolution
parent
5a0c893f
Pipeline
#77600
passed with stages
in 16 minutes and 19 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/ducc0/infra/timers.h
View file @
9abe011e
...
...
@@ -39,6 +39,8 @@ class SimpleTimer
public:
SimpleTimer
()
:
starttime
(
clock
::
now
())
{}
void
reset
()
{
starttime
=
clock
::
now
();
}
double
operator
()()
const
{
return
std
::
chrono
::
duration
<
double
>
(
clock
::
now
()
-
starttime
).
count
();
...
...
src/ducc0/math/horner_kernel.h
View file @
9abe011e
...
...
@@ -108,23 +108,204 @@ template<size_t W, size_t D, typename T> class HornerKernel
abscissas x, x+2./W, x+4./W, ..., x+(2.*W-2)/W.
x must lie in [-1; -1+2./W]. */
const
array
<
T
,
W
>
&
DUCC0_NOINLINE
eval
(
T
x
)
const
T
*
DUCC0_NOINLINE
eval
(
T
x
)
{
x
=
(
x
+
1
)
*
W
-
1
;
#if 0
array<Tsimd,nvec> tval;
for (size_t i=0; i<nvec; ++i)
tval[i] = coeff[0][i];
for (size_t j=1; j<=D; ++j)
for (size_t i=0; i<nvec; ++i)
tval[i] = tval[i]*x+coeff[j][i];
for (size_t i=0; i<nvec; ++i)
res.v[i] = tval[i];
#else
for
(
size_t
i
=
0
;
i
<
nvec
;
++
i
)
{
auto
tval
=
coeff
[
0
][
i
];
for
(
size_t
j
=
1
;
j
<=
D
;
++
j
)
tval
=
tval
*
x
+
coeff
[
j
][
i
];
tval
=
tval
*
x
+
coeff
[
j
][
i
];
res
.
v
[
i
]
=
tval
;
}
return
res
.
s
;
#endif
return
&
res
.
s
[
0
];
}
};
template
<
typename
T
>
class
HornerKernelFlexible
{
private:
using
Tsimd
=
native_simd
<
T
>
;
static
constexpr
auto
vlen
=
Tsimd
::
size
();
size_t
W
,
D
,
nvec
;
vector
<
T
>
res
;
vector
<
Tsimd
>
coeff
;
const
T
*
(
HornerKernelFlexible
<
T
>::*
evalfunc
)
(
T
);
template
<
size_t
NV
,
size_t
DEG
>
const
T
*
eval_intern
(
T
x
)
{
x
=
(
x
+
1
)
*
W
-
1
;
#if 0
array<Tsimd,NV> tval;
for (size_t i=0; i<NV; ++i)
tval[i] = coeff[i];
for (size_t j=1; j<=DEG; ++j)
for (size_t i=0; i<NV; ++i)
tval[i] = tval[i]*x+coeff[j*NV+i];
for (size_t i=0; i<NV; ++i)
tval[i].storeu(&res[vlen*i]);
#else
for
(
size_t
i
=
0
;
i
<
NV
;
++
i
)
{
auto
tval
=
coeff
[
i
];
for
(
size_t
j
=
1
;
j
<=
DEG
;
++
j
)
tval
=
tval
*
x
+
coeff
[
j
*
NV
+
i
];
tval
.
storeu
(
&
res
[
vlen
*
i
]);
}
#endif
return
res
.
data
();
}
const
T
*
eval_intern_general
(
T
x
)
{
x
=
(
x
+
1
)
*
W
-
1
;
for
(
size_t
i
=
0
;
i
<
nvec
;
++
i
)
{
auto
tval
=
coeff
[
i
];
for
(
size_t
j
=
1
;
j
<=
D
;
++
j
)
tval
=
tval
*
x
+
coeff
[
j
*
nvec
+
i
];
tval
.
storeu
(
&
res
[
vlen
*
i
]);
}
return
res
.
data
();
}
template
<
size_t
NV
>
auto
get_evalfunc2
()
const
{
switch
(
D
)
{
case
0
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
0
>
;
case
1
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
1
>
;
case
2
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
2
>
;
case
3
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
3
>
;
case
4
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
4
>
;
case
5
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
5
>
;
case
6
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
6
>
;
case
7
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
7
>
;
case
8
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
8
>
;
case
9
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
9
>
;
case
10
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
10
>
;
case
11
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
11
>
;
case
12
:
return
&
HornerKernelFlexible
::
eval_intern
<
NV
,
12
>
;
default:
return
&
HornerKernelFlexible
::
eval_intern_general
;
}
}
auto
get_evalfunc
()
const
{
switch
(
nvec
)
{
case
1
:
return
get_evalfunc2
<
1
>
();
case
2
:
return
get_evalfunc2
<
2
>
();
case
3
:
return
get_evalfunc2
<
3
>
();
case
4
:
return
get_evalfunc2
<
4
>
();
case
5
:
return
get_evalfunc2
<
5
>
();
case
6
:
return
get_evalfunc2
<
6
>
();
case
7
:
return
get_evalfunc2
<
7
>
();
case
8
:
return
get_evalfunc2
<
8
>
();
case
9
:
return
get_evalfunc2
<
9
>
();
case
10
:
return
get_evalfunc2
<
10
>
();
case
11
:
return
get_evalfunc2
<
11
>
();
case
12
:
return
get_evalfunc2
<
12
>
();
default:
return
&
HornerKernelFlexible
::
eval_intern_general
;
}
}
public:
template
<
typename
Func
>
HornerKernelFlexible
(
size_t
W_
,
size_t
D_
,
Func
func
)
:
W
(
W_
),
D
(
D_
),
nvec
((
W
+
vlen
-
1
)
/
vlen
),
res
(
nvec
*
vlen
),
coeff
(
nvec
*
(
D
+
1
),
0
),
evalfunc
(
get_evalfunc
())
{
vector
<
double
>
chebroot
(
D
+
1
);
for
(
size_t
i
=
0
;
i
<=
D
;
++
i
)
chebroot
[
i
]
=
cos
((
2
*
i
+
1.
)
*
pi
/
(
2
*
D
+
2
));
vector
<
double
>
y
(
D
+
1
),
lcf
(
D
+
1
),
C
((
D
+
1
)
*
(
D
+
1
)),
lcf2
(
D
+
1
);
for
(
size_t
i
=
0
;
i
<
W
;
++
i
)
{
double
l
=
-
1
+
2.
*
i
/
double
(
W
);
double
r
=
-
1
+
2.
*
(
i
+
1
)
/
double
(
W
);
// function values at Chebyshev nodes
for
(
size_t
j
=
0
;
j
<=
D
;
++
j
)
y
[
j
]
=
func
(
chebroot
[
j
]
*
(
r
-
l
)
*
0.5
+
(
r
+
l
)
*
0.5
);
// Chebyshev coefficients
for
(
size_t
j
=
0
;
j
<=
D
;
++
j
)
{
lcf
[
j
]
=
0
;
for
(
size_t
k
=
0
;
k
<=
D
;
++
k
)
lcf
[
j
]
+=
2.
/
(
D
+
1
)
*
y
[
k
]
*
cos
(
j
*
(
2
*
k
+
1
)
*
pi
/
(
2
*
D
+
2
));
}
lcf
[
0
]
*=
0.5
;
// Polynomial coefficients
fill
(
C
.
begin
(),
C
.
end
(),
0.
);
C
[
0
]
=
1.
;
C
[
1
*
(
D
+
1
)
+
1
]
=
1.
;
for
(
size_t
j
=
2
;
j
<=
D
;
++
j
)
{
C
[
j
*
(
D
+
1
)
+
0
]
=
-
C
[(
j
-
2
)
*
(
D
+
1
)
+
0
];
for
(
size_t
k
=
1
;
k
<=
j
;
++
k
)
C
[
j
*
(
D
+
1
)
+
k
]
=
2
*
C
[(
j
-
1
)
*
(
D
+
1
)
+
k
-
1
]
-
C
[(
j
-
2
)
*
(
D
+
1
)
+
k
];
}
for
(
size_t
j
=
0
;
j
<=
D
;
++
j
)
lcf2
[
j
]
=
0
;
for
(
size_t
j
=
0
;
j
<=
D
;
++
j
)
for
(
size_t
k
=
0
;
k
<=
D
;
++
k
)
lcf2
[
k
]
+=
C
[
j
*
(
D
+
1
)
+
k
]
*
lcf
[
j
];
for
(
size_t
j
=
0
;
j
<=
D
;
++
j
)
coeff
[
j
*
nvec
+
i
/
vlen
][
i
%
vlen
]
=
T
(
lcf2
[
D
-
j
]);
}
}
/*! Returns the function approximation at W different locations with the
abscissas x, x+2./W, x+4./W, ..., x+(2.*W-2)/W.
x must lie in [-1; -1+2./W]. */
const
T
*
eval
(
T
x
)
{
return
(
this
->*
evalfunc
)(
x
);
}
};
}
using
detail_horner_kernel
::
HornerKernel
;
using
detail_horner_kernel
::
HornerKernelFlexible
;
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment