Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Martin Reinecke
ducc
Commits
656634b4
Commit
656634b4
authored
Jul 02, 2020
by
Martin Reinecke
Browse files
more cleanups
parent
edf30f93
Pipeline
#77749
passed with stages
in 13 minutes and 2 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/ducc0/math/horner_kernel.h
View file @
656634b4
...
@@ -126,11 +126,16 @@ template<size_t W, size_t D, typename T> class HornerKernel
...
@@ -126,11 +126,16 @@ template<size_t W, size_t D, typename T> class HornerKernel
template
<
typename
T
>
class
HornerKernelFlexible
template
<
typename
T
>
class
HornerKernelFlexible
{
{
private:
private:
static
constexpr
size_t
MAXW
=
16
,
MINDEG
=
5
,
MAXDEG
=
12
;
static
constexpr
size_t
MAXW
=
16
,
MINDEG
=
0
,
MAXDEG
=
12
;
using
Tsimd
=
native_simd
<
T
>
;
using
Tsimd
=
native_simd
<
T
>
;
static
constexpr
auto
vlen
=
Tsimd
::
size
();
static
constexpr
auto
vlen
=
Tsimd
::
size
();
size_t
W
,
D
,
nvec
;
size_t
W
,
D
,
nvec
;
vector
<
T
>
res
;
union
Tu
{
Tsimd
simd
;
T
scalar
[
vlen
];
};
vector
<
Tu
>
res
;
vector
<
Tsimd
>
coeff
;
vector
<
Tsimd
>
coeff
;
const
T
*
(
HornerKernelFlexible
<
T
>::*
evalfunc
)
(
T
);
const
T
*
(
HornerKernelFlexible
<
T
>::*
evalfunc
)
(
T
);
...
@@ -143,9 +148,9 @@ template<typename T> class HornerKernelFlexible
...
@@ -143,9 +148,9 @@ template<typename T> class HornerKernelFlexible
auto
tval
=
coeff
[
i
];
auto
tval
=
coeff
[
i
];
for
(
size_t
j
=
1
;
j
<=
DEG
;
++
j
)
for
(
size_t
j
=
1
;
j
<=
DEG
;
++
j
)
tval
=
tval
*
x
+
coeff
[
j
*
NV
+
i
];
tval
=
tval
*
x
+
coeff
[
j
*
NV
+
i
];
tval
.
storeu
(
&
res
[
vlen
*
i
])
;
res
[
i
].
simd
=
tval
;
}
}
return
res
.
data
(
);
return
&
(
res
[
0
].
scalar
[
0
]
);
}
}
const
T
*
eval_intern_general
(
T
x
)
const
T
*
eval_intern_general
(
T
x
)
...
@@ -156,9 +161,9 @@ template<typename T> class HornerKernelFlexible
...
@@ -156,9 +161,9 @@ template<typename T> class HornerKernelFlexible
auto
tval
=
coeff
[
i
];
auto
tval
=
coeff
[
i
];
for
(
size_t
j
=
1
;
j
<=
D
;
++
j
)
for
(
size_t
j
=
1
;
j
<=
D
;
++
j
)
tval
=
tval
*
x
+
coeff
[
j
*
nvec
+
i
];
tval
=
tval
*
x
+
coeff
[
j
*
nvec
+
i
];
tval
.
storeu
(
&
res
[
vlen
*
i
])
;
res
[
i
].
simd
=
tval
;
}
}
return
res
.
data
(
);
return
&
(
res
[
0
].
scalar
[
0
]
);
}
}
template
<
size_t
NV
,
size_t
DEG
>
auto
evfhelper2
()
const
template
<
size_t
NV
,
size_t
DEG
>
auto
evfhelper2
()
const
...
@@ -177,16 +182,10 @@ template<typename T> class HornerKernelFlexible
...
@@ -177,16 +182,10 @@ template<typename T> class HornerKernelFlexible
return
evfhelper1
<
((
NV
*
vlen
>
MAXW
)
?
NV
:
NV
+
1
)
>
();
return
evfhelper1
<
((
NV
*
vlen
>
MAXW
)
?
NV
:
NV
+
1
)
>
();
}
}
auto
get_evalfunc
()
const
{
return
evfhelper1
<
1
>
();
}
public:
public:
template
<
typename
Func
>
HornerKernelFlexible
(
size_t
W_
,
size_t
D_
,
Func
func
)
template
<
typename
Func
>
HornerKernelFlexible
(
size_t
W_
,
size_t
D_
,
Func
func
)
:
W
(
W_
),
D
(
D_
),
nvec
((
W
+
vlen
-
1
)
/
vlen
),
res
(
nvec
*
vlen
),
:
W
(
W_
),
D
(
D_
),
nvec
((
W
+
vlen
-
1
)
/
vlen
),
res
(
nvec
),
coeff
(
nvec
*
(
D
+
1
),
0
),
evalfunc
(
get_evalfunc
())
coeff
(
nvec
*
(
D
+
1
),
0
),
evalfunc
(
evfhelper1
<
1
>
())
{
{
vector
<
double
>
chebroot
(
D
+
1
);
vector
<
double
>
chebroot
(
D
+
1
);
for
(
size_t
i
=
0
;
i
<=
D
;
++
i
)
for
(
size_t
i
=
0
;
i
<=
D
;
++
i
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment