Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
On Thursday, 7th July from 1 to 3 pm there will be a maintenance with a short downtime of GitLab.
Open sidebar
Martin Reinecke
ducc
Commits
4475d484
Commit
4475d484
authored
Sep 01, 2020
by
Martin Reinecke
Browse files
getting uglier before it gets better
parent
4e95fdc1
Pipeline
#81386
passed with stages
in 12 minutes and 20 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
python/gridder_cxx.h
View file @
4475d484
...
...
@@ -230,27 +230,48 @@ class Baselines
};
template
<
typename
T
>
class
GridderConfig
constexpr
int
logsquare
=
4
;
template
<
typename
T
>
class
Params
{
protected:
size_t
nx_dirty
,
ny_dirty
,
nu
,
nv
;
double
epsilon
,
ofactor
;
private:
bool
gridding
;
TimerHierarchy
timers
;
const
mav
<
complex
<
T
>
,
2
>
&
ms_in
;
mav
<
complex
<
T
>
,
2
>
&
ms_out
;
const
mav
<
T
,
2
>
&
dirty_in
;
mav
<
T
,
2
>
&
dirty_out
;
const
mav
<
T
,
2
>
&
wgt
;
const
mav
<
uint8_t
,
2
>
&
mask
;
double
pixsize_x
,
pixsize_y
;
size_t
nxdirty
,
nydirty
;
double
epsilon
;
bool
do_wgridding
;
size_t
nthreads
;
size_t
verbosity
;
bool
negate_v
,
divide_by_n
;
Baselines
bl
;
VVR
ranges
;
double
wmin_d
,
wmax_d
;
size_t
nvis
;
double
wmin
,
dw
;
size_t
nplanes
;
double
nm1min
;
vector
<
uint8_t
>
active
;
size_t
nu
,
nv
;
double
ofactor
;
// FIXME: this should probably be done more cleanly
public:
TimerHierarchy
&
timers
;
shared_ptr
<
HornerKernel
<
T
>>
krn
;
protected:
double
psx
,
psy
;
size_t
supp
,
nsafe
;
size_t
nthreads
;
double
ushift
,
vshift
;
int
maxiu0
,
maxiv0
;
size_t
vlim
;
bool
uv_side_fast
;
T
phase
(
T
x
,
T
y
,
T
w
,
bool
adjoint
)
const
static
T
phase
(
T
x
,
T
y
,
T
w
,
bool
adjoint
)
{
constexpr
T
pi
=
T
(
3.141592653589793238462643383279502884197
);
T
tmp
=
1
-
x
-
y
;
...
...
@@ -261,68 +282,23 @@ template<typename T> class GridderConfig
return
phs
;
}
public:
GridderConfig
(
size_t
nxdirty
,
size_t
nydirty
,
size_t
nu_
,
size_t
nv_
,
size_t
kidx
,
double
epsilon_
,
double
pixsize_x
,
double
pixsize_y
,
const
Baselines
&
baselines
,
size_t
nthreads_
,
TimerHierarchy
&
timers_
)
:
nx_dirty
(
nxdirty
),
ny_dirty
(
nydirty
),
nu
(
nu_
),
nv
(
nv_
),
epsilon
(
epsilon_
),
ofactor
(
min
(
double
(
nu
)
/
nxdirty
,
double
(
nv
)
/
nydirty
)),
timers
(
timers_
),
krn
(
selectKernel
<
T
>
(
ofactor
,
epsilon
,
kidx
)),
psx
(
pixsize_x
),
psy
(
pixsize_y
),
supp
(
krn
->
support
()),
nsafe
((
supp
+
1
)
/
2
),
nthreads
(
nthreads_
),
ushift
(
supp
*
(
-
0.5
)
+
1
+
nu
),
vshift
(
supp
*
(
-
0.5
)
+
1
+
nv
),
maxiu0
((
nu
+
nsafe
)
-
supp
),
maxiv0
((
nv
+
nsafe
)
-
supp
),
vlim
(
min
(
nv
/
2
,
size_t
(
nv
*
baselines
.
Vmax
()
*
psy
+
0.5
*
supp
+
1
))),
uv_side_fast
(
true
)
{
size_t
vlim2
=
(
nydirty
+
1
)
/
2
+
(
supp
+
1
)
/
2
;
if
(
vlim2
<
vlim
)
{
vlim
=
vlim2
;
uv_side_fast
=
false
;
}
MR_assert
(
nu
>=
2
*
nsafe
,
"nu too small"
);
MR_assert
(
nv
>=
2
*
nsafe
,
"nv too small"
);
MR_assert
((
nx_dirty
&
1
)
==
0
,
"nx_dirty must be even"
);
MR_assert
((
ny_dirty
&
1
)
==
0
,
"ny_dirty must be even"
);
MR_assert
((
nu
&
1
)
==
0
,
"nu must be even"
);
MR_assert
((
nv
&
1
)
==
0
,
"nv must be even"
);
MR_assert
(
epsilon
>
0
,
"epsilon must be positive"
);
MR_assert
(
pixsize_x
>
0
,
"pixsize_x must be positive"
);
MR_assert
(
pixsize_y
>
0
,
"pixsize_y must be positive"
);
}
size_t
Nxdirty
()
const
{
return
nx_dirty
;
}
size_t
Nydirty
()
const
{
return
ny_dirty
;
}
double
Pixsize_x
()
const
{
return
psx
;
}
double
Pixsize_y
()
const
{
return
psy
;
}
size_t
Nu
()
const
{
return
nu
;
}
size_t
Nv
()
const
{
return
nv
;
}
size_t
Supp
()
const
{
return
supp
;
}
size_t
Nsafe
()
const
{
return
nsafe
;
}
size_t
Nthreads
()
const
{
return
nthreads
;
}
double
Epsilon
()
const
{
return
epsilon
;
}
double
Ofactor
()
const
{
return
ofactor
;
}
void
grid2dirty_post
(
mav
<
T
,
2
>
&
tmav
,
mav
<
T
,
2
>
&
dirty
)
const
{
checkShape
(
dirty
.
shape
(),
{
nx
_
dirty
,
ny
_
dirty
});
auto
cfu
=
krn
->
corfunc
(
nx
_
dirty
/
2
+
1
,
1.
/
nu
,
nthreads
);
auto
cfv
=
krn
->
corfunc
(
ny
_
dirty
/
2
+
1
,
1.
/
nv
,
nthreads
);
execStatic
(
nx
_
dirty
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
checkShape
(
dirty
.
shape
(),
{
nxdirty
,
nydirty
});
auto
cfu
=
krn
->
corfunc
(
nxdirty
/
2
+
1
,
1.
/
nu
,
nthreads
);
auto
cfv
=
krn
->
corfunc
(
nydirty
/
2
+
1
,
1.
/
nv
,
nthreads
);
execStatic
(
nxdirty
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
{
while
(
auto
rng
=
sched
.
getNext
())
for
(
auto
i
=
rng
.
lo
;
i
<
rng
.
hi
;
++
i
)
{
int
icfu
=
abs
(
int
(
nx
_
dirty
/
2
)
-
int
(
i
));
for
(
size_t
j
=
0
;
j
<
ny
_
dirty
;
++
j
)
int
icfu
=
abs
(
int
(
nxdirty
/
2
)
-
int
(
i
));
for
(
size_t
j
=
0
;
j
<
nydirty
;
++
j
)
{
int
icfv
=
abs
(
int
(
ny
_
dirty
/
2
)
-
int
(
j
));
size_t
i2
=
nu
-
nx
_
dirty
/
2
+
i
;
int
icfv
=
abs
(
int
(
nydirty
/
2
)
-
int
(
j
));
size_t
i2
=
nu
-
nxdirty
/
2
+
i
;
if
(
i2
>=
nu
)
i2
-=
nu
;
size_t
j2
=
nv
-
ny
_
dirty
/
2
+
j
;
size_t
j2
=
nv
-
nydirty
/
2
+
j
;
if
(
j2
>=
nv
)
j2
-=
nv
;
dirty
.
v
(
i
,
j
)
=
tmav
(
i2
,
j2
)
*
T
(
cfu
[
icfu
]
*
cfv
[
icfv
]);
}
...
...
@@ -332,27 +308,27 @@ template<typename T> class GridderConfig
void
grid2dirty_post2
(
mav
<
complex
<
T
>
,
2
>
&
tmav
,
mav
<
T
,
2
>
&
dirty
,
T
w
)
const
{
checkShape
(
dirty
.
shape
(),
{
nx
_
dirty
,
ny
_
dirty
});
double
x0
=
-
0.5
*
nx
_
dirty
*
p
s
x
,
y0
=
-
0.5
*
ny
_
dirty
*
p
s
y
;
execStatic
(
nx
_
dirty
/
2
+
1
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
checkShape
(
dirty
.
shape
(),
{
nxdirty
,
nydirty
});
double
x0
=
-
0.5
*
nxdirty
*
p
ixsize_
x
,
y0
=
-
0.5
*
nydirty
*
p
ixsize_
y
;
execStatic
(
nxdirty
/
2
+
1
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
{
using
vtype
=
native_simd
<
T
>
;
constexpr
size_t
vlen
=
vtype
::
size
();
size_t
nvec
=
(
ny
_
dirty
/
2
+
1
+
(
vlen
-
1
))
/
vlen
;
size_t
nvec
=
(
nydirty
/
2
+
1
+
(
vlen
-
1
))
/
vlen
;
vector
<
vtype
>
ph
(
nvec
),
sp
(
nvec
),
cp
(
nvec
);
while
(
auto
rng
=
sched
.
getNext
())
for
(
auto
i
=
rng
.
lo
;
i
<
rng
.
hi
;
++
i
)
{
T
fx
=
T
(
x0
+
i
*
p
s
x
);
T
fx
=
T
(
x0
+
i
*
p
ixsize_
x
);
fx
*=
fx
;
size_t
ix
=
nu
-
nx
_
dirty
/
2
+
i
;
size_t
ix
=
nu
-
nxdirty
/
2
+
i
;
if
(
ix
>=
nu
)
ix
-=
nu
;
size_t
i2
=
nx
_
dirty
-
i
;
size_t
ix2
=
nu
-
nx
_
dirty
/
2
+
i2
;
size_t
i2
=
nxdirty
-
i
;
size_t
ix2
=
nu
-
nxdirty
/
2
+
i2
;
if
(
ix2
>=
nu
)
ix2
-=
nu
;
for
(
size_t
j
=
0
;
j
<=
ny
_
dirty
/
2
;
++
j
)
for
(
size_t
j
=
0
;
j
<=
nydirty
/
2
;
++
j
)
{
T
fy
=
T
(
y0
+
j
*
p
s
y
);
T
fy
=
T
(
y0
+
j
*
p
ixsize_
y
);
ph
[
j
/
vlen
][
j
%
vlen
]
=
phase
(
fx
,
fy
*
fy
,
w
,
true
);
}
for
(
size_t
j
=
0
;
j
<
nvec
;
++
j
)
...
...
@@ -362,17 +338,17 @@ template<typename T> class GridderConfig
for
(
size_t
k
=
0
;
k
<
vlen
;
++
k
)
cp
[
j
][
k
]
=
cos
(
ph
[
j
][
k
]);
if
((
i
>
0
)
&&
(
i
<
i2
))
for
(
size_t
j
=
0
,
jx
=
nv
-
ny
_
dirty
/
2
;
j
<
ny
_
dirty
;
++
j
,
jx
=
(
jx
+
1
>=
nv
)
?
jx
+
1
-
nv
:
jx
+
1
)
for
(
size_t
j
=
0
,
jx
=
nv
-
nydirty
/
2
;
j
<
nydirty
;
++
j
,
jx
=
(
jx
+
1
>=
nv
)
?
jx
+
1
-
nv
:
jx
+
1
)
{
size_t
j2
=
min
(
j
,
ny
_
dirty
-
j
);
size_t
j2
=
min
(
j
,
nydirty
-
j
);
T
re
=
cp
[
j2
/
vlen
][
j2
%
vlen
],
im
=
sp
[
j2
/
vlen
][
j2
%
vlen
];
dirty
.
v
(
i
,
j
)
+=
tmav
(
ix
,
jx
).
real
()
*
re
-
tmav
(
ix
,
jx
).
imag
()
*
im
;
dirty
.
v
(
i2
,
j
)
+=
tmav
(
ix2
,
jx
).
real
()
*
re
-
tmav
(
ix2
,
jx
).
imag
()
*
im
;
}
else
for
(
size_t
j
=
0
,
jx
=
nv
-
ny
_
dirty
/
2
;
j
<
ny
_
dirty
;
++
j
,
jx
=
(
jx
+
1
>=
nv
)
?
jx
+
1
-
nv
:
jx
+
1
)
for
(
size_t
j
=
0
,
jx
=
nv
-
nydirty
/
2
;
j
<
nydirty
;
++
j
,
jx
=
(
jx
+
1
>=
nv
)
?
jx
+
1
-
nv
:
jx
+
1
)
{
size_t
j2
=
min
(
j
,
ny
_
dirty
-
j
);
size_t
j2
=
min
(
j
,
nydirty
-
j
);
T
re
=
cp
[
j2
/
vlen
][
j2
%
vlen
],
im
=
sp
[
j2
/
vlen
][
j2
%
vlen
];
dirty
.
v
(
i
,
j
)
+=
tmav
(
ix
,
jx
).
real
()
*
re
-
tmav
(
ix
,
jx
).
imag
()
*
im
;
// lower left
}
...
...
@@ -380,7 +356,7 @@ template<typename T> class GridderConfig
});
}
void
grid2dirty_overwrite
(
mav
<
T
,
2
>
&
grid
,
mav
<
T
,
2
>
&
dirty
)
const
void
grid2dirty_overwrite
(
mav
<
T
,
2
>
&
grid
,
mav
<
T
,
2
>
&
dirty
)
{
timers
.
push
(
"FFT"
);
checkShape
(
grid
.
shape
(),
{
nu
,
nv
});
...
...
@@ -391,7 +367,7 @@ template<typename T> class GridderConfig
}
void
grid2dirty_c_overwrite_wscreen_add
(
mav
<
complex
<
T
>
,
2
>
&
grid
,
mav
<
T
,
2
>
&
dirty
,
T
w
)
const
(
mav
<
complex
<
T
>
,
2
>
&
grid
,
mav
<
T
,
2
>
&
dirty
,
T
w
)
{
timers
.
push
(
"FFT"
);
checkShape
(
grid
.
shape
(),
{
nu
,
nv
});
...
...
@@ -417,23 +393,23 @@ template<typename T> class GridderConfig
void
dirty2grid_pre
(
const
mav
<
T
,
2
>
&
dirty
,
mav
<
T
,
2
>
&
grid
)
const
{
checkShape
(
dirty
.
shape
(),
{
nx
_
dirty
,
ny
_
dirty
});
checkShape
(
dirty
.
shape
(),
{
nxdirty
,
nydirty
});
checkShape
(
grid
.
shape
(),
{
nu
,
nv
});
auto
cfu
=
krn
->
corfunc
(
nx
_
dirty
/
2
+
1
,
1.
/
nu
,
nthreads
);
auto
cfv
=
krn
->
corfunc
(
ny
_
dirty
/
2
+
1
,
1.
/
nv
,
nthreads
);
auto
cfu
=
krn
->
corfunc
(
nxdirty
/
2
+
1
,
1.
/
nu
,
nthreads
);
auto
cfv
=
krn
->
corfunc
(
nydirty
/
2
+
1
,
1.
/
nv
,
nthreads
);
// FIXME: maybe we don't have to fill everything and can save some time
grid
.
fill
(
0
);
execStatic
(
nx
_
dirty
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
execStatic
(
nxdirty
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
{
while
(
auto
rng
=
sched
.
getNext
())
for
(
auto
i
=
rng
.
lo
;
i
<
rng
.
hi
;
++
i
)
{
int
icfu
=
abs
(
int
(
nx
_
dirty
/
2
)
-
int
(
i
));
for
(
size_t
j
=
0
;
j
<
ny
_
dirty
;
++
j
)
int
icfu
=
abs
(
int
(
nxdirty
/
2
)
-
int
(
i
));
for
(
size_t
j
=
0
;
j
<
nydirty
;
++
j
)
{
int
icfv
=
abs
(
int
(
ny
_
dirty
/
2
)
-
int
(
j
));
size_t
i2
=
nu
-
nx
_
dirty
/
2
+
i
;
int
icfv
=
abs
(
int
(
nydirty
/
2
)
-
int
(
j
));
size_t
i2
=
nu
-
nxdirty
/
2
+
i
;
if
(
i2
>=
nu
)
i2
-=
nu
;
size_t
j2
=
nv
-
ny
_
dirty
/
2
+
j
;
size_t
j2
=
nv
-
nydirty
/
2
+
j
;
if
(
j2
>=
nv
)
j2
-=
nv
;
grid
.
v
(
i2
,
j2
)
=
dirty
(
i
,
j
)
*
T
(
cfu
[
icfu
]
*
cfv
[
icfv
]);
}
...
...
@@ -443,31 +419,31 @@ template<typename T> class GridderConfig
void
dirty2grid_pre2
(
const
mav
<
T
,
2
>
&
dirty
,
mav
<
complex
<
T
>
,
2
>
&
grid
,
T
w
)
const
{
checkShape
(
dirty
.
shape
(),
{
nx
_
dirty
,
ny
_
dirty
});
checkShape
(
dirty
.
shape
(),
{
nxdirty
,
nydirty
});
checkShape
(
grid
.
shape
(),
{
nu
,
nv
});
// FIXME: maybe we don't have to fill everything and can save some time
grid
.
fill
(
0
);
double
x0
=
-
0.5
*
nx
_
dirty
*
p
s
x
,
y0
=
-
0.5
*
ny
_
dirty
*
p
s
y
;
execStatic
(
nx
_
dirty
/
2
+
1
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
double
x0
=
-
0.5
*
nxdirty
*
p
ixsize_
x
,
y0
=
-
0.5
*
nydirty
*
p
ixsize_
y
;
execStatic
(
nxdirty
/
2
+
1
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
{
using
vtype
=
native_simd
<
T
>
;
constexpr
size_t
vlen
=
vtype
::
size
();
size_t
nvec
=
(
ny
_
dirty
/
2
+
1
+
(
vlen
-
1
))
/
vlen
;
size_t
nvec
=
(
nydirty
/
2
+
1
+
(
vlen
-
1
))
/
vlen
;
vector
<
vtype
>
ph
(
nvec
),
sp
(
nvec
),
cp
(
nvec
);
while
(
auto
rng
=
sched
.
getNext
())
for
(
auto
i
=
rng
.
lo
;
i
<
rng
.
hi
;
++
i
)
{
T
fx
=
T
(
x0
+
i
*
p
s
x
);
T
fx
=
T
(
x0
+
i
*
p
ixsize_
x
);
fx
*=
fx
;
size_t
ix
=
nu
-
nx
_
dirty
/
2
+
i
;
size_t
ix
=
nu
-
nxdirty
/
2
+
i
;
if
(
ix
>=
nu
)
ix
-=
nu
;
size_t
i2
=
nx
_
dirty
-
i
;
size_t
ix2
=
nu
-
nx
_
dirty
/
2
+
i2
;
size_t
i2
=
nxdirty
-
i
;
size_t
ix2
=
nu
-
nxdirty
/
2
+
i2
;
if
(
ix2
>=
nu
)
ix2
-=
nu
;
for
(
size_t
j
=
0
;
j
<=
ny
_
dirty
/
2
;
++
j
)
for
(
size_t
j
=
0
;
j
<=
nydirty
/
2
;
++
j
)
{
T
fy
=
T
(
y0
+
j
*
p
s
y
);
T
fy
=
T
(
y0
+
j
*
p
ixsize_
y
);
ph
[
j
/
vlen
][
j
%
vlen
]
=
phase
(
fx
,
fy
*
fy
,
w
,
false
);
}
for
(
size_t
j
=
0
;
j
<
nvec
;
++
j
)
...
...
@@ -477,17 +453,17 @@ template<typename T> class GridderConfig
for
(
size_t
k
=
0
;
k
<
vlen
;
++
k
)
cp
[
j
][
k
]
=
cos
(
ph
[
j
][
k
]);
if
((
i
>
0
)
&&
(
i
<
i2
))
for
(
size_t
j
=
0
,
jx
=
nv
-
ny
_
dirty
/
2
;
j
<
ny
_
dirty
;
++
j
,
jx
=
(
jx
+
1
>=
nv
)
?
jx
+
1
-
nv
:
jx
+
1
)
for
(
size_t
j
=
0
,
jx
=
nv
-
nydirty
/
2
;
j
<
nydirty
;
++
j
,
jx
=
(
jx
+
1
>=
nv
)
?
jx
+
1
-
nv
:
jx
+
1
)
{
size_t
j2
=
min
(
j
,
ny
_
dirty
-
j
);
size_t
j2
=
min
(
j
,
nydirty
-
j
);
auto
ws
=
complex
<
T
>
(
cp
[
j2
/
vlen
][
j2
%
vlen
],
sp
[
j2
/
vlen
][
j2
%
vlen
]);
grid
.
v
(
ix
,
jx
)
=
dirty
(
i
,
j
)
*
ws
;
// lower left
grid
.
v
(
ix2
,
jx
)
=
dirty
(
i2
,
j
)
*
ws
;
// lower right
}
else
for
(
size_t
j
=
0
,
jx
=
nv
-
ny
_
dirty
/
2
;
j
<
ny
_
dirty
;
++
j
,
jx
=
(
jx
+
1
>=
nv
)
?
jx
+
1
-
nv
:
jx
+
1
)
for
(
size_t
j
=
0
,
jx
=
nv
-
nydirty
/
2
;
j
<
nydirty
;
++
j
,
jx
=
(
jx
+
1
>=
nv
)
?
jx
+
1
-
nv
:
jx
+
1
)
{
size_t
j2
=
min
(
j
,
ny
_
dirty
-
j
);
size_t
j2
=
min
(
j
,
nydirty
-
j
);
auto
ws
=
complex
<
T
>
(
cp
[
j2
/
vlen
][
j2
%
vlen
],
sp
[
j2
/
vlen
][
j2
%
vlen
]);
grid
.
v
(
ix
,
jx
)
=
dirty
(
i
,
j
)
*
ws
;
// lower left
}
...
...
@@ -495,8 +471,7 @@ template<typename T> class GridderConfig
});
}
void
dirty2grid
(
const
mav
<
T
,
2
>
&
dirty
,
mav
<
T
,
2
>
&
grid
)
const
void
dirty2grid
(
const
mav
<
T
,
2
>
&
dirty
,
mav
<
T
,
2
>
&
grid
)
{
timers
.
push
(
"grid correction"
);
dirty2grid_pre
(
dirty
,
grid
);
...
...
@@ -506,7 +481,7 @@ template<typename T> class GridderConfig
}
void
dirty2grid_c_wscreen
(
const
mav
<
T
,
2
>
&
dirty
,
mav
<
complex
<
T
>
,
2
>
&
grid
,
T
w
)
const
mav
<
complex
<
T
>
,
2
>
&
grid
,
T
w
)
{
timers
.
push
(
"wscreen+grid correction"
);
dirty2grid_pre2
(
dirty
,
grid
,
w
);
...
...
@@ -530,54 +505,23 @@ template<typename T> class GridderConfig
[[
gnu
::
always_inline
]]
void
getpix
(
double
u_in
,
double
v_in
,
double
&
u
,
double
&
v
,
int
&
iu0
,
int
&
iv0
)
const
{
u
=
fmod1
(
u_in
*
p
s
x
)
*
nu
;
u
=
fmod1
(
u_in
*
p
ixsize_
x
)
*
nu
;
iu0
=
min
(
int
(
u
+
ushift
)
-
int
(
nu
),
maxiu0
);
v
=
fmod1
(
v_in
*
p
s
y
)
*
nv
;
v
=
fmod1
(
v_in
*
p
ixsize_
y
)
*
nv
;
iv0
=
min
(
int
(
v
+
vshift
)
-
int
(
nv
),
maxiv0
);
}
};
constexpr
int
logsquare
=
4
;
template
<
typename
T
>
class
Params
{
private:
bool
gridding
;
TimerHierarchy
timers
;
const
mav
<
complex
<
T
>
,
2
>
&
ms_in
;
mav
<
complex
<
T
>
,
2
>
&
ms_out
;
const
mav
<
T
,
2
>
&
dirty_in
;
mav
<
T
,
2
>
&
dirty_out
;
const
mav
<
T
,
2
>
&
wgt
;
const
mav
<
uint8_t
,
2
>
&
mask
;
double
pixsize_x
,
pixsize_y
;
size_t
nxdirty
,
nydirty
;
double
epsilon
;
bool
do_wgridding
;
size_t
nthreads
;
size_t
verbosity
;
bool
negate_v
,
divide_by_n
;
Baselines
bl
;
VVR
ranges
;
double
wmin_d
,
wmax_d
;
size_t
nvis
;
double
wmin
,
dw
;
size_t
nplanes
;
double
nm1min
;
vector
<
uint8_t
>
active
;
void
report
(
const
GridderConfig
<
T
>
&
gconf
)
void
report
()
{
if
(
verbosity
==
0
)
return
;
cout
<<
(
gridding
?
"Gridding"
:
"Degridding"
)
<<
": nthreads="
<<
nthreads
<<
", "
<<
"dirty=("
<<
nxdirty
<<
"x"
<<
nydirty
<<
"), "
<<
"grid=("
<<
gconf
.
Nu
()
<<
"x"
<<
gconf
.
Nv
()
;
<<
"grid=("
<<
nu
<<
"x"
<<
nv
;
if
(
nplanes
>
0
)
cout
<<
"x"
<<
nplanes
;
cout
<<
"), nvis="
<<
nvis
<<
", supp="
<<
gconf
.
S
upp
()
<<
", eps="
<<
(
gconf
.
E
psilon
()
*
((
nplanes
==
0
)
?
2
:
3
))
<<
", supp="
<<
s
upp
<<
", eps="
<<
(
e
psilon
*
((
nplanes
==
0
)
?
2
:
3
))
<<
endl
;
cout
<<
" w=["
<<
wmin_d
<<
"; "
<<
wmax_d
<<
"], min(n-1)="
<<
nm1min
<<
", dw="
<<
dw
<<
", wmax/dw="
<<
wmax_d
/
dw
<<
", nranges="
<<
ranges
.
size
()
<<
endl
;
...
...
@@ -671,19 +615,18 @@ auto getNuNv()
}
}
timers
.
pop
();
return
make_tuple
(
minnu
,
minnv
,
minidx
);
nu
=
minnu
;
nv
=
minnv
;
return
minidx
;
}
void
countRanges
(
const
GridderConfig
<
T
>
&
gconf
)
void
countRanges
()
{
timers
.
push
(
"range count"
);
size_t
nrow
=
bl
.
Nrows
(),
nchan
=
bl
.
Nchannels
(),
nsafe
=
gconf
.
Nsafe
(),
nthreads
=
gconf
.
Nthreads
(),
supp
=
gconf
.
Supp
();
nchan
=
bl
.
Nchannels
();
dw
=
0.5
/
gconf
.
O
factor
()
/
abs
(
nm1min
);
dw
=
0.5
/
o
factor
/
abs
(
nm1min
);
nplanes
=
size_t
((
wmax_d
-
wmin_d
)
/
dw
+
supp
);
wmin
=
(
wmin_d
+
wmax_d
)
*
0.5
-
0.5
*
(
nplanes
-
1
)
*
dw
;
...
...
@@ -712,7 +655,7 @@ void countRanges(const GridderConfig<T> &gconf)
if
(
uvw
.
w
<
0
)
uvw
.
Flip
();
double
u
,
v
;
int
iu0
,
iv0
,
iw
;
gconf
.
getpix
(
uvw
.
u
,
uvw
.
v
,
u
,
v
,
iu0
,
iv0
);
getpix
(
uvw
.
u
,
uvw
.
v
,
u
,
v
,
iu0
,
iv0
);
iu0
=
(
iu0
+
nsafe
)
>>
logsquare
;
iv0
=
(
iv0
+
nsafe
)
>>
logsquare
;
iw
=
max
(
0
,
int
(
1
+
(
abs
(
uvw
.
w
)
-
(
0.5
*
supp
*
dw
)
-
wmin
)
/
dw
));
...
...
@@ -764,13 +707,13 @@ void countRanges(const GridderConfig<T> &gconf)
timers
.
pop
();
}
void
apply_global_corrections
(
const
GridderConfig
<
T
>
&
gconf
,
mav
<
T
,
2
>
&
dirty
)
void
apply_global_corrections
(
mav
<
T
,
2
>
&
dirty
)
{
timers
.
push
(
"global corrections"
);
double
x0
=
-
0.5
*
nxdirty
*
pixsize_x
,
y0
=
-
0.5
*
nydirty
*
pixsize_y
;
auto
cfu
=
gconf
.
krn
->
corfunc
(
nxdirty
/
2
+
1
,
1.
/
gconf
.
Nu
()
,
nthreads
);
auto
cfv
=
gconf
.
krn
->
corfunc
(
nydirty
/
2
+
1
,
1.
/
gconf
.
Nv
()
,
nthreads
);
auto
cfu
=
krn
->
corfunc
(
nxdirty
/
2
+
1
,
1.
/
nu
,
nthreads
);
auto
cfv
=
krn
->
corfunc
(
nydirty
/
2
+
1
,
1.
/
nv
,
nthreads
);
execStatic
(
nxdirty
/
2
+
1
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
{
while
(
auto
rng
=
sched
.
getNext
())
for
(
auto
i
=
rng
.
lo
;
i
<
rng
.
hi
;
++
i
)
...
...
@@ -786,7 +729,7 @@ void apply_global_corrections(const GridderConfig<T> &gconf, mav<T,2> &dirty)
if
(
tmp
>=
0
)
{
auto
nm1
=
(
-
fx
-
fy
)
/
(
sqrt
(
tmp
)
+
1
);
// accurate form of sqrt(1-x-y)-1
fct
=
T
(
gconf
.
krn
->
corfunc
(
nm1
*
dw
));
fct
=
T
(
krn
->
corfunc
(
nm1
*
dw
));
if
(
divide_by_n
)
fct
/=
nm1
+
1
;
}
...
...
@@ -797,7 +740,7 @@ void apply_global_corrections(const GridderConfig<T> &gconf, mav<T,2> &dirty)
else
{
auto
nm1
=
sqrt
(
-
tmp
)
-
1
;
fct
=
T
(
gconf
.
krn
->
corfunc
(
nm1
*
dw
));
fct
=
T
(
krn
->
corfunc
(
nm1
*
dw
));
}
}
fct
*=
T
(
cfu
[
nxdirty
/
2
-
i
]
*
cfv
[
nydirty
/
2
-
j
]);
...
...
@@ -828,11 +771,9 @@ template<size_t supp, bool wgrid> class HelperX2g2
static
constexpr
int
sv
=
2
*
nsafe
+
(
1
<<
logsquare
);
static
constexpr
int
svvec
=
((
sv
+
vlen
-
1
)
/
vlen
)
*
vlen
;
static
constexpr
double
xsupp
=
2.
/
supp
;
const
GridderConfig
<
T
>
&
gconf
;
TemplateKernel
<
supp
,
T
>
krn
;
const
Params
*
parent
;
TemplateKernel
<
supp
,
T
>
tkrn
;
mav
<
complex
<
T
>
,
2
>
&
grid
;
int
nu
,
nv
;
int
iu0
,
iv0
;
// start index of the current visibility
int
bu0
,
bv0
;
// start index of the current buffer
...
...
@@ -843,12 +784,12 @@ template<size_t supp, bool wgrid> class HelperX2g2
DUCC0_NOINLINE
void
dump
()
{
int
nu
=
int
(
gconf
.
Nu
()
);
int
nv
=
int
(
gconf
.
Nv
()
);
int
i
nu
=
int
(
parent
->
nu
);
int
i
nv
=
int
(
parent
->
nv
);
if
(
bu0
<-
nsafe
)
return
;
// nothing written into buffer yet
int
idxu
=
(
bu0
+
nu
)
%
nu
;
int
idxv0
=
(
bv0
+
nv
)
%
nv
;
int
idxu
=
(
bu0
+
i
nu
)
%
i
nu
;
int
idxv0
=
(
bv0
+
i
nv
)
%
i
nv
;
for
(
int
iu
=
0
;
iu
<
su
;
++
iu
)
{
int
idxv
=
idxv0
;
...
...
@@ -858,10 +799,10 @@ template<size_t supp, bool wgrid> class HelperX2g2
{
grid
.
v
(
idxu
,
idxv
)
+=
complex
<
T
>
(
bufr
(
iu
,
iv
),
bufi
(
iu
,
iv
));
bufr
.
v
(
iu
,
iv
)
=
bufi
.
v
(
iu
,
iv
)
=
0
;
if
(
++
idxv
>=
nv
)
idxv
=
0
;
if
(
++
idxv
>=
i
nv
)
idxv
=
0
;
}
}
if
(
++
idxu
>=
nu
)
idxu
=
0
;
if
(
++
idxu
>=
i
nu
)
idxu
=
0
;
}
}
...
...
@@ -873,9 +814,9 @@ template<size_t supp, bool wgrid> class HelperX2g2
};
kbuf
buf
;
HelperX2g2
(
const
GridderConfig
<
T
>
&
gconf
_
,
mav
<
complex
<
T
>
,
2
>
&
grid_
,
HelperX2g2
(
const
Params
*
parent
_
,
mav
<
complex
<
T
>
,
2
>
&
grid_
,
vector
<
std
::
mutex
>
&
locks_
,
double
w0_
=-
1
,
double
dw_
=-
1
)
:
gconf
(
gconf
_
),
krn
(
*
gconf
.
krn
),
grid
(
grid_
),
:
parent
(
parent
_
),
t
krn
(
*
parent
->
krn
),
grid
(
grid_
),
iu0
(
-
1000000
),
iv0
(
-
1000000
),
bu0
(
-
1000000
),
bv0
(
-
1000000
),
bufr
({
size_t
(
su
),
size_t
(
svvec
)}),
...
...
@@ -884,7 +825,7 @@ template<size_t supp, bool wgrid> class HelperX2g2
w0
(
w0_
),
xdw
(
T
(
1
)
/
dw_
),
locks
(
locks_
)
{
checkShape
(
grid
.
shape
(),
{
gconf
.
Nu
(),
gconf
.
Nv
()
});
}
{
checkShape
(
grid
.
shape
(),
{
parent
->
nu
,
parent
->
nv
});
}
~
HelperX2g2
()
{
dump
();
}
constexpr
int
lineJump
()
const
{
return
svvec
;
}
...
...
@@ -893,13 +834,13 @@ template<size_t supp, bool wgrid> class HelperX2g2
double
u
,
v
;
auto
iu0old
=
iu0
;
auto
iv0old
=
iv0
;
gconf
.
getpix
(
in
.
u
,
in
.
v
,
u
,
v
,
iu0
,
iv0
);
parent
->
getpix
(
in
.
u
,
in
.
v
,
u
,
v
,
iu0
,
iv0
);
T
x0
=
(
iu0
-
T
(
u
))
*
2
+
(
supp
-
1
);
T
y0
=
(
iv0
-
T
(
v
))
*
2
+
(
supp
-
1
);
if
constexpr
(
wgrid
)
krn
.
eval2s
(
x0
,
y0
,
T
(
xdw
*
(
w0
-
in
.
w
)),
&
buf
.
simd
[
0
]);
t
krn
.
eval2s
(
x0
,
y0
,
T
(
xdw
*
(
w0
-
in
.
w
)),
&
buf
.
simd
[
0
]);
else
krn
.
eval2
(
x0
,
y0
,
&
buf
.
simd
[
0
]);
t
krn
.
eval2
(
x0
,
y0
,
&
buf
.
simd
[
0
]);
if
((
iu0
==
iu0old
)
&&
(
iv0
==
iv0old
))
return
;
if
((
iu0
<
bu0
)
||
(
iv0
<
bv0
)
||
(
iu0
+
int
(
supp
)
>
bu0
+
su
)
||
(
iv0
+
int
(
supp
)
>
bv0
+
sv
))
{
...
...
@@ -915,17 +856,17 @@ template<size_t supp, bool wgrid> class HelperX2g2
template
<
size_t
SUPP
,
bool
wgrid
>
[[
gnu
::
hot
]]
void
x2grid_c_helper
(
const
GridderConfig
<
T
>
&
gconf
,
mav
<
complex
<
T
>
,
2
>
&
grid
,
(
mav
<
complex
<
T
>
,
2
>
&
grid
,
size_t
p0
,
double
w0
)
{
bool
have_wgt
=
wgt
.
size
()
!=
0
;
vector
<
std
::
mutex
>
locks
(
gconf
.
Nu
()
);
vector
<
std
::
mutex
>
locks
(
nu
);
execGuided
(
ranges
.
size
(),
gconf
.
N
threads
()
,
100
,
0.2
,
[
&
](
Scheduler
&
sched
)
execGuided
(
ranges
.
size
(),
n
threads
,
100
,
0.2
,
[
&
](
Scheduler
&
sched
)
{
constexpr
size_t
vlen
=
native_simd
<
T
>::
size
();
constexpr
size_t
NVEC
((
SUPP
+
vlen
-
1
)
/
vlen
);
HelperX2g2
<
SUPP
,
wgrid
>
hlp
(
gconf
,
grid
,
locks
,
w0
,
dw
);
HelperX2g2
<
SUPP
,
wgrid
>
hlp
(
this
,
grid
,
locks
,
w0
,
dw
);
constexpr
int
jump
=
hlp
.
lineJump
();
const
T
*
DUCC0_RESTRICT
ku
=
hlp
.
buf
.
scalar
;
const
auto
*
DUCC0_RESTRICT
kv
=
hlp
.
buf
.
simd
+
NVEC
;
...
...
@@ -982,51 +923,51 @@ template<size_t SUPP, bool wgrid> [[gnu::hot]] void x2grid_c_helper
}
template
<
bool
wgrid
>
void
x2grid_c
(
const
GridderConfig
<
T
>
&
gconf
,
mav
<
complex
<
T
>
,
2
>
&
grid
,
(
mav
<
complex
<
T
>
,
2
>
&
grid
,
size_t
p0
,
double
w0
=-
1
)
{
timers
.
push
(
"gridding proper"
);
checkShape
(
grid
.
shape
(),
{
gconf
.
Nu
(),
gconf
.
Nv
()
});
checkShape
(
grid
.
shape
(),
{
nu
,
nv
});
if
constexpr
(
is_same
<
T
,
float
>::
value
)
switch
(
gconf
.
S
upp
()
)
switch
(
s
upp
)
{
case
4
:
x2grid_c_helper
<
4
,
wgrid
>
(
gconf
,
grid
,
p0
,
w0
);
break
;
case
5
:
x2grid_c_helper
<
5
,
wgrid
>
(
gconf
,
grid
,
p0
,
w0
);
break
;
case
6
:
x2grid_c_helper
<
6
,
wgrid
>
(
gconf
,
grid
,
p0
,
w0
);
break
;
case
7