Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Simon Perkins
ducc
Commits
11e3210f
Commit
11e3210f
authored
Jan 13, 2020
by
Martin Reinecke
Browse files
add nifty_gridder
parent
9c95b942
Changes
3
Hide whitespace changes
Inline
Side-by-side
nifty_gridder/gridder_cxx.h
0 → 100644
View file @
11e3210f
#ifndef GRIDDER_CXX_H
#define GRIDDER_CXX_H
/*
* This file is part of nifty_gridder.
*
* nifty_gridder is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* nifty_gridder is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with nifty_gridder; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Copyright (C) 2019 Max-Planck-Society
Author: Martin Reinecke */
#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cmath>
#include <vector>
#include <array>
#include "mr_util/error_handling.h"
#include "mr_util/fft.h"
#include "mr_util/threading.h"
#include "mr_util/useful_macros.h"
#include "mr_util/mav.h"
#include "mr_util/gl_integrator.h"
#if defined(__GNUC__)
#define ALIGNED(align) __attribute__ ((aligned(align)))
#else
#define ALIGNED(align)
#endif
namespace
gridder
{
namespace
detail
{
using
namespace
std
;
using
namespace
mr
;
/// Assert that two ndim-dimensional shapes are identical; aborts via
/// MR_assert with "shape mismatch" on the first differing extent.
template<size_t ndim> void checkShape
  (const array<size_t, ndim> &shp1, const array<size_t, ndim> &shp2)
  {
  for (size_t i=0; i<ndim; ++i)
    MR_assert(shp1[i]==shp2[i], "shape mismatch");
  }
/// Fractional part of v, mapped into [0,1) (unlike std::fmod, the result
/// is non-negative for negative inputs).
template<typename T> inline T fmod1 (T v)
  {
  const T frac = v - floor(v);
  return frac;
  }
template
<
typename
T
,
size_t
ndim
>
class
tmpStorage
{
private:
vector
<
T
>
d
;
mav
<
T
,
ndim
>
mav_
;
static
size_t
prod
(
const
array
<
size_t
,
ndim
>
&
shp
)
{
size_t
res
=
1
;
for
(
auto
v
:
shp
)
res
*=
v
;
return
res
;
}
public:
tmpStorage
(
const
array
<
size_t
,
ndim
>
&
shp
)
:
d
(
prod
(
shp
)),
mav_
(
d
.
data
(),
shp
)
{}
mav
<
T
,
ndim
>
&
getMav
()
{
return
mav_
;
}
const_mav
<
T
,
ndim
>
getCmav
()
{
return
cmav
(
mav_
);
}
void
fill
(
const
T
&
val
)
{
std
::
fill
(
d
.
begin
(),
d
.
end
(),
val
);
}
};
//
// Start of real gridder functionality
//
/// Accumulate the Hartley-transform-style combination of a complex grid
/// into a real grid (grid2 += 0.5*(Re+Im of grid(u,v) plus Re-Im of the
/// point-reflected sample grid(-u,-v) with periodic wrap)).
/// Rows are distributed over nthreads via execStatic.
template<typename T> void complex2hartley
  (const const_mav<complex<T>, 2> &grid, const mav<T,2> &grid2, size_t nthreads)
  {
  checkShape(grid.shape(), grid2.shape());
  size_t nu=grid.shape(0), nv=grid.shape(1);

  execStatic(nu, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
      {
      // (xu, xv) is the periodically mirrored index; index 0 maps to itself.
      size_t xu = (u==0) ? 0 : nu-u;
      for (size_t v=0; v<nv; ++v)
        {
        size_t xv = (v==0) ? 0 : nv-v;
        // NOTE: accumulates (+=) rather than overwrites.
        grid2(u,v) += T(0.5)*(grid(u,v).real()+grid(u,v).imag()+
                              grid(xu,xv).real()-grid(xu,xv).imag());
        }
      }
    });
  }
/// Inverse companion of complex2hartley: rebuild a complex grid from a real
/// Hartley-style grid, using the sample and its point-reflected partner:
/// grid2(u,v) = (h(u,v)+h(-u,-v))/2 + i*(h(u,v)-h(-u,-v))/2.
/// Overwrites grid2; rows are distributed over nthreads.
template<typename T> void hartley2complex
  (const const_mav<T,2> &grid, const mav<complex<T>,2> &grid2, size_t nthreads)
  {
  checkShape(grid.shape(), grid2.shape());
  size_t nu=grid.shape(0), nv=grid.shape(1);

  execStatic(nu, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
      {
      // periodically mirrored index; 0 maps to itself
      size_t xu = (u==0) ? 0 : nu-u;
      for (size_t v=0; v<nv; ++v)
        {
        size_t xv = (v==0) ? 0 : nv-v;
        T v1 = T(0.5)*grid(u,v);
        T v2 = T(0.5)*grid(xu,xv);
        grid2(u,v) = std::complex<T>(v1+v2, v1-v2);
        }
      }
    });
  }
/// Full 2D Hartley transform: first a separable Hartley transform via
/// r2r_separable_hartley, then an in-place symmetrization pass that combines
/// each interior point with its three mirror images to produce the genuine
/// (non-separable) 2D Hartley result. Only rows 1..(nu+1)/2-1 are iterated;
/// each iteration updates the four mirrored entries at once.
template<typename T> void hartley2_2D(const const_mav<T,2> &in,
  const mav<T,2> &out, size_t nthreads)
  {
  checkShape(in.shape(), out.shape());
  size_t nu=in.shape(0), nv=in.shape(1);
  ptrdiff_t sz=ptrdiff_t(sizeof(T));
  // element strides -> byte strides, as required by the FFT interface
  stride_t stri{sz*in.stride(0), sz*in.stride(1)};
  stride_t stro{sz*out.stride(0), sz*out.stride(1)};
  auto d_i = in.data();
  auto ptmp = out.data();
  r2r_separable_hartley({nu, nv}, stri, stro, {0,1}, d_i, ptmp, T(1), nthreads);
  // Symmetrize: loop bounds are shifted by +1 so that row 0 (its own mirror)
  // is never touched; likewise column 0 is excluded by starting j at 1.
  execStatic((nu+1)/2-1, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo+1; i<rng.hi+1; ++i)
      for(size_t j=1; j<(nv+1)/2; ++j)
        {
        // a,b,c,d are the four mutually mirrored samples; all four reads
        // happen before any write, so the update is order-safe.
        T a = ptmp[i*nv+j];
        T b = ptmp[(nu-i)*nv+j];
        T c = ptmp[i*nv+nv-j];
        T d = ptmp[(nu-i)*nv+nv-j];
        ptmp[i*nv+j]           = T(0.5)*(a+b+c-d);
        ptmp[(nu-i)*nv+j]      = T(0.5)*(a+b+d-c);
        ptmp[i*nv+nv-j]        = T(0.5)*(a+c+d-b);
        ptmp[(nu-i)*nv+nv-j]   = T(0.5)*(b+c+d-a);
        }
    });
  }
/// "Exponential of semicircle" gridding kernel,
/// exp(beta*(sqrt(1-v^2)-1)) on v in [-1,1], as used by Barnett et al. 2018
/// (FINUFFT). Also provides Gauss-Legendre-based correction factors and
/// empirically tuned lookup tables for beta and the required support size.
class ES_Kernel
  {
  private:
    static constexpr double pi=3.141592653589793238462643383279502884197;
    double beta;                 // shape parameter, already scaled by supp
    int p;                       // number of quadrature nodes per half-interval
    vector<double> x, wgt, psi;  // GL nodes, GL weights, kernel values at nodes
    size_t supp;                 // kernel support in grid cells

  public:
    ES_Kernel(size_t supp_, double ofactor, size_t nthreads)
      : beta(get_beta(supp_,ofactor)*supp_), p(int(1.5*supp_+2)), supp(supp_)
      {
      // Precompute Gauss-Legendre nodes/weights and the kernel sampled there,
      // for use in corfac().
      GL_Integrator integ(2*p,nthreads);
      x = integ.coordsSymmetric();
      wgt = integ.weightsSymmetric();
      psi=x;
      for (auto &v:psi)
        v=operator()(v);
      }
    // Convenience constructor assuming an oversampling factor of 2.
    ES_Kernel(size_t supp_, size_t nthreads)
      : ES_Kernel(supp_, 2., nthreads){}

    /// Kernel value at v (v expected in [-1,1]; sqrt argument goes negative
    /// outside that range).
    double operator()(double v) const { return exp(beta*(sqrt(1.-v*v)-1.)); }

    /* Compute correction factors for the ES gridding kernel
       This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
    double corfac(double v) const
      {
      // Quadrature approximation of the kernel's Fourier transform at v,
      // exploiting symmetry (hence the factor 2 and cos).
      double tmp=0;
      for (int i=0; i<p; ++i)
        tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]);
      return 2./(supp*tmp);
      }

    /// Optimal beta (per unit support) for the given support and oversampling
    /// factor. Values are empirically tuned constants — do not alter.
    static double get_beta(size_t supp, double ofactor=2)
      {
      MR_assert((supp>=2) && (supp<=15), "unsupported support size");
      if (ofactor>=2)
        {
        // tuned for ofactor == 2; index is the support size
        static const vector<double> opt_beta {-1, 0.14, 1.70, 2.08, 2.205,
          2.26, 2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304,
          2.3138, 2.317};
        MR_assert(supp<opt_beta.size(), "bad support size");
        return opt_beta[supp];
        }
      if (ofactor>=1.2)
        {
        // empirical, but pretty accurate approximation
        static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,
          -0.025,-0.0125,0,0,0,0,0,0,0,0};
        auto x0 = 1./(2*ofactor);
        auto bcstrength=1.+(x0-0.25)*2.5;
        return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
        }
      MR_fail("oversampling factor is too small");
      }

    /// Smallest support that achieves the requested accuracy epsilon at the
    /// given oversampling factor (tabulated for ofactor>=2, estimated for
    /// ofactor in [1.2,2)).
    static size_t get_supp(double epsilon, double ofactor=2)
      {
      double epssq = epsilon*epsilon;
      if (ofactor>=2)
        {
        // max map error per support size; index is the support size
        static const vector<double> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
          1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
          2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
        for (size_t i=2; i<maxmaperr.size(); ++i)
          if (epssq>maxmaperr[i]) return i;
        MR_fail("requested epsilon too small - minimum is 1e-13");
        }
      if (ofactor>=1.2)
        {
        for (size_t w=2; w<16; ++w)
          {
          // empirical, not very good approximation
          auto estimate = 12*exp(-2.*w*ofactor);
          if (epssq>estimate) return w;
          }
        MR_fail("requested epsilon too small");
        }
      MR_fail("oversampling factor is too small");
      }
  };
/* Compute correction factors for the ES gridding kernel
   This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
/// Evaluate kernel.corfac at nval equidistant points i/n (i=0..nval-1),
/// in parallel over nthreads.
vector<double> correction_factors(size_t n, double ofactor, size_t nval,
  size_t supp, size_t nthreads)
  {
  ES_Kernel kernel(supp, ofactor, nthreads);
  vector<double> res(nval);
  double xn = 1./n;
  execStatic(nval, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
      res[i] = kernel.corfac(i*xn);
    });
  return res;
  }
// Index type for (row, channel) addressing; 32 bits bounds the total number
// of visibilities (see the hugeval checks in Baselines).
using idx_t = uint32_t;

/// A (measurement-set row, frequency channel) pair.
struct RowChan
  { idx_t row, chan; };
/// One (u,v,w) baseline coordinate triple.
struct UVW
  {
  double u, v, w;

  UVW() {}
  UVW(double u_, double v_, double w_)
    : u(u_), v(v_), w(w_) {}

  /// Component-wise scaling by fct.
  UVW operator* (double fct) const
    {
    UVW scaled(u*fct, v*fct, w*fct);
    return scaled;
    }

  /// Negate all three components in place.
  void Flip()
    {
    u = -u;
    v = -v;
    w = -w;
    }

  /// Ensure w is non-negative by flipping the whole coordinate if needed;
  /// returns true iff a flip was performed.
  bool FixW()
    {
    if (!(w<0))
      return false;
    Flip();
    return true;
    }
  };
/// Stores the per-row UVW coordinates and per-channel frequency/c factors of
/// a measurement set, and maps between flat visibility indices and
/// (row, channel) pairs via bit shifting (channels occupy the low bits).
class Baselines
  {
  protected:
    vector<UVW> coord;        // one UVW per MS row
    vector<double> f_over_c;  // frequency/speed-of-light per channel
    idx_t nrows, nchan;
    idx_t shift, mask;        // nchan rounded up to a power of two: index = (row<<shift)|chan

  public:
    /// Build from an (nrows,3) coordinate array and an (nchan,) frequency
    /// array. If negate_v is set, the v coordinate is sign-flipped on input.
    template<typename T> Baselines(const const_mav<T,2> &coord_,
      const const_mav<T,1> &freq, bool negate_v=false)
      {
      constexpr double speedOfLight = 299792458.;
      MR_assert(coord_.shape(1)==3, "dimension mismatch");
      // everything must fit into idx_t (uint32_t)
      auto hugeval = size_t(~(idx_t(0)));
      MR_assert(coord_.shape(0)<hugeval, "too many entries in MS");
      MR_assert(coord_.shape(1)<hugeval, "too many entries in MS");
      MR_assert(coord_.size()<hugeval, "too many entries in MS");
      nrows = coord_.shape(0);
      nchan = freq.shape(0);
      // smallest shift such that 2^shift >= nchan
      shift=0;
      while((idx_t(1)<<shift)<nchan) ++shift;
      mask=(idx_t(1)<<shift)-1;
      MR_assert(nrows*(mask+1)<hugeval, "too many entries in MS");
      f_over_c.resize(nchan);
      for (size_t i=0; i<nchan; ++i)
        {
        MR_assert(freq[i]>0, "negative channel frequency encountered");
        f_over_c[i] = freq(i)/speedOfLight;
        }
      coord.resize(nrows);
      if (negate_v)
        for (size_t i=0; i<coord.size(); ++i)
          coord[i] = UVW(coord_(i,0), -coord_(i,1), coord_(i,2));
      else
        for (size_t i=0; i<coord.size(); ++i)
          coord[i] = UVW(coord_(i,0), coord_(i,1), coord_(i,2));
      }

    /// Split a flat visibility index into (row, channel).
    RowChan getRowChan(idx_t index) const
      { return RowChan{index>>shift, index&mask}; }

    /// UVW coordinate in wavelengths (coordinate times frequency/c).
    UVW effectiveCoord(const RowChan &rc) const
      { return coord[rc.row]*f_over_c[rc.chan]; }
    UVW effectiveCoord(idx_t index) const
      { return effectiveCoord(getRowChan(index)); }
    size_t Nrows() const { return nrows; }
    size_t Nchannels() const { return nchan; }
    /// Inverse of getRowChan: combine (row, channel) into a flat index.
    idx_t getIdx(idx_t irow, idx_t ichan) const
      { return ichan+(irow<<shift); }
  };
class
GridderConfig
{
protected:
size_t
nx_dirty
,
ny_dirty
,
nu
,
nv
;
double
ofactor
,
eps
,
psx
,
psy
;
size_t
supp
,
nsafe
;
double
beta
;
size_t
nthreads
;
double
ushift
,
vshift
;
int
maxiu0
,
maxiv0
;
/// Complex w-screen phase factor exp(+/- 2*pi*i*w*(n-1)) for a pixel at
/// squared direction cosines (x, y); the sign is negated when adjoint is
/// true. Beyond the horizon (1-x-y <= 0) the factor is 1 (no phase applied).
template<typename T> complex<T> wscreen(T x, T y, T w, bool adjoint) const
  {
  constexpr T pi = T(3.141592653589793238462643383279502884197);
  T tmp = 1-x-y;
  if (tmp<=0) return 1; // no phase factor beyond the horizon
  // more accurate form of sqrt(1-x-y)-1 (avoids cancellation for small x+y)
  T nm1 = (-x-y)/(sqrt(tmp)+1);
  T phase = 2*pi*w*nm1;
  if (adjoint) phase *= -1;
  return complex<T>(cos(phase), sin(phase));
  }
public:
/// Main constructor: fixes the dirty-image size (nxdirty, nydirty), the
/// oversampled grid size (nu_, nv_), the accuracy target epsilon and the
/// pixel sizes, then derives kernel support, safety margin, beta and the
/// pixel-coordinate shift/clamp constants used by getpix().
/// NOTE: member initializers deliberately rely on declaration order
/// (ofactor uses nu/nv, supp uses ofactor, nsafe/beta use supp, ...).
GridderConfig(size_t nxdirty, size_t nydirty, size_t nu_, size_t nv_,
  double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
  : nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_),
    ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)),
    eps(epsilon), psx(pixsize_x), psy(pixsize_y),
    supp(ES_Kernel::get_supp(epsilon, ofactor)), nsafe((supp+1)/2),
    beta(ES_Kernel::get_beta(supp, ofactor)*supp),
    nthreads(nthreads_),
    ushift(supp*(-0.5)+1+nu), vshift(supp*(-0.5)+1+nv),
    maxiu0((nu+nsafe)-supp), maxiv0((nv+nsafe)-supp)
  {
  MR_assert(nu>=2*nsafe, "nu too small");
  MR_assert(nv>=2*nsafe, "nv too small");
  MR_assert((nx_dirty&1)==0, "nx_dirty must be even");
  MR_assert((ny_dirty&1)==0, "ny_dirty must be even");
  MR_assert((nu&1)==0, "nu must be even");
  MR_assert((nv&1)==0, "nv must be even");
  MR_assert(epsilon>0, "epsilon must be positive");
  MR_assert(pixsize_x>0, "pixsize_x must be positive");
  MR_assert(pixsize_y>0, "pixsize_y must be positive");
  MR_assert(ofactor>=1.2, "oversampling factor smaller than 1.2");
  }
/// Convenience constructor: chooses the oversampled grid size automatically
/// as twice the dirty-image size, but at least 30 cells per axis.
GridderConfig(size_t nxdirty, size_t nydirty, double epsilon,
  double pixsize_x, double pixsize_y, size_t nthreads_)
  : GridderConfig(nxdirty, nydirty, max<size_t>(30, 2*nxdirty),
      max<size_t>(30, 2*nydirty), epsilon, pixsize_x, pixsize_y, nthreads_)
  {}
// Trivial read-only accessors for the configuration parameters.
size_t Nxdirty() const { return nx_dirty; }
size_t Nydirty() const { return ny_dirty; }
double Epsilon() const { return eps; }
double Pixsize_x() const { return psx; }
double Pixsize_y() const { return psy; }
size_t Nu() const { return nu; }
size_t Nv() const { return nv; }
size_t Supp() const { return supp; }
size_t Nsafe() const { return nsafe; }
double Beta() const { return beta; }
size_t Nthreads() const { return nthreads; }
double Ofactor() const { return ofactor; }
/// Post-processing after the Hartley transform: extract the centered
/// nx_dirty x ny_dirty window from the wrapped nu x nv grid and apply the
/// separable kernel correction factors. Overwrites dirty.
template<typename T> void grid2dirty_post(const mav<T,2> &tmav,
  const mav<T,2> &dirty) const
  {
  checkShape(dirty.shape(), {nx_dirty, ny_dirty});
  // correction factors, indexed by distance from the image center
  auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads);
  auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads);
  execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
      {
      int icfu = abs(int(nx_dirty/2)-int(i));
      for (size_t j=0; j<ny_dirty; ++j)
        {
        int icfv = abs(int(ny_dirty/2)-int(j));
        // map image pixel (i,j) to its wrapped position on the big grid
        size_t i2 = nu-nx_dirty/2+i;
        if (i2>=nu) i2-=nu;
        size_t j2 = nv-ny_dirty/2+j;
        if (j2>=nv) j2-=nv;
        // FIXME: for some reason g++ warns about double-to-float conversion
        // here, even though there is an explicit cast...
        dirty(i,j) = tmav(i2,j2)*T(cfu[icfu]*cfv[icfv]);
        }
      }
    });
  }
/// w-stacking variant of grid2dirty_post: accumulate the real part of the
/// complex grid multiplied by the adjoint w-screen into dirty. Iterates only
/// over one quadrant (nx_dirty/2+1 rows, ny_dirty/2+1 columns) and exploits
/// the w-screen's point symmetry to handle the other three quadrants with
/// the same ws value. Accumulates (+=) into dirty.
template<typename T> void grid2dirty_post2(const mav<complex<T>,2> &tmav,
  const mav<T,2> &dirty, T w) const
  {
  checkShape(dirty.shape(), {nx_dirty, ny_dirty});
  // image-plane coordinates of pixel (0,0)
  double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy;
  execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
      {
      T fx = T(x0+i*psx);
      fx *= fx;   // wscreen takes squared coordinates
      for (size_t j=0; j<=ny_dirty/2; ++j)
        {
        T fy = T(y0+j*psy);
        auto ws = wscreen(fx, fy*fy, w, true);
        // wrapped grid position of image pixel (i,j)
        size_t ix = nu-nx_dirty/2+i;
        if (ix>=nu) ix-=nu;
        size_t jx = nv-ny_dirty/2+j;
        if (jx>=nv) jx-=nv;
        dirty(i,j) += (tmav(ix,jx)*ws).real(); // lower left
        // mirrored image pixel and its wrapped grid position
        size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
        size_t ix2 = nu-nx_dirty/2+i2;
        if (ix2>=nu) ix2-=nu;
        size_t jx2 = nv-ny_dirty/2+j2;
        if (jx2>=nv) jx2-=nv;
        // guards avoid double-counting the symmetry axes (i==0, i==i2, ...)
        if ((i>0)&&(i<i2))
          {
          dirty(i2,j) += (tmav(ix2,jx)*ws).real(); // lower right
          if ((j>0)&&(j<j2))
            dirty(i2,j2) += (tmav(ix2,jx2)*ws).real(); // upper right
          }
        if ((j>0)&&(j<j2))
          dirty(i,j2) += (tmav(ix,jx2)*ws).real(); // upper left
        }
      }
    });
  }
/// Real-valued grid -> dirty image: 2D Hartley transform into a temporary
/// buffer, then window extraction and kernel correction via grid2dirty_post.
template<typename T> void grid2dirty(const const_mav<T,2> &grid,
  const mav<T,2> &dirty) const
  {
  checkShape(grid.shape(), {nu, nv});
  tmpStorage<T,2> tmpdat({nu, nv});
  auto tmav = tmpdat.getMav();
  hartley2_2D<T>(grid, tmav, nthreads);
  grid2dirty_post(tmav, dirty);
  }
/// Complex grid -> dirty image with w-screen: in-place backward FFT of the
/// grid (which is overwritten, as the name warns), then accumulate the
/// w-corrected real part into dirty via grid2dirty_post2.
template<typename T> void grid2dirty_c_overwrite_wscreen_add
  (const mav<complex<T>,2> &grid, const mav<T,2> &dirty, T w) const
  {
  checkShape(grid.shape(), {nu, nv});
  constexpr auto sc = ptrdiff_t(sizeof(complex<T>));
  // in-place c2c FFT: input and output strides/pointers are identical
  c2c({nu, nv},
      {grid.stride(0)*sc, grid.stride(1)*sc},
      {grid.stride(0)*sc, grid.stride(1)*sc},
      {0, 1}, BACKWARD, grid.data(), grid.data(), T(1), nthreads);
  grid2dirty_post2(grid, dirty, w);
  }
/// Adjoint of grid2dirty_post: zero the big grid, then scatter the
/// kernel-corrected dirty image into its wrapped central window.
template<typename T> void dirty2grid_pre(const const_mav<T,2> &dirty,
  const mav<T,2> &grid) const
  {
  checkShape(dirty.shape(), {nx_dirty, ny_dirty});
  checkShape(grid.shape(), {nu, nv});
  // correction factors, indexed by distance from the image center
  auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads);
  auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads);
  grid.fill(0);
  execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
      {
      int icfu = abs(int(nx_dirty/2)-int(i));
      for (size_t j=0; j<ny_dirty; ++j)
        {
        int icfv = abs(int(ny_dirty/2)-int(j));
        // wrapped grid position of image pixel (i,j)
        size_t i2 = nu-nx_dirty/2+i;
        if (i2>=nu) i2-=nu;
        size_t j2 = nv-ny_dirty/2+j;
        if (j2>=nv) j2-=nv;
        grid(i2,j2) = dirty(i,j)*T(cfu[icfu]*cfv[icfv]);
        }
      }
    });
  }
/// w-stacking adjoint of grid2dirty_post2: zero the complex grid, then
/// scatter the dirty image multiplied by the forward w-screen into it.
/// Same quadrant-symmetry scheme as grid2dirty_post2 (one ws evaluation
/// serves up to four pixels); writes with '=' since the grid was cleared.
template<typename T> void dirty2grid_pre2(const const_mav<T,2> &dirty,
  const mav<complex<T>,2> &grid, T w) const
  {
  checkShape(dirty.shape(), {nx_dirty, ny_dirty});
  checkShape(grid.shape(), {nu, nv});
  grid.fill(0);
  // image-plane coordinates of pixel (0,0)
  double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy;
  execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
      {
      T fx = T(x0+i*psx);
      fx *= fx;   // wscreen takes squared coordinates
      for (size_t j=0; j<=ny_dirty/2; ++j)
        {
        T fy = T(y0+j*psy);
        auto ws = wscreen(fx, fy*fy, w, false);
        // wrapped grid position of image pixel (i,j)
        size_t ix = nu-nx_dirty/2+i;
        if (ix>=nu) ix-=nu;
        size_t jx = nv-ny_dirty/2+j;
        if (jx>=nv) jx-=nv;
        grid(ix,jx) = dirty(i,j)*ws; // lower left
        // mirrored image pixel and its wrapped grid position
        size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
        size_t ix2 = nu-nx_dirty/2+i2;
        if (ix2>=nu) ix2-=nu;
        size_t jx2 = nv-ny_dirty/2+j2;
        if (jx2>=nv) jx2-=nv;
        // guards avoid writing the symmetry axes twice
        if ((i>0)&&(i<i2))
          {
          grid(ix2,jx) = dirty(i2,j)*ws; // lower right
          if ((j>0)&&(j<j2))
            grid(ix2,jx2) = dirty(i2,j2)*ws; // upper right
          }
        if ((j>0)&&(j<j2))
          grid(ix,jx2) = dirty(i,j2)*ws; // upper left
        }
      }
    });
  }
/// Dirty image -> real-valued grid: scatter with kernel correction
/// (dirty2grid_pre), then an in-place 2D Hartley transform of the grid.
template<typename T> void dirty2grid(const const_mav<T,2> &dirty,
  const mav<T,2> &grid) const
  {
  dirty2grid_pre(dirty, grid);
  hartley2_2D<T>(cmav(grid), grid, nthreads);
  }
/// Dirty image -> complex grid with w-screen: scatter with the forward
/// w-screen (dirty2grid_pre2), then an in-place forward FFT of the grid.
template<typename T> void dirty2grid_c_wscreen(const const_mav<T,2> &dirty,
  const mav<complex<T>,2> &grid, T w) const
  {
  dirty2grid_pre2(dirty, grid, w);
  constexpr auto sc = ptrdiff_t(sizeof(complex<T>));
  stride_t strides{grid.stride(0)*sc, grid.stride(1)*sc};
  // in-place c2c FFT: same strides and pointer for input and output
  c2c({nu, nv}, strides, strides, {0, 1}, FORWARD,
      grid.data(), grid.data(), T(1), nthreads);
  }
/// Map a (u,v) coordinate to grid space: u/v receive the fractional grid
/// position in [0,nu)/[0,nv); iu0/iv0 receive the first grid cell touched by
/// the kernel (shifted by ushift/vshift, clamped to maxiu0/maxiv0 so the
/// supp-wide kernel footprint stays inside the padded grid).
void getpix(double u_in, double v_in, double &u, double &v,
  int &iu0, int &iv0) const
  {
  u = fmod1(u_in*psx)*nu;
  iu0 = min(int(u+ushift)-int(nu), maxiu0);
  v = fmod1(v_in*psy)*nv;
  iv0 = min(int(v+vshift)-int(nv), maxiv0);
  }
template
<
typename
T
>
void
apply_wscreen
(
const
const_mav
<
complex
<
T
>
,
2
>
&
dirty
,
mav
<
complex
<
T
>
,
2
>
&
dirty2
,
double
w
,
bool
adjoint
)
const
{
checkShape
(
dirty
.
shape
(),
{
nx_dirty
,
ny_dirty
});
checkShape
(
dirty2
.
shape
(),
{
nx_dirty
,
ny_dirty
});
double
x0
=
-
0.5
*
nx_dirty
*
psx
,
y0
=
-
0.5
*
ny_dirty
*
psy
;
execStatic
(
nx_dirty
/
2
+
1
,
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
{
while
(
auto
rng
=
sched
.
getNext
())
for
(
auto
i
=
rng
.
lo
;
i
<
rng
.
hi
;
++
i
)