Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ift
nifty_gridder
Commits
8b794388
Commit
8b794388
authored
Sep 02, 2019
by
Martin Reinecke
Browse files
Merge branch 'vector_experiments' into 'separate_interfaces'
Vector experiments See merge request
!20
parents
632ac597
738ef6d0
Changes
3
Hide whitespace changes
Inline
Side-by-side
gridder_cxx.h
View file @
8b794388
...
...
@@ -34,9 +34,11 @@
#if defined(__GNUC__)
#define NOINLINE __attribute__((noinline))
#define ALIGNED(align) __attribute__ ((aligned(align)))
#define RESTRICT __restrict__
#else
#define NOINLINE
#define ALIGNED(align)
#define RESTRICT
#endif
namespace
gridder
{
...
...
@@ -61,6 +63,8 @@ template<> struct VLEN<float> { static constexpr size_t val=4; };
template
<
>
struct
VLEN
<
double
>
{
static
constexpr
size_t
val
=
2
;
};
#endif
// "mav" stands for "multidimensional array view"
template
<
typename
T
,
size_t
ndim
>
class
mav
{
static_assert
((
ndim
>
0
)
&&
(
ndim
<
3
),
"only supports 1D and 2D arrays"
);
...
...
@@ -81,11 +85,7 @@ template<typename T, size_t ndim> class mav
for
(
size_t
d
=
2
;
d
<=
ndim
;
++
d
)
str
[
ndim
-
d
]
=
str
[
ndim
-
d
+
1
]
*
shp
[
ndim
-
d
+
1
];
}
operator
mav
<
const
T
,
ndim
>
()
const
{
return
mav
<
const
T
,
ndim
>
(
d
,
shp
,
str
);
}
T
&
operator
[](
size_t
i
)
{
return
operator
()(
i
);
}
const
T
&
operator
[](
size_t
i
)
const
T
&
operator
[](
size_t
i
)
const
{
return
operator
()(
i
);
}
T
&
operator
()(
size_t
i
)
const
{
...
...
@@ -106,9 +106,7 @@ template<typename T, size_t ndim> class mav
return
res
;
}
ptrdiff_t
stride
(
size_t
i
)
const
{
return
str
[
i
];
}
T
*
data
()
{
return
d
;
}
const
T
*
data
()
const
T
*
data
()
const
{
return
d
;
}
bool
contiguous
()
const
{
...
...
@@ -122,6 +120,7 @@ template<typename T, size_t ndim> class mav
}
void
fill
(
const
T
&
val
)
const
{
// FIXME: special cases for contiguous arrays and/or zeroing?
if
(
ndim
==
1
)
for
(
size_t
i
=
0
;
i
<
shp
[
0
];
++
i
)
d
[
str
[
0
]
*
i
]
=
val
;
...
...
@@ -133,7 +132,14 @@ template<typename T, size_t ndim> class mav
};
template
<
typename
T
,
size_t
ndim
>
using
const_mav
=
mav
<
const
T
,
ndim
>
;
template
<
typename
T
,
size_t
ndim
>
const_mav
<
T
,
ndim
>
cmav
(
const
mav
<
T
,
ndim
>
&
mav
)
{
return
const_mav
<
T
,
ndim
>
(
mav
.
data
(),
mav
.
shape
());
}
template
<
typename
T
,
size_t
ndim
>
const_mav
<
T
,
ndim
>
nullmav
()
{
array
<
size_t
,
ndim
>
shp
;
shp
.
fill
(
0
);
return
const_mav
<
T
,
ndim
>
(
nullptr
,
shp
);
}
template
<
typename
T
,
size_t
ndim
>
class
tmpStorage
{
private:
...
...
@@ -150,19 +156,66 @@ template<typename T, size_t ndim> class tmpStorage
public:
tmpStorage
(
const
array
<
size_t
,
ndim
>
&
shp
)
:
d
(
prod
(
shp
)),
mav_
(
d
.
data
(),
shp
)
{}
mav
<
T
,
ndim
>
&
getMav
()
{
return
mav_
;
};
mav
<
T
,
ndim
>
&
getMav
()
{
return
mav_
;
}
const_mav
<
T
,
ndim
>
getCmav
()
{
return
cmav
(
mav_
);
}
void
fill
(
const
T
&
val
)
{
std
::
fill
(
d
.
begin
(),
d
.
end
(),
val
);
}
};
//
// basic utilities
//
#if defined (__GNUC__)
#define LOC_ CodeLocation(__FILE__, __LINE__, __PRETTY_FUNCTION__)
#else
#define LOC_ CodeLocation(__FILE__, __LINE__)
#endif
void
myassert
(
bool
cond
,
const
char
*
msg
)
#define myfail(...) \
do { \
std::ostringstream os; \
streamDump__(os, LOC_, "\n", ##__VA_ARGS__, "\n"); \
throw std::runtime_error(os.str()); \
} while(0)
#define myassert(cond,...) \
do { \
if (cond); \
else { myfail("Assertion failure\n", ##__VA_ARGS__); } \
} while(0)
template
<
typename
T
>
inline
void
streamDump__
(
std
::
ostream
&
os
,
const
T
&
value
)
{
os
<<
value
;
}
template
<
typename
T
,
typename
...
Args
>
inline
void
streamDump__
(
std
::
ostream
&
os
,
const
T
&
value
,
const
Args
&
...
args
)
{
if
(
cond
)
return
;
throw
runtime_error
(
msg
);
os
<<
value
;
streamDump__
(
os
,
args
...
);
}
// to be replaced with std::source_location once available
class
CodeLocation
{
private:
const
char
*
file
,
*
func
;
int
line
;
public:
CodeLocation
(
const
char
*
file_
,
int
line_
,
const
char
*
func_
=
nullptr
)
:
file
(
file_
),
func
(
func_
),
line
(
line_
)
{}
ostream
&
print
(
ostream
&
os
)
const
{
os
<<
"file: "
<<
file
<<
", line: "
<<
line
;
if
(
func
)
os
<<
", function: "
<<
func
;
return
os
;
}
};
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
CodeLocation
&
loc
)
{
return
loc
.
print
(
os
);
}
template
<
size_t
ndim
>
void
checkShape
(
const
array
<
size_t
,
ndim
>
&
shp1
,
const
array
<
size_t
,
ndim
>
&
shp2
)
...
...
@@ -259,7 +312,7 @@ size_t get_supp(double epsilon)
for
(
size_t
i
=
1
;
i
<
maxmaperr
.
size
();
++
i
)
if
(
epssq
>
maxmaperr
[
i
])
return
i
;
throw
runtime_error
(
"requested epsilon too small - minimum is 2e-13"
);
myfail
(
"requested epsilon too small - minimum is 2e-13"
);
}
struct
RowChan
...
...
@@ -267,28 +320,93 @@ struct RowChan
size_t
row
,
chan
;
};
template
<
typename
T
>
class
EC_Kernel
template
<
typename
T
>
void
complex2hartley
(
const
const_mav
<
complex
<
T
>
,
2
>
&
grid
,
const
mav
<
T
,
2
>
&
grid2
,
size_t
nthreads
)
{
myassert
(
grid
.
shape
()
==
grid2
.
shape
(),
"shape mismatch"
);
size_t
nu
=
grid
.
shape
(
0
),
nv
=
grid
.
shape
(
1
);
#pragma omp parallel for num_threads(nthreads)
for
(
size_t
u
=
0
;
u
<
nu
;
++
u
)
{
size_t
xu
=
(
u
==
0
)
?
0
:
nu
-
u
;
for
(
size_t
v
=
0
;
v
<
nv
;
++
v
)
{
size_t
xv
=
(
v
==
0
)
?
0
:
nv
-
v
;
grid2
(
u
,
v
)
+=
T
(
0.5
)
*
(
grid
(
u
,
v
).
real
()
+
grid
(
u
,
v
).
imag
()
+
grid
(
xu
,
xv
).
real
()
-
grid
(
xu
,
xv
).
imag
());
}
}
}
template
<
typename
T
>
void
hartley2complex
(
const
const_mav
<
T
,
2
>
&
grid
,
const
mav
<
complex
<
T
>
,
2
>
&
grid2
,
size_t
nthreads
)
{
myassert
(
grid
.
shape
()
==
grid2
.
shape
(),
"shape mismatch"
);
size_t
nu
=
grid
.
shape
(
0
),
nv
=
grid
.
shape
(
1
);
#pragma omp parallel for num_threads(nthreads)
for
(
size_t
u
=
0
;
u
<
nu
;
++
u
)
{
size_t
xu
=
(
u
==
0
)
?
0
:
nu
-
u
;
for
(
size_t
v
=
0
;
v
<
nv
;
++
v
)
{
size_t
xv
=
(
v
==
0
)
?
0
:
nv
-
v
;
T
v1
=
T
(
0.5
)
*
grid
(
u
,
v
);
T
v2
=
T
(
0.5
)
*
grid
(
xu
,
xv
);
grid2
(
u
,
v
)
=
std
::
complex
<
T
>
(
v1
+
v2
,
v1
-
v2
);
}
}
}
template
<
typename
T
>
void
hartley2_2D
(
const
const_mav
<
T
,
2
>
&
in
,
const
mav
<
T
,
2
>
&
out
,
size_t
nthreads
)
{
myassert
(
in
.
shape
()
==
out
.
shape
(),
"shape mismatch"
);
size_t
nu
=
in
.
shape
(
0
),
nv
=
in
.
shape
(
1
);
ptrdiff_t
sz
=
ptrdiff_t
(
sizeof
(
T
));
pocketfft
::
stride_t
stri
{
sz
*
in
.
stride
(
0
),
sz
*
in
.
stride
(
1
)};
pocketfft
::
stride_t
stro
{
sz
*
out
.
stride
(
0
),
sz
*
out
.
stride
(
1
)};
auto
d_i
=
in
.
data
();
auto
ptmp
=
out
.
data
();
pocketfft
::
r2r_separable_hartley
({
nu
,
nv
},
stri
,
stro
,
{
0
,
1
},
d_i
,
ptmp
,
T
(
1
),
nthreads
);
#pragma omp parallel for num_threads(nthreads)
for
(
size_t
i
=
1
;
i
<
(
nu
+
1
)
/
2
;
++
i
)
for
(
size_t
j
=
1
;
j
<
(
nv
+
1
)
/
2
;
++
j
)
{
T
a
=
ptmp
[
i
*
nv
+
j
];
T
b
=
ptmp
[(
nu
-
i
)
*
nv
+
j
];
T
c
=
ptmp
[
i
*
nv
+
nv
-
j
];
T
d
=
ptmp
[(
nu
-
i
)
*
nv
+
nv
-
j
];
ptmp
[
i
*
nv
+
j
]
=
T
(
0.5
)
*
(
a
+
b
+
c
-
d
);
ptmp
[(
nu
-
i
)
*
nv
+
j
]
=
T
(
0.5
)
*
(
a
+
b
+
d
-
c
);
ptmp
[
i
*
nv
+
nv
-
j
]
=
T
(
0.5
)
*
(
a
+
c
+
d
-
b
);
ptmp
[(
nu
-
i
)
*
nv
+
nv
-
j
]
=
T
(
0.5
)
*
(
b
+
c
+
d
-
a
);
}
}
class
EC_Kernel
{
protected:
T
beta
;
double
beta
;
public:
EC_Kernel
(
size_t
supp
)
:
beta
(
2.3
*
supp
)
{}
T
operator
()(
T
v
)
const
{
return
exp
(
beta
*
(
sqrt
(
T
(
1
)
-
v
*
v
)
-
T
(
1
)
));
}
double
operator
()(
double
v
)
const
{
return
exp
(
beta
*
(
sqrt
(
1.
-
v
*
v
)
-
1.
));
}
};
template
<
typename
T
>
class
EC_Kernel_with_correction
:
public
EC_Kernel
<
T
>
class
EC_Kernel_with_correction
:
public
EC_Kernel
{
protected:
static
constexpr
T
pi
=
T
(
3.141592653589793238462643383279502884197
L
)
;
static
constexpr
double
pi
=
3.141592653589793238462643383279502884197
;
int
p
;
vector
<
double
>
x
,
wgt
,
psi
;
size_t
supp
;
public:
using
EC_Kernel
<
T
>::
operator
();
EC_Kernel_with_correction
(
size_t
supp_
,
size_t
nthreads
)
:
EC_Kernel
<
T
>
(
supp_
),
p
(
int
(
1.5
*
supp_
+
2
)),
supp
(
supp_
)
:
EC_Kernel
(
supp_
),
p
(
int
(
1.5
*
supp_
+
2
)),
supp
(
supp_
)
{
legendre_prep
(
2
*
p
,
x
,
wgt
,
nthreads
);
psi
=
x
;
...
...
@@ -297,12 +415,12 @@ template<typename T> class EC_Kernel_with_correction: public EC_Kernel<T>
}
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
T
corfac
(
T
v
)
const
double
corfac
(
double
v
)
const
{
T
tmp
=
0
;
double
tmp
=
0
;
for
(
int
i
=
0
;
i
<
p
;
++
i
)
tmp
+=
wgt
[
i
]
*
psi
[
i
]
*
cos
(
pi
*
supp
*
v
*
x
[
i
]);
return
T
(
1.
/
(
supp
*
tmp
)
)
;
return
1.
/
(
supp
*
tmp
);
}
};
/* Compute correction factors for the ES gridding kernel
...
...
@@ -310,7 +428,7 @@ template<typename T> class EC_Kernel_with_correction: public EC_Kernel<T>
vector
<
double
>
correction_factors
(
size_t
n
,
size_t
nval
,
size_t
supp
,
size_t
nthreads
)
{
EC_Kernel_with_correction
<
double
>
kernel
(
supp
,
nthreads
);
EC_Kernel_with_correction
kernel
(
supp
,
nthreads
);
vector
<
double
>
res
(
nval
);
double
xn
=
1.
/
n
;
#pragma omp parallel for schedule(static) num_threads(nthreads)
...
...
@@ -497,12 +615,30 @@ template<typename T> class GridderConfig
img2
(
i
,
j
)
=
img
(
i
,
j
)
*
cfu
[
i
]
*
cfv
[
j
];
}
void
grid2dirty
(
const
const_mav
<
T
,
2
>
&
grid
,
const
mav
<
T
,
2
>
&
dirty
)
const
{
checkShape
(
grid
.
shape
(),
{
nu
,
nv
});
checkShape
(
dirty
.
shape
(),
{
nx_dirty
,
ny_dirty
});
tmpStorage
<
T
,
2
>
tmpdat
({
nu
,
nv
});
auto
tmav
=
tmpdat
.
getMav
();
hartley2_2D
<
T
>
(
grid
,
tmav
,
nthreads
);
for
(
size_t
i
=
0
;
i
<
nx_dirty
;
++
i
)
for
(
size_t
j
=
0
;
j
<
ny_dirty
;
++
j
)
{
size_t
i2
=
nu
-
nx_dirty
/
2
+
i
;
if
(
i2
>=
nu
)
i2
-=
nu
;
size_t
j2
=
nv
-
ny_dirty
/
2
+
j
;
if
(
j2
>=
nv
)
j2
-=
nv
;
dirty
(
i
,
j
)
=
tmav
(
i2
,
j2
)
*
cfu
[
i
]
*
cfv
[
j
];
}
}
void
grid2dirty_c
(
const
mav
<
const
complex
<
T
>
,
2
>
&
grid
,
mav
<
complex
<
T
>
,
2
>
&
dirty
)
const
{
checkShape
(
grid
.
shape
(),
{
nu
,
nv
});
checkShape
(
dirty
.
shape
(),
{
nx_dirty
,
ny_dirty
});
vector
<
complex
<
T
>>
tmpdat
(
nu
*
nv
);
auto
tmp
=
mav
<
complex
<
T
>
,
2
>
(
tmpdat
.
data
(),{
nu
,
nv
},{
ptrdiff_t
(
nv
),
ptrdiff_t
(
1
)}
);
tmpStorage
<
complex
<
T
>
,
2
>
tmpdat
(
{
nu
,
nv
}
);
auto
tmp
=
tmpdat
.
getMav
(
);
constexpr
auto
sc
=
ptrdiff_t
(
sizeof
(
complex
<
T
>
));
pocketfft
::
c2c
({
nu
,
nv
},{
grid
.
stride
(
0
)
*
sc
,
grid
.
stride
(
1
)
*
sc
},
{
tmp
.
stride
(
0
)
*
sc
,
tmp
.
stride
(
1
)
*
sc
},
{
0
,
1
},
pocketfft
::
BACKWARD
,
...
...
@@ -518,14 +654,29 @@ template<typename T> class GridderConfig
}
}
void
dirty2grid
(
const
const_mav
<
T
,
2
>
&
dirty
,
mav
<
T
,
2
>
&
grid
)
const
{
checkShape
(
dirty
.
shape
(),
{
nx_dirty
,
ny_dirty
});
checkShape
(
grid
.
shape
(),
{
nu
,
nv
});
grid
.
fill
(
0
);
for
(
size_t
i
=
0
;
i
<
nx_dirty
;
++
i
)
for
(
size_t
j
=
0
;
j
<
ny_dirty
;
++
j
)
{
size_t
i2
=
nu
-
nx_dirty
/
2
+
i
;
if
(
i2
>=
nu
)
i2
-=
nu
;
size_t
j2
=
nv
-
ny_dirty
/
2
+
j
;
if
(
j2
>=
nv
)
j2
-=
nv
;
grid
(
i2
,
j2
)
=
dirty
(
i
,
j
)
*
cfu
[
i
]
*
cfv
[
j
];
}
hartley2_2D
<
T
>
(
cmav
(
grid
),
grid
,
nthreads
);
}
void
dirty2grid_c
(
const
const_mav
<
complex
<
T
>
,
2
>
&
dirty
,
mav
<
complex
<
T
>
,
2
>
&
grid
)
const
{
checkShape
(
dirty
.
shape
(),
{
nx_dirty
,
ny_dirty
});
checkShape
(
grid
.
shape
(),
{
nu
,
nv
});
for
(
size_t
i
=
0
;
i
<
nu
;
++
i
)
for
(
size_t
j
=
0
;
j
<
nv
;
++
j
)
grid
(
i
,
j
)
=
0.
;
grid
.
fill
(
0
);
for
(
size_t
i
=
0
;
i
<
nx_dirty
;
++
i
)
for
(
size_t
j
=
0
;
j
<
ny_dirty
;
++
j
)
{
...
...
@@ -785,8 +936,8 @@ template<typename T, typename Serv> void x2grid_c
{
Helper
<
T
>
hlp
(
gconf
,
nullptr
,
grid
.
data
(),
w0
,
dw
);
int
jump
=
hlp
.
lineJump
();
const
T
*
ku
=
hlp
.
kernel
;
const
T
*
kv
=
hlp
.
kernel
+
supp
;
const
T
*
RESTRICT
ku
=
hlp
.
kernel
;
const
T
*
RESTRICT
kv
=
hlp
.
kernel
+
supp
;
size_t
np
=
srv
.
Nvis
();
// Loop over sampling points
...
...
@@ -796,14 +947,22 @@ template<typename T, typename Serv> void x2grid_c
UVW
<
T
>
coord
=
srv
.
getCoord
(
ipart
);
auto
flip
=
coord
.
FixW
();
hlp
.
prep
(
coord
);
auto
*
ptr
=
hlp
.
p0w
;
auto
*
RESTRICT
ptr
=
hlp
.
p0w
;
auto
v
(
srv
.
getVis
(
ipart
));
if
(
do_w_gridding
)
v
*=
hlp
.
Wfac
();
if
(
flip
)
v
=
conj
(
v
);
for
(
size_t
cu
=
0
;
cu
<
supp
;
++
cu
)
{
complex
<
T
>
tmp
(
v
*
ku
[
cu
]);
for
(
size_t
cv
=
0
;
cv
<
supp
;
++
cv
)
size_t
cv
=
0
;
for
(;
cv
<
supp
-
3
;
cv
+=
4
)
{
ptr
[
cv
]
+=
tmp
*
kv
[
cv
];
ptr
[
cv
+
1
]
+=
tmp
*
kv
[
cv
+
1
];
ptr
[
cv
+
2
]
+=
tmp
*
kv
[
cv
+
2
];
ptr
[
cv
+
3
]
+=
tmp
*
kv
[
cv
+
3
];
}
for
(;
cv
<
supp
;
++
cv
)
ptr
[
cv
]
+=
tmp
*
kv
[
cv
];
ptr
+=
jump
;
}
...
...
@@ -843,8 +1002,8 @@ template<typename T, typename Serv> void grid2x_c
{
Helper
<
T
>
hlp
(
gconf
,
grid
.
data
(),
nullptr
,
w0
,
dw
);
int
jump
=
hlp
.
lineJump
();
const
T
*
ku
=
hlp
.
kernel
;
const
T
*
kv
=
hlp
.
kernel
+
supp
;
const
T
*
RESTRICT
ku
=
hlp
.
kernel
;
const
T
*
RESTRICT
kv
=
hlp
.
kernel
+
supp
;
size_t
np
=
srv
.
Nvis
();
#pragma omp for schedule(guided,100)
...
...
@@ -854,11 +1013,17 @@ template<typename T, typename Serv> void grid2x_c
auto
flip
=
coord
.
FixW
();
hlp
.
prep
(
coord
);
complex
<
T
>
r
=
0
;
const
auto
*
ptr
=
hlp
.
p0r
;
const
auto
*
RESTRICT
ptr
=
hlp
.
p0r
;
for
(
size_t
cu
=
0
;
cu
<
supp
;
++
cu
)
{
complex
<
T
>
tmp
(
0
);
for
(
size_t
cv
=
0
;
cv
<
supp
;
++
cv
)
size_t
cv
=
0
;
for
(;
cv
<
supp
-
3
;
cv
+=
4
)
tmp
+=
ptr
[
cv
]
*
kv
[
cv
]
+
ptr
[
cv
+
1
]
*
kv
[
cv
+
1
]
+
ptr
[
cv
+
2
]
*
kv
[
cv
+
2
]
+
ptr
[
cv
+
3
]
*
kv
[
cv
+
3
];
for
(;
cv
<
supp
;
++
cv
)
tmp
+=
ptr
[
cv
]
*
kv
[
cv
];
r
+=
tmp
*
ku
[
cu
];
ptr
+=
jump
;
...
...
@@ -1004,7 +1169,7 @@ template<typename T> void get_correlations
template
<
typename
T
,
typename
T2
>
void
apply_wcorr
(
const
GridderConfig
<
T
>
&
gconf
,
const
mav
<
T2
,
2
>
&
dirty
,
const
EC_Kernel_with_correction
<
T
>
&
kernel
,
double
dw
)
const
mav
<
T2
,
2
>
&
dirty
,
const
EC_Kernel_with_correction
&
kernel
,
double
dw
)
{
auto
nx_dirty
=
gconf
.
Nxdirty
();
auto
ny_dirty
=
gconf
.
Nydirty
();
...
...
@@ -1046,8 +1211,6 @@ template<typename T, typename T2> void apply_wcorr(const GridderConfig<T> &gconf
template
<
typename
T
,
typename
Serv
>
void
x2dirty
(
const
GridderConfig
<
T
>
&
gconf
,
const
Serv
&
srv
,
const
mav
<
T
,
2
>
&
dirty
,
size_t
verbosity
=
0
)
{
auto
nx_dirty
=
gconf
.
Nxdirty
();
auto
ny_dirty
=
gconf
.
Nydirty
();
auto
nu
=
gconf
.
Nu
();
auto
nv
=
gconf
.
Nv
();
...
...
@@ -1060,21 +1223,16 @@ template<typename T, typename Serv> void x2dirty(
auto
grid
=
grid_
.
getMav
();
grid_
.
fill
(
0.
);
x2grid_c
(
gconf
,
srv
,
grid
);
tmpStorage
<
complex
<
T
>
,
2
>
cdirty_
({
nx_dirty
,
ny_dirty
});
auto
cdirty
=
cdirty_
.
getMav
();
// FIXME: this could use symmetries to accelerate the FFT
gconf
.
grid2dirty_c
(
grid
,
cdirty
);
for
(
size_t
i
=
0
;
i
<
nx_dirty
;
++
i
)
for
(
size_t
j
=
0
;
j
<
ny_dirty
;
++
j
)
dirty
(
i
,
j
)
=
cdirty
(
i
,
j
).
real
();
tmpStorage
<
T
,
2
>
rgrid_
({
nu
,
nv
});
auto
rgrid
=
rgrid_
.
getMav
();
complex2hartley
(
cmav
(
grid
),
rgrid
,
gconf
.
Nthreads
());
gconf
.
grid2dirty
(
cmav
(
rgrid
),
dirty
);
}
template
<
typename
T
,
typename
Serv
>
void
dirty2x
(
const
GridderConfig
<
T
>
&
gconf
,
const
const_mav
<
T
,
2
>
&
dirty
,
const
Serv
&
srv
,
size_t
verbosity
=
0
)
{
auto
nx_dirty
=
gconf
.
Nxdirty
();
auto
ny_dirty
=
gconf
.
Nydirty
();
auto
nu
=
gconf
.
Nu
();
auto
nv
=
gconf
.
Nv
();
size_t
nvis
=
srv
.
Nvis
();
...
...
@@ -1087,17 +1245,13 @@ template<typename T, typename Serv> void dirty2x(
for
(
size_t
i
=
0
;
i
<
nvis
;
++
i
)
srv
.
setVis
(
i
,
0.
);
tmpStorage
<
complex
<
T
>
,
2
>
cdirty_
({
nx_dirty
,
ny_dirty
});
auto
cdirty
=
cdirty_
.
getMav
();
for
(
size_t
i
=
0
;
i
<
nx_dirty
;
++
i
)
for
(
size_t
j
=
0
;
j
<
ny_dirty
;
++
j
)
cdirty
(
i
,
j
)
=
dirty
(
i
,
j
);
tmpStorage
<
complex
<
T
>
,
2
>
grid_
({
nu
,
nv
});
tmpStorage
<
T
,
2
>
grid_
({
nu
,
nv
});
auto
grid
=
grid_
.
getMav
();
// FIXME: this could use symmetries to accelerate the FFT
gconf
.
dirty2grid_c
(
cdirty
,
grid
);
grid2x_c
(
gconf
,
const_mav
<
complex
<
T
>
,
2
>
(
grid
.
data
(),{
nu
,
nv
}),
srv
);
gconf
.
dirty2grid
(
dirty
,
grid
);
tmpStorage
<
complex
<
T
>
,
2
>
grid2_
({
nu
,
nv
});
auto
grid2
=
grid2_
.
getMav
();
hartley2complex
(
cmav
(
grid
),
grid2
,
gconf
.
Nthreads
());
grid2x_c
(
gconf
,
cmav
(
grid2
),
srv
);
}
template
<
typename
T
,
typename
Serv
>
void
wstack_common
(
...
...
@@ -1115,6 +1269,7 @@ template<typename T, typename Serv> void wstack_common(
wmin
=
T
(
1e38
);
T
wmax
=
T
(
-
1e38
);
// FIXME maybe this can be done more intelligently
#pragma omp parallel for num_threads(nthreads) reduction(min:wmin) reduction(max:wmax)
for
(
size_t
ipart
=
0
;
ipart
<
nvis
;
++
ipart
)
{
...
...
@@ -1166,7 +1321,7 @@ template<typename T, typename Serv> void x2dirty_wstack(
size_t
nvis
=
srv
.
Nvis
();
auto
w_supp
=
gconf
.
Supp
();
size_t
nthreads
=
gconf
.
Nthreads
();
EC_Kernel_with_correction
<
T
>
kernel
(
w_supp
,
nthreads
);
EC_Kernel_with_correction
kernel
(
w_supp
,
nthreads
);
T
wmin
;
double
dw
;
size_t
nplanes
;
...
...
@@ -1208,9 +1363,9 @@ template<typename T, typename Serv> void x2dirty_wstack(
idx_loc
[
i
]
=
srv
.
getIdx
(
ipart
);
}
grid
.
fill
(
0
);
vis2grid_c
(
srv
.
getBaselines
(),
gconf
,
c
onst_mav
<
uint32_t
,
1
>
(
idx_loc
),
const_mav
<
complex
<
T
>
,
1
>
(
vis_loc
),
grid
,
const_
mav
<
T
,
1
>
(
nullptr
,{
0
}
),
wcur
,
T
(
dw
));
gconf
.
grid2dirty_c
(
grid
,
tdirty
);
gconf
.
apply_wscreen
(
tdirty
,
tdirty
,
wcur
,
false
);
vis2grid_c
(
srv
.
getBaselines
(),
gconf
,
c
mav
(
idx_loc
),
cmav
(
vis_loc
),
grid
,
null
mav
<
T
,
1
>
(),
wcur
,
T
(
dw
));
gconf
.
grid2dirty_c
(
cmav
(
grid
)
,
tdirty
);
gconf
.
apply_wscreen
(
cmav
(
tdirty
)
,
tdirty
,
wcur
,
false
);
#pragma omp parallel for num_threads(nthreads)
for
(
size_t
i
=
0
;
i
<
nx_dirty
;
++
i
)
for
(
size_t
j
=
0
;
j
<
ny_dirty
;
++
j
)
...
...
@@ -1235,7 +1390,7 @@ template<typename T, typename Serv> void x2dirty_wstack2(
size_t
nvis
=
srv
.
Nvis
();
auto
w_supp
=
gconf
.
Supp
();
size_t
nthreads
=
gconf
.
Nthreads
();
EC_Kernel_with_correction
<
T
>
kernel
(
w_supp
,
nthreads
);
EC_Kernel_with_correction
kernel
(
w_supp
,
nthreads
);
T
wmin
;
double
dw
;
size_t
nplanes
;
...
...
@@ -1266,8 +1421,8 @@ template<typename T, typename Serv> void x2dirty_wstack2(
grid
.
fill
(
0
);
Serv
newsrv
(
srv
,
const_mav
<
uint32_t
,
1
>
(
newidx
.
data
(),
{
newidx
.
size
()}));
x2grid_c
(
gconf
,
newsrv
,
grid
,
wcur
,
T
(
dw
));
gconf
.
grid2dirty_c
(
grid
,
tdirty
);
gconf
.
apply_wscreen
(
tdirty
,
tdirty
,
wcur
,
false
);
gconf
.
grid2dirty_c
(
cmav
(
grid
)
,
tdirty
);
gconf
.
apply_wscreen
(
cmav
(
tdirty
)
,
tdirty
,
wcur
,
false
);
#pragma omp parallel for num_threads(nthreads)
for
(
size_t
i
=
0
;
i
<
nx_dirty
;
++
i
)
for
(
size_t
j
=
0
;
j
<
ny_dirty
;
++
j
)
...
...
@@ -1294,7 +1449,7 @@ template<typename T, typename Serv> void dirty2x_wstack(
size_t
nvis
=
srv
.
Nvis
();
auto
w_supp
=
gconf
.
Supp
();
size_t
nthreads
=
gconf
.
Nthreads
();
EC_Kernel_with_correction
<
T
>
kernel
(
w_supp
,
nthreads
);
EC_Kernel_with_correction
kernel
(
w_supp
,
nthreads
);
T
wmin
;
double
dw
;
size_t
nplanes
;
...
...
@@ -1345,9 +1500,9 @@ template<typename T, typename Serv> void dirty2x_wstack(
for
(
size_t
i
=
0
;
i
<
nx_dirty
;
++
i
)
for
(
size_t
j
=
0
;
j
<
ny_dirty
;
++
j
)
tdirty2
(
i
,
j
)
=
tdirty
(
i
,
j
);
gconf
.
apply_wscreen
(
tdirty2
,
tdirty2
,
wcur
,
true
);
gconf
.
dirty2grid_c
(
tdirty2
,
grid
);
grid2vis_c
(
srv
.
getBaselines
(),
gconf
,
c
onst_mav
<
uint32_t
,
1
>
(
idx_loc
),
const_mav
<
complex
<
T
>
,
2
>
(
grid
),
vis_loc
,
const_mav
<
T
,
1
>
(
nullptr
,{
0
}),
wcur
,
T
(
dw
));
gconf
.
apply_wscreen
(
cmav
(
tdirty2
)
,
tdirty2
,
wcur
,
true
);
gconf
.
dirty2grid_c
(
cmav
(
tdirty2
)
,
grid
);
grid2vis_c
(
srv
.
getBaselines
(),
gconf
,
c
mav
(
idx_loc
),
cmav
(
grid
),
vis_loc
,
const_mav
<
T
,
1
>
(
nullptr
,{
0
}),
wcur
,
T
(
dw
));
#pragma omp parallel for num_threads(nthreads)
for
(
size_t
i
=
0
;
i
<
nvis_plane
[
iw
];
++
i
)
...
...
@@ -1533,7 +1688,6 @@ template<typename T> void full_degridding(const const_mav<T,2> &uvw,
// public names
using
detail
::
mav
;
using
detail
::
const_mav
;
using
detail
::
myassert
;
using
detail
::
Baselines
;
using
detail
::
GridderConfig
;
using
detail
::
ms2grid_c
;
...
...
@@ -1544,6 +1698,8 @@ using detail::vis2dirty_wstack;
using
detail
::
dirty2vis_wstack
;
using
detail
::
full_gridding
;
using
detail
::
full_degridding
;
using
detail
::
streamDump__
;
using
detail
::
CodeLocation
;
}
// namespace gridder
#endif
nifty_gridder.cc
View file @
8b794388
...
...
@@ -233,6 +233,34 @@ template<typename T> class PyBaselines: public Baselines<T>
}
};
constexpr
auto
grid2dirty_DS
=
R"""(
Converts from UV grid to dirty image (FFT, cropping, correction)
Parameters
==========
grid: np.array((nu, nv), dtype=np.float64)
gridded UV data
Returns
=======
nd.array((nxdirty, nydirty), dtype=np.float64)
the dirty image
)"""
;
constexpr
auto
dirty2grid_DS
=
R"""(
Converts from a dirty image to a UV grid (correction, padding, FFT)
Parameters
==========
dirty: nd.array((nxdirty, nydirty), dtype=np.float64)
the dirty image
Returns
=======
np.array((nu, nv), dtype=np.float64)
gridded UV data
)"""
;
constexpr
auto
apply_taper_DS
=
R"""(
Applies the taper (or its inverse) to an image
...
...
@@ -318,6 +346,17 @@ template<typename T> class PyGridderConfig: public GridderConfig<T>
}
return
res
;
}
pyarr
<
T
>
grid2dirty
(
const
pyarr
<
T
>
&
grid
)
const
{
auto
res
=
makeArray
<
T
>
({
nx_dirty
,
ny_dirty
});
auto
grid2
=
make_const_mav
<
2
>
(
grid
);
auto
res2
=
make_mav
<
2
>
(
res
);
{
py
::
gil_scoped_release
release
;
GridderConfig
<
T
>::
grid2dirty
(
grid2
,
res2
);
}
return
res
;
}
pyarr
<
complex
<
T
>>
grid2dirty_c
(
const
pyarr
<
complex
<
T
>>
&
grid
)
const
{
auto
res
=
makeArray
<
complex
<
T
>>
({
nx_dirty
,
ny_dirty
});
...
...
@@ -330,6 +369,17 @@ template<typename T> class PyGridderConfig: public GridderConfig<T>