Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Martin Reinecke
ducc
Commits
316398dc
Commit
316398dc
authored
Jan 04, 2020
by
Martin Reinecke
Browse files
rework
parent
4af2d1d2
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/mr_util/fft.h
View file @
316398dc
...
...
@@ -2343,15 +2343,14 @@ template<size_t N> class multi_iter
}
public:
multi_iter
(
const
arr_info
&
iarr_
,
const
arr_info
&
oarr_
,
size_t
idim_
)
multi_iter
(
const
arr_info
&
iarr_
,
const
arr_info
&
oarr_
,
size_t
idim_
,
size_t
nshares
,
size_t
myshare
)
:
pos
(
iarr_
.
ndim
(),
0
),
iarr
(
iarr_
),
oarr
(
oarr_
),
p_ii
(
0
),
str_i
(
iarr
.
stride
(
idim_
)),
p_oi
(
0
),
str_o
(
oarr
.
stride
(
idim_
)),
idim
(
idim_
),
rem
(
iarr
.
size
()
/
iarr
.
shape
(
idim
))
{
auto
nshares
=
num_threads
();
if
(
nshares
==
1
)
return
;
if
(
nshares
==
0
)
throw
runtime_error
(
"can't run with zero threads"
);
auto
myshare
=
thread_num
();
if
(
myshare
>=
nshares
)
throw
runtime_error
(
"impossible share requested"
);
size_t
nbase
=
rem
/
nshares
;
size_t
additional
=
rem
%
nshares
;
...
...
@@ -2596,11 +2595,11 @@ MRUTIL_NOINLINE void general_nd(const cndarr<T> &in, ndarr<T> &out,
execParallel
(
util
::
thread_count
(
nthreads
,
in
.
shape
(),
axes
[
iax
],
VLEN
<
T
>::
val
),
[
&
]
{
[
&
]
(
Scheduler
&
sched
)
{
constexpr
auto
vlen
=
VLEN
<
T0
>::
val
;
auto
storage
=
alloc_tmp
<
T0
>
(
in
.
shape
(),
len
,
sizeof
(
T
));
const
auto
&
tin
(
iax
==
0
?
in
:
out
);
multi_iter
<
vlen
>
it
(
tin
,
out
,
axes
[
iax
]);
multi_iter
<
vlen
>
it
(
tin
,
out
,
axes
[
iax
]
,
sched
.
num_threads
(),
sched
.
thread_num
()
);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
...
...
@@ -2703,10 +2702,10 @@ template<typename T> MRUTIL_NOINLINE void general_r2c(
size_t
len
=
in
.
shape
(
axis
);
execParallel
(
util
::
thread_count
(
nthreads
,
in
.
shape
(),
axis
,
VLEN
<
T
>::
val
),
[
&
]
{
[
&
]
(
Scheduler
&
sched
)
{
constexpr
auto
vlen
=
VLEN
<
T
>::
val
;
auto
storage
=
alloc_tmp
<
T
>
(
in
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
);
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
,
sched
.
num_threads
(),
sched
.
thread_num
()
);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
...
...
@@ -2758,10 +2757,10 @@ template<typename T> MRUTIL_NOINLINE void general_c2r(
size_t
len
=
out
.
shape
(
axis
);
execParallel
(
util
::
thread_count
(
nthreads
,
in
.
shape
(),
axis
,
VLEN
<
T
>::
val
),
[
&
]
{
[
&
]
(
Scheduler
&
sched
)
{
constexpr
auto
vlen
=
VLEN
<
T
>::
val
;
auto
storage
=
alloc_tmp
<
T
>
(
out
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
);
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
,
sched
.
num_threads
(),
sched
.
thread_num
()
);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
...
...
src/mr_util/gl_integrator.h
View file @
316398dc
...
...
@@ -35,7 +35,7 @@ using namespace std;
class
GL_Integrator
{
private:
int
m
;
int
n_
;
vector
<
double
>
x
,
w
;
static
inline
double
one_minus_x2
(
double
x
)
...
...
@@ -43,11 +43,12 @@ class GL_Integrator
public:
GL_Integrator
(
int
n
,
size_t
nthreads
=
1
)
n_
(
n
)
{
MR_assert
(
n
>=
1
,
"number of points must be at least 1"
);
constexpr
double
pi
=
3.141592653589793238462643383279502884197
;
constexpr
double
eps
=
3e-14
;
m
=
(
n
+
1
)
>>
1
;
int
m
=
(
n
+
1
)
>>
1
;
x
.
resize
(
m
);
w
.
resize
(
m
);
...
...
@@ -99,7 +100,7 @@ class GL_Integrator
template
<
typename
Func
>
double
integrate
(
Func
f
)
{
double
res
=
0
,
istart
=
0
;
if
(
x
[
0
]
==
0.
)
if
(
n_
&
1
)
{
res
=
f
(
x
[
0
])
*
w
[
0
];
istart
=
1
;
...
...
@@ -113,11 +114,30 @@ class GL_Integrator
{
using
T
=
decltype
(
f
(
0.
));
T
res
=
f
(
x
[
0
])
*
w
[
0
];
if
(
x
[
0
]
==
0.
)
res
*=
0.5
;
if
(
n_
&
1
)
res
*=
0.5
;
for
(
size_t
i
=
1
;
i
<
x
.
size
();
++
i
)
res
+=
f
(
x
[
i
])
*
w
[
i
];
return
res
*
2
;
}
vector
<
double
>
coords
()
const
{
vector
<
double
>
res
(
n_
);
for
(
size_t
i
=
0
;
i
<
x
.
size
();
++
i
)
{
res
[
i
]
=-
x
[
x
.
size
()
-
1
-
i
];
res
[
n_
-
1
-
i
]
=
x
[
x
.
size
()
-
1
-
i
];
}
return
res
;
}
vector
<
double
>
weights
()
const
{
vector
<
double
>
res
(
n_
);
for
(
size_t
i
=
0
;
i
<
w
.
size
();
++
i
)
res
[
i
]
=
res
[
n_
-
1
-
i
]
=
w
[
w
.
size
()
-
1
-
i
];
return
res
;
}
};
}
...
...
src/mr_util/threading.h
View file @
316398dc
...
...
@@ -43,8 +43,6 @@ namespace detail_threading {
using
namespace
std
;
#ifndef MRUTIL_NO_THREADING
thread_local
size_t
thread_id
=
0
;
thread_local
size_t
num_threads_
=
1
;
static
const
size_t
max_threads_
=
max
(
1u
,
thread
::
hardware_concurrency
());
class
latch
...
...
@@ -174,7 +172,7 @@ class thread_pool
}
};
thread_pool
&
get_pool
()
inline
thread_pool
&
get_pool
()
{
static
thread_pool
pool
;
#if __has_include(<pthread.h>)
...
...
@@ -192,41 +190,15 @@ thread_pool & get_pool()
return
pool
;
}
/** Map a function f over nthreads */
template
<
typename
Func
>
void
thread_map
(
size_t
nthreads
,
Func
f
)
struct
Range
{
if
(
nthreads
==
0
)
nthreads
=
max_threads_
;
if
(
nthreads
==
1
)
{
f
();
return
;
}
auto
&
pool
=
get_pool
();
latch
counter
(
nthreads
);
exception_ptr
ex
;
mutex
ex_mut
;
for
(
size_t
i
=
0
;
i
<
nthreads
;
++
i
)
{
pool
.
submit
(
[
&
f
,
&
counter
,
&
ex
,
&
ex_mut
,
i
,
nthreads
]
{
thread_id
=
i
;
num_threads_
=
nthreads
;
try
{
f
();
}
catch
(...)
{
lock_guard
<
mutex
>
lock
(
ex_mut
);
ex
=
current_exception
();
}
counter
.
count_down
();
});
}
counter
.
wait
();
if
(
ex
)
rethrow_exception
(
ex
);
}
size_t
lo
,
hi
;
Range
()
:
lo
(
0
),
hi
(
0
)
{}
Range
(
size_t
lo_
,
size_t
hi_
)
:
lo
(
lo_
),
hi
(
hi_
)
{}
operator
bool
()
const
{
return
hi
>
lo
;
}
};
class
Scheduler
class
Distribution
{
private:
size_t
nthreads_
;
...
...
@@ -239,24 +211,19 @@ class Scheduler
typedef
enum
{
SINGLE
,
STATIC
,
DYNAMIC
}
SchedMode
;
SchedMode
mode
;
bool
single_done
;
struct
Range
{
size_t
lo
,
hi
;
Range
()
:
lo
(
0
),
hi
(
0
)
{}
Range
(
size_t
lo_
,
size_t
hi_
)
:
lo
(
lo_
),
hi
(
hi_
)
{}
operator
bool
()
const
{
return
hi
>
lo
;
}
};
template
<
typename
Func
>
void
thread_map
(
Func
f
);
public:
size_t
nthreads
()
const
{
return
nthreads_
;
}
mutex
&
mut
()
{
return
mut_
;
}
template
<
typename
Func
>
void
execSingle
(
size_t
nwork
,
Func
f
)
{
mode
=
SINGLE
;
single_done
=
false
;
nwork_
=
nwork
;
f
(
*
this
);
nthreads_
=
1
;
thread_map
(
move
(
f
));
}
template
<
typename
Func
>
void
execStatic
(
size_t
nwork
,
size_t
nthreads
,
size_t
chunksize
,
Func
f
)
...
...
@@ -270,7 +237,7 @@ class Scheduler
nextstart
.
resize
(
nthreads_
);
for
(
size_t
i
=
0
;
i
<
nextstart
.
size
();
++
i
)
nextstart
[
i
]
=
i
*
chunksize_
;
thread_map
(
nthreads_
,
[
&
]()
{
f
(
*
this
);}
);
thread_map
(
move
(
f
)
);
}
template
<
typename
Func
>
void
execDynamic
(
size_t
nwork
,
size_t
nthreads
,
size_t
chunksize_min
,
double
fact_max
,
Func
f
)
...
...
@@ -283,9 +250,17 @@ class Scheduler
return
execStatic
(
nwork
,
nthreads
,
0
,
move
(
f
));
fact_max_
=
fact_max
;
cur_
=
0
;
thread_map
(
nthreads_
,
[
&
]()
{
f
(
*
this
);}
);
thread_map
(
move
(
f
)
);
}
Range
getNext
()
// Runs f once on each participating thread: the work count equals the
// thread count and the chunk size is 1, so every thread receives exactly
// one static "work item". nthreads==0 selects the pool-wide default
// max_threads_.
template<typename Func> void execParallel(size_t nthreads, Func f)
  {
  mode = STATIC;
  nthreads_ = (nthreads==0) ? max_threads_ : nthreads;
  nwork_ = nthreads_;   // one unit of work per thread
  chunksize_ = 1;
  thread_map(move(f));
  }
Range
getNext
(
size_t
thread_id
)
{
switch
(
mode
)
{
...
...
@@ -320,38 +295,64 @@ class Scheduler
}
};
template
<
typename
Func
>
void
execParallel
(
size_t
nthreads
,
Func
f
)
class
Scheduler
{
nthreads
=
(
nthreads
==
0
)
?
max_threads_
:
nthreads
;
thread_map
(
nthreads
,
move
(
f
));
private:
Distribution
&
dist_
;
size_t
ithread_
;
public:
Scheduler
(
Distribution
&
dist
,
size_t
ithread
)
:
dist_
(
dist
),
ithread_
(
ithread
)
{}
size_t
num_threads
()
const
{
return
dist_
.
nthreads
();
}
size_t
thread_num
()
const
{
return
ithread_
;
}
Range
getNext
()
{
return
dist_
.
getNext
(
ithread_
);
}
};
// Submits nthreads_ copies of f to the shared thread pool, each invoked
// with a Scheduler identifying its thread index, and blocks until all of
// them have finished. An exception thrown by any worker is captured and
// rethrown on the calling thread (if several workers throw, a later
// capture overwrites an earlier one).
template<typename Func> void Distribution::thread_map(Func f)
  {
  auto &pool = get_pool();
  latch counter(nthreads_);   // counted down once per finished worker
  exception_ptr ex;           // last exception captured by any worker
  mutex ex_mut;               // serializes writes to ex
  for (size_t i=0; i<nthreads_; ++i)
    {
    pool.submit([this, &f, i, &counter, &ex, &ex_mut] {
      try
        {
        Scheduler sched(*this, i);   // per-thread view: thread i
        f(sched);
        }
      catch (...)
        {
        lock_guard<mutex> lock(ex_mut);
        ex = current_exception();
        }
      counter.count_down();   // always reached, so wait() cannot hang
      });
    }
  counter.wait();             // rendezvous: all workers done
  if (ex) rethrow_exception(ex);
  }
#else
constexpr
size_t
thread_id
=
0
;
constexpr
size_t
num_threads_
=
1
;
constexpr
size_t
max_threads_
=
1
;
class
Sched
uler
class
Sched
0
{
private:
size_t
nwork_
;
struct
Range
{
size_t
lo
,
hi
;
Range
()
:
lo
(
0
),
hi
(
0
)
{}
Range
(
size_t
lo_
,
size_t
hi_
)
:
lo
(
lo_
),
hi
(
hi_
)
{}
operator
bool
()
const
{
return
hi
>
lo
;
}
};
public:
size_t
nthreads
()
const
{
return
1
;
}
// mutex &mut() { return mut_; }
template
<
typename
Func
>
void
execSingle
(
size_t
nwork
,
Func
f
)
{
nwork_
=
nwork
;
f
(
*
this
);
f
(
Scheduler
(
*
this
,
0
)
);
}
template
<
typename
Func
>
void
execStatic
(
size_t
nwork
,
size_t
/*nthreads*/
,
size_t
/*chunksize*/
,
Func
f
)
...
...
@@ -362,8 +363,9 @@ class Scheduler
{
execSingle
(
nwork
,
move
(
f
));
}
// NOTE(review): diff residue fused the removed one-liner
// (`return Range(0, nwork_);`, which handed out the same range forever)
// with the added consume-once version; reconstructed below is the latter.
//
// Hands out the entire remaining work range exactly once; subsequent
// calls return an empty Range so a `while (auto r = getNext())`-style
// loop terminates.
Range getNext()
  {
  Range res(0, nwork_);
  nwork_ = 0;   // all work consumed
  return res;
  }
};
...
...
@@ -372,36 +374,43 @@ template<typename Func> void execParallel(size_t /*nthreads*/, Func f)
#endif
namespace
{
template
<
typename
Func
>
void
execSingle
(
size_t
nwork
,
Func
f
)
{
Scheduler
sched
;
sched
.
execSingle
(
nwork
,
move
(
f
));
Distribution
dist
;
dist
.
execSingle
(
nwork
,
move
(
f
));
}
template
<
typename
Func
>
void
execStatic
(
size_t
nwork
,
size_t
nthreads
,
size_t
chunksize
,
Func
f
)
{
Scheduler
sched
;
sched
.
execStatic
(
nwork
,
nthreads
,
chunksize
,
move
(
f
));
Distribution
dist
;
dist
.
execStatic
(
nwork
,
nthreads
,
chunksize
,
move
(
f
));
}
template
<
typename
Func
>
void
execDynamic
(
size_t
nwork
,
size_t
nthreads
,
size_t
chunksize_min
,
Func
f
)
{
Scheduler
sched
;
sched
.
execDynamic
(
nwork
,
nthreads
,
chunksize_min
,
0.
,
move
(
f
));
Distribution
dist
;
dist
.
execDynamic
(
nwork
,
nthreads
,
chunksize_min
,
0.
,
move
(
f
));
}
template
<
typename
Func
>
void
execGuided
(
size_t
nwork
,
size_t
nthreads
,
size_t
chunksize_min
,
double
fact_max
,
Func
f
)
{
Scheduler
sched
;
sched
.
execDynamic
(
nwork
,
nthreads
,
chunksize_min
,
fact_max
,
move
(
f
));
Distribution
dist
;
dist
.
execDynamic
(
nwork
,
nthreads
,
chunksize_min
,
fact_max
,
move
(
f
));
}
size_t
num_threads
()
{
return
num_threads_
;
}
size_t
thread_num
()
{
return
thread_id
;
}
size_t
max_threads
()
// Convenience wrapper: runs f(Scheduler&) on nthreads pool threads via a
// freshly constructed Distribution (nthreads==0 means "use the default
// maximum thread count").
template<typename Func> static void execParallel(size_t nthreads, Func f)
  {
  Distribution dist;
  dist.execParallel(nthreads, move(f));
  }
// Maximum number of threads the pool will use (>=1; derived from
// hardware_concurrency at startup).
inline size_t max_threads()
  { return max_threads_; }
}
}
// end of namespace detail
using
detail_threading
::
Scheduler
;
...
...
@@ -409,8 +418,6 @@ using detail_threading::execSingle;
using
detail_threading
::
execStatic
;
using
detail_threading
::
execDynamic
;
using
detail_threading
::
execGuided
;
using
detail_threading
::
num_threads
;
using
detail_threading
::
thread_num
;
using
detail_threading
::
max_threads
;
using
detail_threading
::
execParallel
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment