Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Martin Reinecke
pypocketfft
Commits
98bcb4c3
Commit
98bcb4c3
authored
Aug 07, 2019
by
Martin Reinecke
Browse files
Merge branch 'thread_pool' into 'master'
Custom thread pool See merge request
!23
parents
a9a6e6ab
891b5454
Changes
2
Hide whitespace changes
Inline
Side-by-side
pocketfft_hdronly.h
View file @
98bcb4c3
...
...
@@ -62,8 +62,16 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include
<array>
#include
<mutex>
#endif
#ifdef POCKETFFT_OPENMP
#include
<omp.h>
#include <atomic>
#include <condition_variable>
#include <exception>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#ifdef POCKETFFT_PTHREADS
# include <pthread.h>
#endif
...
...
@@ -557,22 +565,197 @@ struct util // hack to avoid duplicate symbols
if
(
axis
>=
shape
.
size
())
throw
invalid_argument
(
"bad axis number"
);
}
#ifdef POCKETFFT_OPENMP
static
size_t
nthreads
()
{
return
size_t
(
omp_get_num_threads
());
}
static
size_t
thread_num
()
{
return
size_t
(
omp_get_thread_num
());
}
static
size_t
thread_count
(
size_t
nthreads
,
const
shape_t
&
shape
,
size_t
axis
)
size_t
axis
,
size_t
vlen
)
{
if
(
nthreads
==
1
)
return
1
;
if
(
prod
(
shape
)
<
20
*
shape
[
axis
])
return
1
;
return
(
nthreads
==
0
)
?
size_t
(
omp_get_max_threads
())
:
nthreads
;
size_t
size
=
prod
(
shape
);
size_t
parallel
=
size
/
(
shape
[
axis
]
*
vlen
);
if
(
shape
[
axis
]
<
1000
)
parallel
/=
4
;
size_t
max_threads
=
nthreads
==
0
?
thread
::
hardware_concurrency
()
:
nthreads
;
return
max
(
size_t
(
1
),
min
(
parallel
,
max_threads
));
}
#else
static
constexpr
size_t
nthreads
()
{
return
1
;
}
static
constexpr
size_t
thread_num
()
{
return
0
;
}
#endif
};
namespace
threading
{
// Per-thread index of the task currently being executed on behalf of
// thread_map(); thread_map() assigns it on the worker right before invoking
// the user function. Stays at 0 on threads that never ran such a task.
// NOTE(review): this is a non-inline variable definition in a header; confirm
// the header is only ever included from a single translation unit.
thread_local
size_t
thread_id
=
0
;
// Per-thread count of tasks in the thread_map() call currently being served;
// assigned together with thread_id. Stays at 1 on threads that never ran
// such a task.
thread_local
size_t
num_threads
=
1
;
/// Single-use countdown latch.
///
/// Constructed with a count `n`; every call to count_down() decrements the
/// count, and wait() blocks the caller until the count has reached zero.
class latch
{
  std::atomic<size_t> num_left_;       // count_down() calls still outstanding
  std::mutex mut_;                     // serialises decrement/notify against wait()
  std::condition_variable completed_;  // signalled when num_left_ reaches zero
  using lock_t = std::unique_lock<std::mutex>;

public:
  /// @param n number of count_down() calls required before wait() returns.
  latch(size_t n) : num_left_(n) {}

  /// Decrements the counter; wakes all waiters when it reaches zero.
  void count_down()
  {
    lock_t lock(mut_);
    if (--num_left_)
      return;
    // Notify while mut_ is still held: a waiter has to re-acquire mut_ before
    // it can return from wait(), so it cannot destroy this latch (e.g. a
    // stack-allocated one) while completed_ is still being used here.
    completed_.notify_all();
  }

  /// Blocks until the counter is zero; returns immediately if it already is.
  void wait()
  {
    lock_t lock(mut_);
    completed_.wait(lock, [this] { return is_ready(); });
  }

  /// @return true once the counter has reached zero.
  bool is_ready() const { return num_left_ == 0; }
};
/// Mutex-protected FIFO queue with a blocking pop() and a shutdown flag.
///
/// After shutdown(), push() throws and pop() stops blocking: it still hands
/// out items that are already queued and returns false once the queue is
/// empty. restart() clears the flag again.
template <typename T>
class concurrent_queue
{
  std::queue<T> q_;
  std::mutex mut_;                      // guards q_ and shutdown_
  std::condition_variable item_added_;  // signalled on push() and shutdown()
  bool shutdown_;
  using lock_t = std::unique_lock<std::mutex>;

public:
  concurrent_queue() : shutdown_(false) {}

  /// Appends `val` and wakes one waiting consumer.
  /// @throws runtime_error if the queue has been shut down.
  void push(T val)
  {
    {
      lock_t lock(mut_);
      if (shutdown_)
        throw std::runtime_error("Item added to queue after shutdown");
      q_.push(std::move(val));
    }
    item_added_.notify_one();
  }

  /// Blocks until an item is available or the queue is shut down.
  /// @param val receives the front item on success.
  /// @return true if an item was stored in `val`; false if the queue is
  ///         empty, which can only happen after shutdown().
  bool pop(T &val)
  {
    lock_t lock(mut_);
    item_added_.wait(lock, [this] { return !q_.empty() || shutdown_; });
    if (q_.empty())
      return false;  // We are shutting down
    val = std::move(q_.front());
    q_.pop();
    return true;
  }

  /// Sets the shutdown flag and wakes every blocked consumer.
  void shutdown()
  {
    {
      lock_t lock(mut_);
      shutdown_ = true;
    }
    item_added_.notify_all();
  }

  /// Clears the shutdown flag so that push() is accepted again.
  void restart()
  {
    // Take the lock like every other access to shutdown_, so the write cannot
    // race with a concurrent push()/pop()/shutdown().
    lock_t lock(mut_);
    shutdown_ = false;
  }
};
/// Fixed-size pool of worker threads that execute submitted callables in
/// FIFO order.
class thread_pool
{
  concurrent_queue<std::function<void()>> work_queue_;
  std::vector<std::thread> threads_;

  /// Worker loop: run queued tasks until the queue reports shutdown.
  void worker_main()
  {
    std::function<void()> work;
    while (work_queue_.pop(work))
      work();
  }

  /// Starts one worker per slot in threads_. If starting a thread fails, the
  /// workers started so far are shut down and the exception is rethrown.
  void create_threads()
  {
    for (auto &worker : threads_)
    {
      try
      {
        worker = std::thread([this] { worker_main(); });
      }
      catch (...)
      {
        shutdown();
        throw;
      }
    }
  }

  /// hardware_concurrency() may return 0 when the value is not computable;
  /// a pool without workers would never run any task, so use at least one.
  static size_t default_thread_count()
  {
    const size_t hw = std::thread::hardware_concurrency();
    return hw == 0 ? 1 : hw;
  }

public:
  /// Creates a pool with exactly `nthreads` workers.
  explicit thread_pool(size_t nthreads) : threads_(nthreads)
  {
    create_threads();
  }

  /// Creates a pool sized to the hardware concurrency (at least one worker).
  thread_pool() : thread_pool(default_thread_count()) {}

  ~thread_pool() { shutdown(); }

  /// Queues `work` for execution on one of the workers.
  /// Throws if the pool has been shut down (see concurrent_queue::push).
  void submit(std::function<void()> work)
  {
    work_queue_.push(std::move(work));
  }

  /// Shuts the queue down and joins all workers. Tasks that are already
  /// queued are still executed before the workers exit.
  void shutdown()
  {
    work_queue_.shutdown();
    for (auto &worker : threads_)
      if (worker.joinable())
        worker.join();
  }

  /// Re-enables the queue and starts a fresh set of workers; intended to be
  /// called after shutdown().
  void restart()
  {
    work_queue_.restart();
    create_threads();
  }
};
/// Returns the process-wide thread pool, creating it on first use.
///
/// Declared inline because this definition lives in a header-only file and
/// would otherwise be defined once per including translation unit.
///
/// With POCKETFFT_PTHREADS defined, fork handlers are registered once: the
/// pool is shut down before fork() and restarted afterwards in both the
/// parent and the child.
inline thread_pool &get_pool()
{
  static thread_pool pool;
#ifdef POCKETFFT_PTHREADS
  static std::once_flag f;
  std::call_once(f, [] {
    pthread_atfork(
      +[] { get_pool().shutdown(); },  // prepare
      +[] { get_pool().restart(); },   // parent
      +[] { get_pool().restart(); }    // child
    );
  });
#endif
  return pool;
}
/** Map a function f over nthreads
 *
 * Runs `f` once per task and returns when all tasks have finished. Inside
 * `f`, the thread-local `thread_id` and `num_threads` hold the task index and
 * the total task count.
 *
 * @param nthreads number of tasks; 0 selects the hardware concurrency (or 1
 *                 if that cannot be determined). With 1 task, `f` is called
 *                 directly on the calling thread.
 * @param f        callable taking no arguments.
 *
 * If one or more invocations of `f` throw, the first captured exception is
 * rethrown on the calling thread after all tasks have finished.
 */
template <typename Func>
void thread_map(size_t nthreads, Func f)
{
  if (nthreads == 0)
  {
    nthreads = std::thread::hardware_concurrency();
    // hardware_concurrency() may report 0; without this, no task would be
    // submitted and f would silently never run.
    if (nthreads == 0)
      nthreads = 1;
  }

  if (nthreads == 1)
  {
    f();
    return;
  }

  auto &pool = get_pool();
  latch counter(nthreads);
  std::exception_ptr first_error;  // first exception thrown by any task
  std::mutex error_mut;            // guards first_error
  for (size_t i = 0; i < nthreads; ++i)
  {
    pool.submit([&f, &counter, &first_error, &error_mut, i, nthreads] {
      thread_id = i;
      num_threads = nthreads;
      try
      {
        f();
      }
      catch (...)
      {
        // An exception leaving the task would escape the worker thread and
        // terminate the process; keep it and hand it to the caller instead.
        std::lock_guard<std::mutex> lock(error_mut);
        if (!first_error)
          first_error = std::current_exception();
      }
      // Always count down, so the caller is released even if f threw.
      counter.count_down();
    });
  }
  counter.wait();
  if (first_error)
    std::rethrow_exception(first_error);
}
}
//
// complex FFTPACK transforms
//
...
...
@@ -2661,10 +2844,10 @@ template<size_t N> class multi_iter
str_i
(
iarr
.
stride
(
idim_
)),
p_oi
(
0
),
str_o
(
oarr
.
stride
(
idim_
)),
idim
(
idim_
),
rem
(
iarr
.
size
()
/
iarr
.
shape
(
idim
))
{
auto
nshares
=
util
::
n
threads
()
;
auto
nshares
=
threading
::
num_
threads
;
if
(
nshares
==
1
)
return
;
if
(
nshares
==
0
)
throw
runtime_error
(
"can't run with zero threads"
);
auto
myshare
=
util
::
thread_
num
()
;
auto
myshare
=
threading
::
thread_
id
;
if
(
myshare
>=
nshares
)
throw
runtime_error
(
"impossible share requested"
);
size_t
nbase
=
rem
/
nshares
;
size_t
additional
=
rem
%
nshares
;
...
...
@@ -2838,12 +3021,6 @@ template<typename T> arr<char> alloc_tmp(const shape_t &shape,
return
arr
<
char
>
(
tmpsize
*
elemsize
);
}
#ifdef POCKETFFT_OPENMP
#define POCKETFFT_NTHREADS nthreads
#else
#define POCKETFFT_NTHREADS
#endif
template
<
typename
T
,
size_t
vlen
>
void
copy_input
(
const
multi_iter
<
vlen
>
&
it
,
const
cndarr
<
cmplx
<
T
>>
&
src
,
cmplx
<
vtype_t
<
T
>>
*
POCKETFFT_RESTRICT
dst
)
{
...
...
@@ -2902,7 +3079,7 @@ template <typename T> using add_vec_t = typename add_vec<T>::type;
template
<
typename
Tplan
,
typename
T
,
typename
T0
,
typename
Exec
>
POCKETFFT_NOINLINE
void
general_nd
(
const
cndarr
<
T
>
&
in
,
ndarr
<
T
>
&
out
,
const
shape_t
&
axes
,
T0
fct
,
size_t
POCKETFFT_NTHREADS
,
const
Exec
&
exec
,
const
shape_t
&
axes
,
T0
fct
,
size_t
nthreads
,
const
Exec
&
exec
,
const
bool
allow_inplace
=
true
)
{
shared_ptr
<
Tplan
>
plan
;
...
...
@@ -2914,30 +3091,28 @@ POCKETFFT_NOINLINE void general_nd(const cndarr<T> &in, ndarr<T> &out,
if
((
!
plan
)
||
(
len
!=
plan
->
length
()))
plan
=
get_plan
<
Tplan
>
(
len
);
#ifdef POCKETFFT_OPENMP
#pragma omp parallel num_threads(util::thread_count(nthreads, in.shape(), axes[iax]))
#endif
{
auto
storage
=
alloc_tmp
<
T0
>
(
in
.
shape
(),
len
,
sizeof
(
T
));
const
auto
&
tin
(
iax
==
0
?
in
:
out
);
multi_iter
<
vlen
>
it
(
tin
,
out
,
axes
[
iax
]);
threading
::
thread_map
(
util
::
thread_count
(
nthreads
,
in
.
shape
(),
axes
[
iax
],
vlen
),
[
&
]
{
auto
storage
=
alloc_tmp
<
T0
>
(
in
.
shape
(),
len
,
sizeof
(
T
));
const
auto
&
tin
(
iax
==
0
?
in
:
out
);
multi_iter
<
vlen
>
it
(
tin
,
out
,
axes
[
iax
]);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
{
it
.
advance
(
vlen
);
auto
tdatav
=
reinterpret_cast
<
add_vec_t
<
T
>
*>
(
storage
.
data
());
exec
(
it
,
tin
,
out
,
tdatav
,
*
plan
,
fct
);
}
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
{
it
.
advance
(
vlen
);
auto
tdatav
=
reinterpret_cast
<
add_vec_t
<
T
>
*>
(
storage
.
data
());
exec
(
it
,
tin
,
out
,
tdatav
,
*
plan
,
fct
);
}
#endif
while
(
it
.
remaining
()
>
0
)
{
it
.
advance
(
1
);
auto
buf
=
allow_inplace
&&
it
.
stride_out
()
==
sizeof
(
T
)
?
&
out
[
it
.
oofs
(
0
)]
:
reinterpret_cast
<
T
*>
(
storage
.
data
());
exec
(
it
,
tin
,
out
,
buf
,
*
plan
,
fct
);
}
}
// end of parallel region
while
(
it
.
remaining
()
>
0
)
{
it
.
advance
(
1
);
auto
buf
=
allow_inplace
&&
it
.
stride_out
()
==
sizeof
(
T
)
?
&
out
[
it
.
oofs
(
0
)]
:
reinterpret_cast
<
T
*>
(
storage
.
data
());
exec
(
it
,
tin
,
out
,
buf
,
*
plan
,
fct
);
}
});
// end of parallel region
fct
=
T0
(
1
);
// factor has been applied, use 1 for remaining axes
}
}
...
...
@@ -3017,119 +3192,115 @@ struct ExecDcst
template
<
typename
T
>
POCKETFFT_NOINLINE
void
general_r2c
(
const
cndarr
<
T
>
&
in
,
ndarr
<
cmplx
<
T
>>
&
out
,
size_t
axis
,
bool
forward
,
T
fct
,
size_t
POCKETFFT_NTHREADS
)
size_t
nthreads
)
{
auto
plan
=
get_plan
<
pocketfft_r
<
T
>>
(
in
.
shape
(
axis
));
constexpr
auto
vlen
=
VLEN
<
T
>::
val
;
size_t
len
=
in
.
shape
(
axis
);
#ifdef POCKETFFT_OPENMP
#pragma omp parallel num_threads(util::thread_count(nthreads, in.shape(), axis))
#endif
{
auto
storage
=
alloc_tmp
<
T
>
(
in
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
);
threading
::
thread_map
(
util
::
thread_count
(
nthreads
,
in
.
shape
(),
axis
,
vlen
),
[
&
]
{
auto
storage
=
alloc_tmp
<
T
>
(
in
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
{
it
.
advance
(
vlen
);
auto
tdatav
=
reinterpret_cast
<
vtype_t
<
T
>
*>
(
storage
.
data
());
copy_input
(
it
,
in
,
tdatav
);
plan
->
exec
(
tdatav
,
fct
,
true
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
out
[
it
.
oofs
(
j
,
0
)].
Set
(
tdatav
[
0
][
j
]);
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
{
it
.
advance
(
vlen
);
auto
tdatav
=
reinterpret_cast
<
vtype_t
<
T
>
*>
(
storage
.
data
());
copy_input
(
it
,
in
,
tdatav
);
plan
->
exec
(
tdatav
,
fct
,
true
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
out
[
it
.
oofs
(
j
,
0
)].
Set
(
tdatav
[
0
][
j
]);
size_t
i
=
1
,
ii
=
1
;
if
(
forward
)
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
out
[
it
.
oofs
(
j
,
ii
)].
Set
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
else
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
out
[
it
.
oofs
(
j
,
ii
)].
Set
(
tdatav
[
i
][
j
],
-
tdatav
[
i
+
1
][
j
]);
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
out
[
it
.
oofs
(
j
,
ii
)].
Set
(
tdatav
[
i
][
j
]);
}
#endif
while
(
it
.
remaining
()
>
0
)
{
it
.
advance
(
1
);
auto
tdata
=
reinterpret_cast
<
T
*>
(
storage
.
data
());
copy_input
(
it
,
in
,
tdata
);
plan
->
exec
(
tdata
,
fct
,
true
);
out
[
it
.
oofs
(
0
)].
Set
(
tdata
[
0
]);
size_t
i
=
1
,
ii
=
1
;
if
(
forward
)
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
out
[
it
.
oofs
(
j
,
ii
)].
Set
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
out
[
it
.
oofs
(
ii
)].
Set
(
tdata
[
i
],
tdata
[
i
+
1
]);
else
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
out
[
it
.
oofs
(
j
,
ii
)].
Set
(
tdatav
[
i
][
j
],
-
tdatav
[
i
+
1
][
j
]);
out
[
it
.
oofs
(
ii
)].
Set
(
tdata
[
i
],
-
tdata
[
i
+
1
]);
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
out
[
it
.
oofs
(
j
,
ii
)].
Set
(
tdatav
[
i
][
j
]);
out
[
it
.
oofs
(
ii
)].
Set
(
tdata
[
i
]);
}
#endif
while
(
it
.
remaining
()
>
0
)
{
it
.
advance
(
1
);
auto
tdata
=
reinterpret_cast
<
T
*>
(
storage
.
data
());
copy_input
(
it
,
in
,
tdata
);
plan
->
exec
(
tdata
,
fct
,
true
);
out
[
it
.
oofs
(
0
)].
Set
(
tdata
[
0
]);
size_t
i
=
1
,
ii
=
1
;
if
(
forward
)
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
out
[
it
.
oofs
(
ii
)].
Set
(
tdata
[
i
],
tdata
[
i
+
1
]);
else
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
out
[
it
.
oofs
(
ii
)].
Set
(
tdata
[
i
],
-
tdata
[
i
+
1
]);
if
(
i
<
len
)
out
[
it
.
oofs
(
ii
)].
Set
(
tdata
[
i
]);
}
}
// end of parallel region
});
// end of parallel region
}
template
<
typename
T
>
POCKETFFT_NOINLINE
void
general_c2r
(
const
cndarr
<
cmplx
<
T
>>
&
in
,
ndarr
<
T
>
&
out
,
size_t
axis
,
bool
forward
,
T
fct
,
size_t
POCKETFFT_NTHREADS
)
size_t
nthreads
)
{
auto
plan
=
get_plan
<
pocketfft_r
<
T
>>
(
out
.
shape
(
axis
));
constexpr
auto
vlen
=
VLEN
<
T
>::
val
;
size_t
len
=
out
.
shape
(
axis
);
#ifdef POCKETFFT_OPENMP
#pragma omp parallel num_threads(util::thread_count(nthreads, in.shape(), axis))
#endif
{
auto
storage
=
alloc_tmp
<
T
>
(
out
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
);
threading
::
thread_map
(
util
::
thread_count
(
nthreads
,
in
.
shape
(),
axis
,
vlen
),
[
&
]
{
auto
storage
=
alloc_tmp
<
T
>
(
out
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
{
it
.
advance
(
vlen
);
auto
tdatav
=
reinterpret_cast
<
vtype_t
<
T
>
*>
(
storage
.
data
());
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
0
][
j
]
=
in
[
it
.
iofs
(
j
,
0
)].
r
;
{
size_t
i
=
1
,
ii
=
1
;
if
(
forward
)
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
in
[
it
.
iofs
(
j
,
ii
)].
SplitConj
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
else
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
{
it
.
advance
(
vlen
);
auto
tdatav
=
reinterpret_cast
<
vtype_t
<
T
>
*>
(
storage
.
data
());
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
in
[
it
.
iofs
(
j
,
ii
)].
Split
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
in
[
it
.
iofs
(
j
,
ii
)].
r
;
}
plan
->
exec
(
tdatav
,
fct
,
false
);
copy_output
(
it
,
tdatav
,
out
);
}
tdatav
[
0
][
j
]
=
in
[
it
.
iofs
(
j
,
0
)].
r
;
{
size_t
i
=
1
,
ii
=
1
;
if
(
forward
)
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
in
[
it
.
iofs
(
j
,
ii
)].
SplitConj
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
else
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
in
[
it
.
iofs
(
j
,
ii
)].
Split
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
in
[
it
.
iofs
(
j
,
ii
)].
r
;
}
plan
->
exec
(
tdatav
,
fct
,
false
);
copy_output
(
it
,
tdatav
,
out
);
}
#endif
while
(
it
.
remaining
()
>
0
)
{
it
.
advance
(
1
);
auto
tdata
=
reinterpret_cast
<
T
*>
(
storage
.
data
());
tdata
[
0
]
=
in
[
it
.
iofs
(
0
)].
r
;
{
size_t
i
=
1
,
ii
=
1
;
if
(
forward
)
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
in
[
it
.
iofs
(
ii
)].
SplitConj
(
tdata
[
i
],
tdata
[
i
+
1
]);
else
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
in
[
it
.
iofs
(
ii
)].
Split
(
tdata
[
i
],
tdata
[
i
+
1
]);
if
(
i
<
len
)
tdata
[
i
]
=
in
[
it
.
iofs
(
ii
)].
r
;
}
plan
->
exec
(
tdata
,
fct
,
false
);
copy_output
(
it
,
tdata
,
out
);
}
}
// end of parallel region
while
(
it
.
remaining
()
>
0
)
{
it
.
advance
(
1
);
auto
tdata
=
reinterpret_cast
<
T
*>
(
storage
.
data
());
tdata
[
0
]
=
in
[
it
.
iofs
(
0
)].
r
;
{
size_t
i
=
1
,
ii
=
1
;
if
(
forward
)
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
in
[
it
.
iofs
(
ii
)].
SplitConj
(
tdata
[
i
],
tdata
[
i
+
1
]);
else
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
in
[
it
.
iofs
(
ii
)].
Split
(
tdata
[
i
],
tdata
[
i
+
1
]);
if
(
i
<
len
)
tdata
[
i
]
=
in
[
it
.
iofs
(
ii
)].
r
;
}
plan
->
exec
(
tdata
,
fct
,
false
);
copy_output
(
it
,
tdata
,
out
);
}
});
// end of parallel region
}
struct
ExecR2R
...
...
@@ -3152,8 +3323,6 @@ struct ExecR2R
}
};
#undef POCKETFFT_NTHREADS
template
<
typename
T
>
void
c2c
(
const
shape_t
&
shape
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
const
shape_t
&
axes
,
bool
forward
,
const
complex
<
T
>
*
data_in
,
complex
<
T
>
*
data_out
,
T
fct
,
...
...
setup.py
View file @
98bcb4c3
...
...
@@ -15,6 +15,7 @@ include_dirs = ['./', _deferred_pybind11_include(True),
_deferred_pybind11_include
()]
extra_compile_args
=
[
'--std=c++11'
,
'-march=native'
,
'-O3'
]
python_module_link_args
=
[]
define_macros
=
[(
'POCKETFFT_PTHREADS'
,
None
)]
if
sys
.
platform
==
'darwin'
:
import
distutils.sysconfig
...
...
@@ -23,8 +24,8 @@ if sys.platform == 'darwin':
vars
[
'LDSHARED'
]
=
vars
[
'LDSHARED'
].
replace
(
'-bundle'
,
''
)
python_module_link_args
+=
[
'-bundle'
]
else
:
extra_compile_args
+=
[
'-DPOCKETFFT_OPENMP'
,
'-fopenmp'
,
'-Wfatal-errors'
,
'-Wfloat-conversion'
,
'-Wsign-conversion'
,
'-Wconversion'
,
'-W'
,
'-Wall'
,
'-Wstrict-aliasing=2'
,
'-Wwrite-strings'
,
'-Wredundant-decls'
,
'-Woverloaded-virtual'
,
'-Wcast-qual'
,
'-Wcast-align'
,
'-Wpointer-arith'
]
python_module_link_args
+=
[
'-march=native'
,
'-Wl,-rpath,$ORIGIN'
,
'-fopenmp'
]
extra_compile_args
+=
[
'-Wfatal-errors'
,
'-Wfloat-conversion'
,
'-Wsign-conversion'
,
'-Wconversion'
,
'-W'
,
'-Wall'
,
'-Wstrict-aliasing=2'
,
'-Wwrite-strings'
,
'-Wredundant-decls'
,
'-Woverloaded-virtual'
,
'-Wcast-qual'
,
'-Wcast-align'
,
'-Wpointer-arith'
]
python_module_link_args
+=
[
'-march=native'
,
'-Wl,-rpath,$ORIGIN'
]
# if you don't want debugging info, add "-s" to python_module_link_args
...
...
@@ -35,6 +36,7 @@ def get_extension_modules():
sources
=
[
'pypocketfft.cc'
],
depends
=
[
'pocketfft_hdronly.h'
],
include_dirs
=
include_dirs
,
define_macros
=
define_macros
,
extra_compile_args
=
extra_compile_args
,
extra_link_args
=
python_module_link_args
)]
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment