Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
719457df
Commit
719457df
authored
Mar 25, 2017
by
Andreas Marek
Browse files
Rename SSE assembly kernels
parent
f132e10f
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/compute_hh_trafo.X90
View file @
719457df
...
...
@@ -488,12 +488,14 @@
&MATH_DATATYPE&
&_&
&PRECISION&
&_sse_assembly&
& (c_loc(a(1,j+off+a_off-1,istripe,my_thread)), w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_&
&MATH_DATATYPE&
&_&
&PRECISION&
&_sse_assembly&
& (c_loc(a(1,j+off+a_off-1,istripe)), w, nbw, nl, stripe_width, nbw)
#endif
enddo
...
...
@@ -517,12 +519,14 @@
&MATH_DATATYPE&
&_&
&PRECISION&
&_sse_assembly&
& (a(1,j+off+a_off,istripe,my_thread), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_&
&MATH_DATATYPE&
&_&
&PRECISION&
&_sse_assembly&
& (a(1,j+off+a_off,istripe), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#endif
enddo
...
...
src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s
View file @
719457df
...
...
@@ -54,8 +54,8 @@
 # distributed along with the original code in the file "COPYING".
 #
 # --------------------------------------------------------------------------------------------------
-.globl double_hh_trafo_real_double
-.globl single_hh_trafo_complex_double
+.globl double_hh_trafo_real_double_sse_assembly
+.globl single_hh_trafo_complex_double_sse_assembly
 .text
 #-------------------------------------------------------------------------------
...
...
@@ -367,7 +367,7 @@
 #-------------------------------------------------------------------------------
 # FORTRAN Interface:
 #
-# subroutine double_hh_trafo_real_double(q, hh, nb, nq, ldq, ldh)
+# subroutine double_hh_trafo_real_double_sse_assembly(q, hh, nb, nq, ldq, ldh)
 #
 # integer, intent(in) :: nb, nq, ldq, ldh
 # real*8, intent(inout) :: q(ldq,*)
...
...
@@ -385,7 +385,8 @@
 #!f>#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
 #!f> interface
-#!f> subroutine double_hh_trafo_real_double(q, hh, nb, nq, ldq, ldh) bind(C, name="double_hh_trafo_real_double")
+#!f> subroutine double_hh_trafo_real_double_sse_assembly(q, hh, nb, nq, ldq, ldh) &
+#!f> bind(C, name="double_hh_trafo_real_double_sse_assembly")
 #!f> use, intrinsic :: iso_c_binding
 #!f> integer(kind=c_int) :: nb, nq, ldq, ldh
 #!f> type(c_ptr), value :: q
...
...
@@ -394,7 +395,7 @@
 #!f> end interface
 #!f>#endif
 .align 16,0x90
-double_hh_trafo_real_double:
+double_hh_trafo_real_double_sse_assembly:
 # Get integer parameters into corresponding registers
...
...
@@ -695,7 +696,7 @@ return1:
 #-------------------------------------------------------------------------------
 # FORTRAN Interface:
 #
-# subroutine single_hh_trafo_complex_double(q, hh, nb, nq, ldq)
+# subroutine single_hh_trafo_complex_double_sse_assembly(q, hh, nb, nq, ldq)
 #
 # integer, intent(in) :: nb, nq, ldq
 # complex*16, intent(inout) :: q(ldq,*)
...
...
@@ -711,7 +712,8 @@ return1:
 #-------------------------------------------------------------------------------
 #!f>#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
 #!f> interface
-#!f> subroutine single_hh_trafo_complex_double(q, hh, nb, nq, ldq) bind(C, name="single_hh_trafo_complex_double")
+#!f> subroutine single_hh_trafo_complex_double_sse_assembly(q, hh, nb, nq, ldq) &
+#!f> bind(C, name="single_hh_trafo_complex_double_sse_assembly")
 #!f> use, intrinsic :: iso_c_binding
 #!f> integer(kind=c_int) :: nb, nq, ldq
 #!f> complex(kind=c_double_complex) :: q(*)
...
...
@@ -720,7 +722,7 @@ return1:
 #!f> end interface
 #!f>#endif
 .align 16,0x90
-single_hh_trafo_complex_double:
+single_hh_trafo_complex_double_sse_assembly:
 # Get integer parameters into corresponding registers
...
...
src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s
View file @
719457df
...
...
@@ -41,8 +41,8 @@
 #
 # Author: Andreas Marek, MPCDF
-.globl double_hh_trafo_real_single
-.globl single_hh_trafo_complex_single
+.globl double_hh_trafo_real_single_sse_assembly
+.globl single_hh_trafo_complex_single_sse_assembly
 .text
 #-------------------------------------------------------------------------------
...
...
@@ -376,7 +376,7 @@
 #-------------------------------------------------------------------------------
 # FORTRAN Interface:
 #
-# subroutine double_hh_trafo_real_single(q, hh, nb, nq, ldq, ldh)
+# subroutine double_hh_trafo_real_single_sse_assembly(q, hh, nb, nq, ldq, ldh)
 #
 # integer, intent(in) :: nb, nq, ldq, ldh
 # real*8, intent(inout) :: q(ldq,*)
...
...
@@ -394,7 +394,8 @@
 #!f>#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
 #!f>#ifdef WANT_SINGLE_PRECISION_REAL
 #!f> interface
-#!f> subroutine double_hh_trafo_real_single(q, hh, nb, nq, ldq, ldh) bind(C, name="double_hh_trafo_real_single")
+#!f> subroutine double_hh_trafo_real_single_sse_assembly(q, hh, nb, nq, ldq, ldh) &
+#!f> bind(C, name="double_hh_trafo_real_single_sse_assembly")
 #!f> use, intrinsic :: iso_c_binding
 #!f> integer(kind=c_int) :: nb, nq, ldq, ldh
 #!f> type(c_ptr), value :: q
...
...
@@ -404,7 +405,7 @@
 #!f>#endif
 #!f>#endif
 .align 16,0x90
-double_hh_trafo_real_single:
+double_hh_trafo_real_single_sse_assembly:
 # Get integer parameters into corresponding registers
...
...
@@ -714,7 +715,7 @@ return1:
 #-------------------------------------------------------------------------------
 # FORTRAN Interface:
 #
-# subroutine single_hh_trafo_complex_single(q, hh, nb, nq, ldq)
+# subroutine single_hh_trafo_complex_single_sse_assembly(q, hh, nb, nq, ldq)
 #
 # integer, intent(in) :: nb, nq, ldq
 # complex(kind=c_float_complex), intent(inout) :: q(ldq,*)
...
...
@@ -731,7 +732,8 @@ return1:
 #!f>#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
 #!f>#ifdef WANT_SINGLE_PRECISION_COMPLEX
 #!f> interface
-#!f> subroutine single_hh_trafo_complex_single(q, hh, nb, nq, ldq) bind(C, name="single_hh_trafo_complex_single")
+#!f> subroutine single_hh_trafo_complex_single_sse_assembly(q, hh, nb, nq, ldq) &
+#!f> bind(C, name="single_hh_trafo_complex_single_sse_assembly")
 #!f> use, intrinsic :: iso_c_binding
 #!f> integer(kind=c_int) :: nb, nq, ldq
 #!f> complex(kind=c_float_complex) :: q(*)
...
...
@@ -742,7 +744,7 @@ return1:
 #!f>#endif
 .align 16,0x90
-single_hh_trafo_complex_single:
+single_hh_trafo_complex_single_sse_assembly:
 # Get integer parameters into corresponding registers
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment