Commit 4907df75 authored by Lorenz Huedepohl
Browse files

Check for AVX support for peak performance example

parent 144ff848
......@@ -36,6 +36,25 @@ if test "$want_perf" = "yes" ; then
fi
AM_CONDITIONAL([HAVE_PERF],[test "$want_perf" = "yes"])
dnl Probe whether the compiler/assembler accepts AVX instructions in inline
dnl assembly. This is a compile-only test: nothing is executed, so it does
dnl not tell whether the machine running the examples supports AVX.
dnl On success HAVE_AVX is defined to 1 in the generated config header.
AC_MSG_CHECKING([for AVX support (important for example programs only)])
AC_COMPILE_IFELSE(
  [AC_LANG_SOURCE([[
int main(int argc, char **argv) {
  __asm__ __volatile__(
    "vmulpd %%ymm1, %%ymm2, %%ymm3;"
    "vaddpd %%ymm4, %%ymm5, %%ymm6;"
    : /* No outputs */
    : /* No inputs */
    : "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6"
  );
}]])],
  [can_compile_avx=yes],
  [can_compile_avx=no])
AC_MSG_RESULT([$can_compile_avx])
AS_IF([test "$can_compile_avx" = "yes"],
  [AC_DEFINE([HAVE_AVX], [1], [AVX code can be compiled])])
AC_LANG([Fortran])
AC_PROG_FC
AC_FC_LIBRARY_LDFLAGS
......
#define N 1000000L
#define NFLOPS 10000000L
#define FLOPS_PER_ITERATION 2
#define N_ITERATIONS (NFLOPS/FLOPS_PER_ITERATION)
double a[N];
double b[N];
double c[N];
double d[N];
double __attribute__((aligned(0x1000))) a[N_ITERATIONS];
double __attribute__((aligned(0x1000))) b[N_ITERATIONS];
double __attribute__((aligned(0x1000))) c[N_ITERATIONS];
double __attribute__((aligned(0x1000))) d[N_ITERATIONS];
/* This should produce 2 million floating point operations,
/* This should produce 10 million floating point operations,
* with an arithmetic intensity of
*
* AI = #FLOP / #BYTES = 2 / (4 * sizeof(double)) = 0.0625
......@@ -14,11 +16,36 @@ double d[N];
/* Vector triad kernel: a[i] = b[i] + c[i] * d[i], i.e. one multiplication
 * and one addition per element, reading three arrays and writing one.
 *
 * NOTE(review): this listing comes from a diff view, so the two `for`
 * headers below are presumably the removed and the added version of the
 * same loop header rather than a nested loop -- confirm against the file. */
void vector_triad(void) {
int i;
for (i=0; i < N; i++) {
for (i = 0; i < N_ITERATIONS; i++) {
a[i] = b[i] + c[i] * d[i];
}
}
/*
 * Issue NFLOPS double precision floating point operations that work on
 * registers only (no memory operands), as a reference point for the
 * attainable floating point rate.
 *
 * The register contents are never initialised and the results are
 * discarded: the asm statements have neither inputs nor outputs, only
 * clobbers. Takes no arguments, returns nothing.
 *
 * HAVE_AVX selects the 256 bit AVX variant, otherwise 128 bit SSE2
 * instructions are used. In both variants the loop count is chosen so
 * that the total is 2 * N_ITERATIONS = NFLOPS operations.
 */
void peak_perf(void) {
    int i;
#ifdef HAVE_AVX
    /* One pass = vmulpd + vaddpd on 4 doubles each = 8 FLOP,
     * hence N_ITERATIONS / 4 passes. The destinations (ymm3, ymm6) are
     * never used as sources, so the passes do not depend on each other. */
    for (i = 0; i < N_ITERATIONS / 4; i++) {
        __asm__ __volatile__(
            "vmulpd %%ymm1, %%ymm2, %%ymm3;"
            "vaddpd %%ymm4, %%ymm5, %%ymm6;"
            : /* No outputs */
            : /* No inputs */
            : "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6"
        );
    }
    /* The loop above leaves the upper 128 bits of ymm3/ymm6 dirty.
     * HAVE_AVX only says that AVX code can be assembled, not that the
     * rest of the program is built with AVX, so clear the upper halves
     * before returning to avoid AVX/SSE transition penalties in
     * following legacy SSE code. vzeroupper touches every ymm register,
     * therefore all of them are listed as clobbered (ymm8-15 exist in
     * 64 bit mode only). */
    __asm__ __volatile__(
        "vzeroupper;"
        : /* No outputs */
        : /* No inputs */
        : "%ymm0", "%ymm1", "%ymm2", "%ymm3",
          "%ymm4", "%ymm5", "%ymm6", "%ymm7"
#ifdef __x86_64__
        , "%ymm8", "%ymm9", "%ymm10", "%ymm11",
          "%ymm12", "%ymm13", "%ymm14", "%ymm15"
#endif
    );
#else
    /* One pass = mulpd + addpd on 2 doubles each = 4 FLOP,
     * hence N_ITERATIONS / 2 passes.
     * NOTE(review): in the two-operand SSE form xmm2 and xmm4 are both
     * source and destination, so consecutive passes depend on each
     * other; this may limit the rate to the instruction latency rather
     * than the peak throughput -- confirm by measurement. */
    for (i = 0; i < N_ITERATIONS / 2; i++) {
        __asm__ __volatile__(
            "mulpd %%xmm1, %%xmm2;"
            "addpd %%xmm3, %%xmm4;"
            : /* No outputs */
            : /* No inputs */
            : "%xmm1", "%xmm2", "%xmm3", "%xmm4"
        );
    }
#endif
}
#ifdef TEST_DO_FLOPS
int main(int argc, char **argv) {
vector_triad();
......
......@@ -15,6 +15,8 @@
! You should have received a copy of the GNU Lesser General Public License
! along with ftimings. If not, see <http://www.gnu.org/licenses/>.
#include "config-f90.h"
program test_timings
use ftimings
use iso_c_binding, only : C_DOUBLE
......@@ -25,23 +27,22 @@ program test_timings
call timer%measure_flops(.true.)
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_memory_bandwidth(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%measure_memory_bandwidth(.true.)
call timer%set_print_options(&
print_flop_count=.true., &
! print_flop_count=.true., &
print_flop_rate=.true., &
print_virtual_memory=.true., &
print_max_allocated_memory=.true., &
print_memory_transferred=.true., &
! print_max_allocated_memory=.true., &
! print_memory_transferred=.true., &
print_memory_bandwidth=.true., &
print_ai=.true.)
print_ai=.true. &
)
call timer%enable()
call timer%start("pre-program")
call timer%stop("pre-program")
call timer%start("program")
call timer%start("main-loop")
do i = 1,10
! Test a bit more complex enable/disable decisions
if (i < 5 .or. mod(i,4) == 0) then
......@@ -66,43 +67,41 @@ program test_timings
if (i == 15) then
! test run-in-progress report when printing
! entries that are not yet done
call timer%print("program")
call timer%print("main-loop")
else
! usual printing of current subtree
call timer%print("program", "cycle")
call timer%print("main-loop", "cycle")
endif
write(*,*)
write(*,'(a,f12.6)') " cycle total : ", timer%get("program", "cycle")
write(*,'(a,f12.6)') " cycle total : ", timer%get("main-loop", "cycle")
write(*,'(a,f12.6)') " in c entries: ", timer%in_entries("c")
write(*,'(a,f8.2,a)') " c part: ", timer%in_entries("c") / timer%get("program", "cycle") * 100, "%"
write(*,'(a,f8.2,a)') " c part: ", timer%in_entries("c") / timer%get("main-loop", "cycle") * 100, "%"
write(*,'(a,f12.6)') " in b entries: ", timer%in_entries("b")
write(*,'(a,f8.2,a)') " b part: ", timer%in_entries("b") / timer%get("program", "cycle") * 100, "%"
write(*,'(a,f8.2,a)') " b part: ", timer%in_entries("b") / timer%get("main-loop", "cycle") * 100, "%"
#ifndef _OPENMP
write(*,'(a,f12.6)') " cycle -> a -> b -> c : ", timer%get("program", "cycle", "a", "b", "c")
write(*,'(a,f12.6)') " cycle -> a -> b -> c : ", timer%get("main-loop", "cycle", "a", "b", "c")
#else
write(*,'(a,f12.6)') " cycle -> a -> b -> c : ", timer%get("program", "cycle", "parallel", "a", "b", "c")
write(*,'(a,f12.6)') " cycle -> a -> b -> c : ", timer%get("main-loop", "cycle", "parallel", "a", "b", "c")
#endif
write(*,*)
endif
end do
call timer%enable()
call timer%stop("program")
call timer%start("post-program")
call timer%stop("post-program")
call timer%stop("main-loop")
write(*,*)
write(*,*) "Total program:"
call timer%print("program")
write(*,*) "Total main-loop:"
call timer%print("main-loop")
write(*,*)
write(*,*) "Sorted:"
call timer%sort()
call timer%print("program")
call timer%print("main-loop")
write(*,*)
write(*,*) "Ignoring entries <0.02s:"
call timer%print("program", threshold=0.02_C_DOUBLE)
call timer%print("main-loop", threshold=0.02_C_DOUBLE)
write(*,*)
write(*,*) "Whole tree:"
......@@ -139,6 +138,11 @@ program test_timings
end subroutine
end interface
! C-interoperable interface for peak_perf(): bound to the C symbol
! "peak_perf", takes no arguments and returns nothing.
interface
subroutine peak_perf() bind(C, name="peak_perf")
end subroutine
end interface
interface
subroutine fill_100_mebi() bind(C, name="fill_100_mebi")
end subroutine
......@@ -146,10 +150,18 @@ program test_timings
call timer%start("c")
call timer%start("2.0 Mflop, AI=0.0625")
call timer%start("10.0 Mflop, AI=0.0625")
call vector_triad()
call timer%stop()
#ifdef HAVE_AVX
call timer%start("10.0 Mflop within registers (AVX)")
#else
call timer%start("10.0 Mflop within registers (SSE)")
#endif
call peak_perf()
call timer%stop()
call timer%start("Fill 100 MiB")
call fill_100_mebi()
call timer%stop()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment