Commit cb4c4ae7 authored by Andreas Marek's avatar Andreas Marek
Browse files

Remove assumed size from generic real kernel

The generic real kernel is now contained in a module, this allows
strict interface checking! It also does not use assumed size arrays
anymore. Both points increase the possibility to debug and find errors.

However, this might be performance critical! It is possible to
switch back to the old implementation if that turns out to
be beneficial w.r.t. performance. Timings with gfortran 4.9 on Intel
Haswell showed that the new implementation is about 30 percent faster
then the previous one
parent 33a94bfc
...@@ -39,7 +39,7 @@ if HAVE_DETAILED_TIMINGS ...@@ -39,7 +39,7 @@ if HAVE_DETAILED_TIMINGS
endif endif
if WITH_REAL_GENERIC_KERNEL if WITH_REAL_GENERIC_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real.f90 libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real.F90
endif endif
if WITH_COMPLEX_GENERIC_KERNEL if WITH_COMPLEX_GENERIC_KERNEL
...@@ -308,6 +308,9 @@ elpa2.i: $(top_srcdir)/src/elpa2.F90 ...@@ -308,6 +308,9 @@ elpa2.i: $(top_srcdir)/src/elpa2.F90
elpa1.i: $(top_srcdir)/src/elpa1.F90 elpa1.i: $(top_srcdir)/src/elpa1.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/elpa1.F90 -o $@ $(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/elpa1.F90 -o $@
elpa2_kernels_real.i: $(top_srcdir)/src/elpa2_kernels/elpa2_kernels_real.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/elpa2_kernels/elpa2_kernels_real.F90 -o $@
mod_compute_hh_trafo_real.i: $(top_srcdir)/src/mod_compute_hh_trafo_real.F90 mod_compute_hh_trafo_real.i: $(top_srcdir)/src/mod_compute_hh_trafo_real.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/mod_compute_hh_trafo_real.F90 -o $@ $(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/mod_compute_hh_trafo_real.F90 -o $@
......
...@@ -666,6 +666,11 @@ DX_MAN_FEATURE(ON) ...@@ -666,6 +666,11 @@ DX_MAN_FEATURE(ON)
DX_HTML_FEATURE(ON) DX_HTML_FEATURE(ON)
DX_INIT_DOXYGEN([ELPA], [Doxyfile], [docs]) DX_INIT_DOXYGEN([ELPA], [Doxyfile], [docs])
DESPERATELY_WANT_ASSUMED_SIZE=0
if text x"${DESPERATELY_WANT_ASSUMED_SIZE}" = x"yes" ; then
AC_DEFINE([DESPERATELY_WANT_ASSUMED_SIZE],[1],[use assumed size arrays, even if not debuggable])
fi
AC_SUBST([WITH_MKL]) AC_SUBST([WITH_MKL])
AC_SUBST([WITH_BLACS]) AC_SUBST([WITH_BLACS])
AC_SUBST([with_amd_bulldozer_kernel]) AC_SUBST([with_amd_bulldozer_kernel])
......
...@@ -53,26 +53,49 @@ ...@@ -53,26 +53,49 @@
! distributed along with the original code in the file "COPYING". ! distributed along with the original code in the file "COPYING".
! !
! -------------------------------------------------------------------------------------------------- ! --------------------------------------------------------------------------------------------------
!module real_generic_kernel #include "config-f90.h"
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
#define PACK_REAL_TO_COMPLEX
#else
#undef PACK_REAL_TO_COMPLEX
#endif
#ifndef DESPERATELY_WANT_ASSUMED_SIZE
module real_generic_kernel
private
public double_hh_trafo_generic
contains
#endif
! private
! public double_hh_trafo_generic
!contains
subroutine double_hh_trafo_generic(q, hh, nb, nq, ldq, ldh) subroutine double_hh_trafo_generic(q, hh, nb, nq, ldq, ldh)
use precision use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use iso_c_binding
implicit none implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
real(kind=rk), intent(inout) :: q(ldq,*) #ifdef DESPERATELY_WANT_ASSUMED_SIZE
real(kind=rk), intent(in) :: hh(ldh,*) real(kind=rk), intent(inout) :: q(ldq,*)
real(kind=rk), intent(in) :: hh(ldh,*)
#else
real(kind=rk), intent(inout) :: q(1:ldq,1:nb+1)
real(kind=rk), intent(in) :: hh(1:ldh,1:6)
#endif
real(kind=rk) :: s real(kind=rk) :: s
integer(kind=ik) :: i integer(kind=ik) :: i
! equivalence(q(1,1),q_complex(1,1)) ! equivalence(q(1,1),q_complex(1,1))
! Safety only: ! Safety only:
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel generic: double_hh_trafo_generic")
#endif
if(mod(ldq,4) /= 0) STOP 'double_hh_trafo: ldq not divisible by 4!' if(mod(ldq,4) /= 0) STOP 'double_hh_trafo: ldq not divisible by 4!'
! Calculate dot product of the two Householder vectors ! Calculate dot product of the two Householder vectors
...@@ -84,20 +107,42 @@ ...@@ -84,20 +107,42 @@
! Do the Householder transformations ! Do the Householder transformations
#ifndef DESPERATELY_WANT_ASSUMED_SIZE
! ! assign real data to compplex pointer
! call c_f_pointer(c_loc(q), q_complex, [size(q,dim=1)/2,size(q,dim=2)])
#endif
! Always a multiple of 4 Q-rows is transformed, even if nq is smaller ! Always a multiple of 4 Q-rows is transformed, even if nq is smaller
do i=1,nq-8,12 do i=1,nq-8,12
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call hh_trafo_kernel_12_generic(q(i,1),hh, nb, ldq, ldh, s) call hh_trafo_kernel_12_generic(q(i,1),hh, nb, ldq, ldh, s)
#else
call hh_trafo_kernel_12_generic(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#endif
enddo enddo
! i > nq-8 now, i.e. at most 8 rows remain ! i > nq-8 now, i.e. at most 8 rows remain
if(nq-i+1 > 4) then if(nq-i+1 > 4) then
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
print *,"calling 8"
call hh_trafo_kernel_8_generic(q(i,1),hh, nb, ldq, ldh, s) call hh_trafo_kernel_8_generic(q(i,1),hh, nb, ldq, ldh, s)
#else
call hh_trafo_kernel_8_generic(q(i:ldq,1:nb+1), hh(1:ldh,1:2), nb, ldq, ldh, s)
#endif
else if(nq-i+1 > 0) then else if(nq-i+1 > 0) then
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
print *,"calling 4"
call hh_trafo_kernel_4_generic(q(i,1),hh, nb, ldq, ldh, s) call hh_trafo_kernel_4_generic(q(i,1),hh, nb, ldq, ldh, s)
endif #else
call hh_trafo_kernel_4_generic(q(i:ldq,1:+nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#endif
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel generic: double_hh_trafo_generic")
#endif
end subroutine double_hh_trafo_generic end subroutine double_hh_trafo_generic
! -------------------------------------------------------------------------------------------------- ! --------------------------------------------------------------------------------------------------
...@@ -109,30 +154,267 @@ ...@@ -109,30 +154,267 @@
subroutine hh_trafo_kernel_12_generic(q, hh, nb, ldq, ldh, s) subroutine hh_trafo_kernel_12_generic(q, hh, nb, ldq, ldh, s)
use precision use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
implicit none implicit none
integer(kind=ik), intent(in) :: nb, ldq, ldh integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq/2,*) complex(kind=ck), intent(inout) :: q(ldq/2,*)
real(kind=rk), intent(in) :: hh(ldh,*), s real(kind=rk), intent(in) :: h(ldh,*)
#else
real(kind=rk), intent(inout) :: q(:,:)
#endif
real(kind=rk), intent(in) :: hh(ldh,2), s
#ifdef PACK_REAL_TO_COMPLEX
complex(kind=ck) :: x1, x2, x3, x4, x5, x6, y1, y2, y3, y4, y5, y6 complex(kind=ck) :: x1, x2, x3, x4, x5, x6, y1, y2, y3, y4, y5, y6
#else
real(kind=rk) :: x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, &
y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12
#endif
real(kind=rk) :: h1, h2, tau1, tau2 real(kind=rk) :: h1, h2, tau1, tau2
integer(kind=ik) :: i integer(kind=ik) :: i
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel generic: hh_trafo_kernel_12_generic")
#endif
x1 = q(1,2)
x2 = q(2,2)
x3 = q(3,2)
x4 = q(4,2)
x5 = q(5,2)
x6 = q(6,2)
#ifndef PACK_REAL_TO_COMPLEX
x7 = q(7,2)
x8 = q(8,2)
x9 = q(9,2)
x10 = q(10,2)
x11 = q(11,2)
x12 = q(12,2)
#endif
y1 = q(1 ,1) + q(1, 2)*hh(2,2)
y2 = q(2 ,1) + q(2, 2)*hh(2,2)
y3 = q(3 ,1) + q(3, 2)*hh(2,2)
y4 = q(4 ,1) + q(4, 2)*hh(2,2)
y5 = q(5 ,1) + q(5, 2)*hh(2,2)
y6 = q(6 ,1) + q(6, 2)*hh(2,2)
#ifndef PACK_REAL_TO_COMPLEX
y7 = q(7 ,1) + q(7, 2)*hh(2,2)
y8 = q(8 ,1) + q(8, 2)*hh(2,2)
y9 = q(9 ,1) + q(9, 2)*hh(2,2)
y10 = q(10,1) + q(10,2)*hh(2,2)
y11 = q(11,1) + q(11,2)*hh(2,2)
y12 = q(12,1) + q(12,2)*hh(2,2)
#endif
!DEC$ VECTOR ALIGNED
do i=3,nb
h1 = hh(i-1,1)
h2 = hh(i,2)
x1 = x1 + q(1, i)*h1
y1 = y1 + q(1, i)*h2
x2 = x2 + q(2, i)*h1
y2 = y2 + q(2, i)*h2
x3 = x3 + q(3, i)*h1
y3 = y3 + q(3, i)*h2
x4 = x4 + q(4, i)*h1
y4 = y4 + q(4, i)*h2
x5 = x5 + q(5, i)*h1
y5 = y5 + q(5, i)*h2
x6 = x6 + q(6, i)*h1
y6 = y6 + q(6, i)*h2
#ifndef PACK_REAL_TO_COMPLEX
x7 = x7 + q(7, i)*h1
y7 = y7 + q(7, i)*h2
x8 = x8 + q(8, i)*h1
y8 = y8 + q(8, i)*h2
x9 = x9 + q(9, i)*h1
y9 = y9 + q(9, i)*h2
x10 = x10 + q(10,i)*h1
y10 = y10 + q(10,i)*h2
x11 = x11 + q(11,i)*h1
y11 = y11 + q(11,i)*h2
x12 = x12 + q(12,i)*h1
y12 = y12 + q(12,i)*h2
#endif
enddo
x1 = x1 + q(1,nb+1)*hh(nb,1)
x2 = x2 + q(2,nb+1)*hh(nb,1)
x3 = x3 + q(3,nb+1)*hh(nb,1)
x4 = x4 + q(4,nb+1)*hh(nb,1)
x5 = x5 + q(5,nb+1)*hh(nb,1)
x6 = x6 + q(6,nb+1)*hh(nb,1)
#ifndef PACK_REAL_TO_COMPLEX
x7 = x7 + q(7, nb+1)*hh(nb,1)
x8 = x8 + q(8, nb+1)*hh(nb,1)
x9 = x9 + q(9, nb+1)*hh(nb,1)
x10 = x10 + q(10,nb+1)*hh(nb,1)
x11 = x11 + q(11,nb+1)*hh(nb,1)
x12 = x12 + q(12,nb+1)*hh(nb,1)
#endif
tau1 = hh(1,1)
tau2 = hh(1,2)
h1 = -tau1
x1 = x1 *h1
x2 = x2 *h1
x3 = x3 *h1
x4 = x4 *h1
x5 = x5 *h1
x6 = x6 *h1
#ifndef PACK_REAL_TO_COMPLEX
x7 = x7 *h1
x8 = x8 *h1
x9 = x9 *h1
x10 = x10*h1
x11 = x11*h1
x12 = x12*h1
#endif
h1 = -tau2
h2 = -tau2*s
y1 = y1 *h1 + x1 *h2
y2 = y2 *h1 + x2 *h2
y3 = y3 *h1 + x3 *h2
y4 = y4 *h1 + x4 *h2
y5 = y5 *h1 + x5 *h2
y6 = y6 *h1 + x6 *h2
#ifndef PACK_REAL_TO_COMPLEX
y7 = y7 *h1 + x7 *h2
y8 = y8 *h1 + x8 *h2
y9 = y9 *h1 + x9 *h2
y10 = y10*h1 + x10*h2
y11 = y11*h1 + x11*h2
y12 = y12*h1 + x12*h2
#endif
q(1,1) = q(1, 1) + y1
q(2,1) = q(2, 1) + y2
q(3,1) = q(3, 1) + y3
q(4,1) = q(4, 1) + y4
q(5,1) = q(5, 1) + y5
q(6,1) = q(6, 1) + y6
#ifndef PACK_REAL_TO_COMPLEX
q(7 ,1) = q(7, 1) + y7
q(8 ,1) = q(8, 1) + y8
q(9 ,1) = q(9, 1) + y9
q(10,1) = q(10,1) + y10
q(11,1) = q(11,1) + y11
q(12,1) = q(12,1) + y12
#endif
q(1, 2) = q(1, 2) + x1 + y1 *hh(2,2)
q(2, 2) = q(2, 2) + x2 + y2 *hh(2,2)
q(3, 2) = q(3, 2) + x3 + y3 *hh(2,2)
q(4, 2) = q(4, 2) + x4 + y4 *hh(2,2)
q(5, 2) = q(5, 2) + x5 + y5 *hh(2,2)
q(6, 2) = q(6, 2) + x6 + y6 *hh(2,2)
#ifndef PACK_REAL_TO_COMPLEX
q(7, 2) = q(7, 2) + x7 + y7 *hh(2,2)
q(8, 2) = q(8, 2) + x8 + y8 *hh(2,2)
q(9, 2) = q(9, 2) + x9 + y9 *hh(2,2)
q(10,2) = q(10,2) + x10 + y10*hh(2,2)
q(11,2) = q(11,2) + x11 + y11*hh(2,2)
q(12,2) = q(12,2) + x12 + y12*hh(2,2)
#endif
!DEC$ VECTOR ALIGNED
do i=3,nb
h1 = hh(i-1,1)
h2 = hh(i,2)
q(1, i) = q(1,i) + x1 *h1 + y1 *h2
q(2, i) = q(2,i) + x2 *h1 + y2 *h2
q(3, i) = q(3,i) + x3 *h1 + y3 *h2
q(4, i) = q(4,i) + x4 *h1 + y4 *h2
q(5, i) = q(5,i) + x5 *h1 + y5 *h2
q(6, i) = q(6,i) + x6 *h1 + y6 *h2
#ifndef PACK_REAL_TO_COMPLEX
q(7, i) = q(7, i) + x7 *h1 + y7 *h2
q(8, i) = q(8, i) + x8 *h1 + y8 *h2
q(9, i) = q(9, i) + x9 *h1 + y9 *h2
q(10,i) = q(10,i) + x10*h1 + y10*h2
q(11,i) = q(11,i) + x11*h1 + y11*h2
q(12,i) = q(12,i) + x12*h1 + y12*h2
#endif
enddo
q(1, nb+1) = q(1, nb+1) + x1 *hh(nb,1)
q(2, nb+1) = q(2, nb+1) + x2 *hh(nb,1)
q(3, nb+1) = q(3, nb+1) + x3 *hh(nb,1)
q(4, nb+1) = q(4, nb+1) + x4 *hh(nb,1)
q(5, nb+1) = q(5, nb+1) + x5 *hh(nb,1)
q(6, nb+1) = q(6, nb+1) + x6 *hh(nb,1)
#ifndef PACK_REAL_TO_COMPLEX
q(7, nb+1) = q(7, nb+1) + x7 *hh(nb,1)
q(8, nb+1) = q(8, nb+1) + x8 *hh(nb,1)
q(9, nb+1) = q(9, nb+1) + x9 *hh(nb,1)
q(10,nb+1) = q(10,nb+1) + x10*hh(nb,1)
q(11,nb+1) = q(11,nb+1) + x11*hh(nb,1)
q(12,nb+1) = q(12,nb+1) + x12*hh(nb,1)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel generic: hh_trafo_kernel_12_generic")
#endif
end subroutine hh_trafo_kernel_12_generic
! --------------------------------------------------------------------------------------------------
subroutine hh_trafo_kernel_8_generic(q, hh, nb, ldq, ldh, s)
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
implicit none
integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq/2,*)
real(kind=rk), intent(in) :: h(ldh,*)
#else
real(kind=rk), intent(inout) :: q(:,:)
#endif
real(kind=rk), intent(in) :: hh(ldh,2), s
#ifdef PACK_REAL_TO_COMPLEX
complex(kind=ck) :: x1, x2, x3, x4, y1, y2, y3, y4
#else
real(kind=rk) :: x1, x2, x3, x4, x5, x6, x7, x8, &
y1, y2, y3, y4, y5, y6, y7, y8
#endif
real(kind=rk) :: h1, h2, tau1, tau2
integer(kind=ik) :: i
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel generic: hh_trafo_kernel_8_generic")
#endif
x1 = q(1,2) x1 = q(1,2)
x2 = q(2,2) x2 = q(2,2)
x3 = q(3,2) x3 = q(3,2)
x4 = q(4,2) x4 = q(4,2)
#ifndef PACK_REAL_TO_COMPLEX
x5 = q(5,2) x5 = q(5,2)
x6 = q(6,2) x6 = q(6,2)
x7 = q(7,2)
x8 = q(8,2)
#endif
y1 = q(1,1) + q(1,2)*hh(2,2) y1 = q(1,1) + q(1,2)*hh(2,2)
y2 = q(2,1) + q(2,2)*hh(2,2) y2 = q(2,1) + q(2,2)*hh(2,2)
y3 = q(3,1) + q(3,2)*hh(2,2) y3 = q(3,1) + q(3,2)*hh(2,2)
y4 = q(4,1) + q(4,2)*hh(2,2) y4 = q(4,1) + q(4,2)*hh(2,2)
#ifndef PACK_REAL_TO_COMPLEX
y5 = q(5,1) + q(5,2)*hh(2,2) y5 = q(5,1) + q(5,2)*hh(2,2)
y6 = q(6,1) + q(6,2)*hh(2,2) y6 = q(6,1) + q(6,2)*hh(2,2)
y7 = q(7,1) + q(7,2)*hh(2,2)
y8 = q(8,1) + q(8,2)*hh(2,2)
#endif
!DEC$ VECTOR ALIGNED !DEC$ VECTOR ALIGNED
do i=3,nb do i=3,nb
...@@ -146,18 +428,28 @@ ...@@ -146,18 +428,28 @@
y3 = y3 + q(3,i)*h2 y3 = y3 + q(3,i)*h2
x4 = x4 + q(4,i)*h1 x4 = x4 + q(4,i)*h1
y4 = y4 + q(4,i)*h2 y4 = y4 + q(4,i)*h2
#ifndef PACK_REAL_TO_COMPLEX
x5 = x5 + q(5,i)*h1 x5 = x5 + q(5,i)*h1
y5 = y5 + q(5,i)*h2 y5 = y5 + q(5,i)*h2
x6 = x6 + q(6,i)*h1 x6 = x6 + q(6,i)*h1
y6 = y6 + q(6,i)*h2 y6 = y6 + q(6,i)*h2
x7 = x7 + q(7,i)*h1
y7 = y7 + q(7,i)*h2
x8 = x8 + q(8,i)*h1
y8 = y8 + q(8,i)*h2
#endif
enddo enddo
x1 = x1 + q(1,nb+1)*hh(nb,1) x1 = x1 + q(1,nb+1)*hh(nb,1)
x2 = x2 + q(2,nb+1)*hh(nb,1) x2 = x2 + q(2,nb+1)*hh(nb,1)
x3 = x3 + q(3,nb+1)*hh(nb,1) x3 = x3 + q(3,nb+1)*hh(nb,1)
x4 = x4 + q(4,nb+1)*hh(nb,1) x4 = x4 + q(4,nb+1)*hh(nb,1)
#ifndef PACK_REAL_TO_COMPLEX
x5 = x5 + q(5,nb+1)*hh(nb,1) x5 = x5 + q(5,nb+1)*hh(nb,1)
x6 = x6 + q(6,nb+1)*hh(nb,1) x6 = x6 + q(6,nb+1)*hh(nb,1)
x7 = x7 + q(7,nb+1)*hh(nb,1)
x8 = x8 + q(8,nb+1)*hh(nb,1)
#endif
tau1 = hh(1,1) tau1 = hh(1,1)
tau2 = hh(1,2) tau2 = hh(1,2)
...@@ -167,29 +459,45 @@ ...@@ -167,29 +459,45 @@
x2 = x2*h1 x2 = x2*h1
x3 = x3*h1 x3 = x3*h1
x4 = x4*h1 x4 = x4*h1
#ifndef PACK_REAL_TO_COMPLEX
x5 = x5*h1 x5 = x5*h1
x6 = x6*h1 x6 = x6*h1
x7 = x7*h1
x8 = x8*h1
#endif
h1 = -tau2 h1 = -tau2
h2 = -tau2*s h2 = -tau2*s
y1 = y1*h1 + x1*h2 y1 = y1*h1 + x1*h2
y2 = y2*h1 + x2*h2 y2 = y2*h1 + x2*h2
y3 = y3*h1 + x3*h2 y3 = y3*h1 + x3*h2
y4 = y4*h1 + x4*h2 y4 = y4*h1 + x4*h2
#ifndef PACK_REAL_TO_COMPLEX
y5 = y5*h1 + x5*h2 y5 = y5*h1 + x5*h2
y6 = y6*h1 + x6*h2 y6 = y6*h1 + x6*h2
y7 = y7*h1 + x7*h2
y8 = y8*h1 + x8*h2
#endif
q(1,1) = q(1,1) + y1 q(1,1) = q(1,1) + y1
q(2,1) = q(2,1) + y2 q(2,1) = q(2,1) + y2
q(3,1) = q(3,1) + y3 q(3,1) = q(3,1) + y3
q(4,1) = q(4,1) + y4 q(4,1) = q(4,1) + y4
#ifndef PACK_REAL_TO_COMPLEX
q(5,1) = q(5,1) + y5 q(5,1) = q(5,1) + y5
q(6,1) = q(6,1) + y6 q(6,1) = q(6,1)