! See copyright notice in the COPYRIGHT file.
! ****************************************************************************** !
!> author: Manuel Hasert
!! author: Kannan Masilamani
!! author: Jiaxing Qi
!! Routines and parameter definitions for the standard D3Q19 model
?? include 'header/lbm_macros.inc'
?? include 'header/lbm_interfaceMacros.inc'
?? include 'header/lbm_d3q19Macros.inc'
module mus_d3q19_turb_module
  use iso_c_binding,            only: c_f_pointer

  ! include treelm modules
  use env_module,            only: rk
  use tem_compileconf_module, only: vlen
  use tem_varSys_module,     only: tem_varSys_type, tem_varSys_op_type
  use tem_param_module,      only: div1_3, div1_6, div1_36, div1_8, div3_4h,   &
    &                              div1_12, div1_18, div1_16, div1_2, div1_24, &
    &                              div1_4, div1_48, div1_72,                   &
    &                              cs2inv, cs4inv, t2cs2inv,t2cs4inv, rho0
  use tem_dyn_array_module,  only: PositionOfVal

  ! include musubi modules
  use mus_field_prop_module,    only: mus_field_prop_type
  use mus_scheme_layout_module, only: mus_scheme_layout_type
  use mus_scheme_type_module,   only: mus_scheme_type, array2D_type
  use mus_param_module,         only: mus_param_type
  use mus_varSys_module,        only: mus_varSys_data_type

  implicit none

  private

  public :: bgk_advRel_d3q19_LES_new
  public :: mrt_advRel_d3q19_LES_new

  public :: bgk_advRel_d3q19_incomp_LES_new

  ! =============================================================================
  ! D3Q19 flow model
  ! =============================================================================
  !> Definition of the discrete velocity set

  ! integer,parameter :: block = 32
  integer,parameter :: QQ   = 19  !< number of pdf directions

  integer,parameter :: qN00 = 1   !< west             x-
  integer,parameter :: q0N0 = 2   !< south            y-
  integer,parameter :: q00N = 3   !< bottom           z-
  integer,parameter :: q100 = 4   !< east             x+
  integer,parameter :: q010 = 5   !< north            y+
  integer,parameter :: q001 = 6   !< top              z+
  integer,parameter :: q0NN = 7   !<                  z-,y-
  integer,parameter :: q0N1 = 8   !<                  z+,y-
  integer,parameter :: q01N = 9   !<                  z-,y+
  integer,parameter :: q011 = 10  !<                  z+,y+
  integer,parameter :: qN0N = 11  !<                  x-,z-
  integer,parameter :: q10N = 12  !<                  x+,z-
  integer,parameter :: qN01 = 13  !<                  x-,z+
  integer,parameter :: q101 = 14  !<                  x+,z+
  integer,parameter :: qNN0 = 15  !<                  y-,x-
  integer,parameter :: qN10 = 16  !<                  y+,x-
  integer,parameter :: q1N0 = 17  !<                  y-,x+
  integer,parameter :: q110 = 18  !<                  y+,x+
  integer,parameter :: q000 = 19  !< rest density is last

contains

! ****************************************************************************** !
  !> Advection relaxation routine for the D3Q19 model with BGK using auxField
  !! and ST omega
  !!
  !! 
?? copy :: compute_routineHeader( bgk_advRel_d3q19_LES_new )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem, iDir
    real(kind=rk) :: pdfTmp(19)
    real(kind=rk) :: rho     ! local density
    real(kind=rk) :: u_x     ! local x-velocity
    real(kind=rk) :: u_y     ! local y-velocity
    real(kind=rk) :: u_z     ! local z-velocity
    ! derived constants
    real(kind=rk) :: usq, ucx
    real(kind=rk) :: fEq(19) !< equilibrium distribution
    real(kind=rk) :: omega_tot
    type(mus_varSys_data_type), pointer :: fPtr
    type(mus_scheme_type), pointer :: scheme
    integer :: dens_pos, vel_pos(3), elemOff
    ! ---------------------------------------------------------------------------
    ! access scheme via 1st variable method data which is a state variable
    call C_F_POINTER( varSys%method%val(1)%method_Data, fPtr )
    scheme => fPtr%solverData%scheme
    dens_pos = varSys%method%val(scheme%derVarPos(1)%density) &
      &                     %auxField_varPos(1)
    vel_pos = varSys%method%val(scheme%derVarPos(1)%velocity) &
      &                    %auxField_varPos(1:3)
    !$omp do schedule(static)

?? copy :: dir_novec
    nodeloop2: do iElem = 1,nSolve
 
      ! First load all local values into temp array
      ! Generic! PUSH+PULL is possible
      pdfTmp( qN00 ) = inState(?FETCH?( qN00, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q0N0 ) = inState(?FETCH?( q0N0, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q00N ) = inState(?FETCH?( q00N, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q100 ) = inState(?FETCH?( q100, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q010 ) = inState(?FETCH?( q010, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q001 ) = inState(?FETCH?( q001, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q0NN ) = inState(?FETCH?( q0NN, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q0N1 ) = inState(?FETCH?( q0N1, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q01N ) = inState(?FETCH?( q01N, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q011 ) = inState(?FETCH?( q011, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( qN0N ) = inState(?FETCH?( qN0N, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q10N ) = inState(?FETCH?( q10N, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( qN01 ) = inState(?FETCH?( qN01, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q101 ) = inState(?FETCH?( q101, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( qNN0 ) = inState(?FETCH?( qNN0, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( qN10 ) = inState(?FETCH?( qN10, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q1N0 ) = inState(?FETCH?( q1N0, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q110 ) = inState(?FETCH?( q110, 1, iElem, 19, 19, nElems,neigh ))
      pdfTmp( q000 ) = inState(?FETCH?( q000, 1, iElem, 19, 19, nElems,neigh ))

      elemOff = (iElem-1)*varSys%nAuxScalars 
      ! local density
      rho = auxField(elemOff + dens_pos)
      ! local x-, y- and z-velocity
      u_x = auxField(elemOff + vel_pos(1))
      u_y = auxField(elemOff + vel_pos(2))
      u_z = auxField(elemOff + vel_pos(3))

      !write(*,*) 'dens ', rho, 'vel: ', u_x, u_y, u_z

      ! Calculate the equilibrium distribution function
      !fEq(:) = getEquilibrium( density = rho, velocity = (/u_x, u_y, u_z /), layout = layout)
      ! square of velocity
      usq = u_x*u_x + u_y*u_y + u_z*u_z
      do iDir = 1, QQ

        ! velocity times lattice unit velocity
        ucx =   dble( layout%fStencil%cxDir(1,iDir)) * u_x &
          &   + dble( layout%fStencil%cxDir(2,iDir)) * u_y &
          &   + dble( layout%fStencil%cxDir(3,iDir)) * u_z

        ! calculate equilibrium density
        fEq( iDir ) = layout%weight( iDir ) * rho * ( 1.d0 + ucx*cs2inv &
          &         + ucx*ucx*t2cs4inv - usq*t2cs2inv )

      enddo

      ! effective omega
      omega_tot = fieldProp(1)%fluid%viscKine%omLvl(level)%val(iElem) 

      ! relaxation
      outState(?SAVE?(qN00,1,iElem,QQ,QQ,nElems,neigh )) =                     &
        &      pdfTmp( qN00 ) - omega_tot*(pdfTmp( qN00 ) - fEq( qN00 ))
      outState(?SAVE?(q0N0,1,iElem,QQ,QQ,nElems,neigh )) =                     &
        &      pdfTmp( q0N0 ) - omega_tot*(pdfTmp( q0N0 ) - fEq( q0N0 ))
      outState(?SAVE?(q00N,1,iElem,QQ,QQ,nElems,neigh )) =                     &
        &      pdfTmp( q00N ) - omega_tot*(pdfTmp( q00N ) - fEq( q00N ))
      outState(?SAVE?(q100,1,iElem,QQ,QQ,nElems,neigh )) =                     &
        &      pdfTmp( q100 ) - omega_tot*(pdfTmp( q100 ) - fEq( q100 ))
      outState(?SAVE?(q010,1,iElem,QQ,QQ,nElems,neigh )) =                     &
        &      pdfTmp( q010 ) - omega_tot*(pdfTmp( q010 ) - fEq( q010 ))
      outState(?SAVE?(q001,1,iElem,QQ,QQ,nElems,neigh )) =                     &
        &      pdfTmp( q001 ) - omega_tot*(pdfTmp( q001 ) - fEq( q001 ))
      outState(?SAVE?(q0NN,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( q0NN ) - omega_tot*(pdfTmp( q0NN ) - fEq( q0NN ))
      outState(?SAVE?(q0N1,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( q0N1 ) - omega_tot*(pdfTmp( q0N1 ) - fEq( q0N1 ))
      outState(?SAVE?(q01N,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( q01N ) - omega_tot*(pdfTmp( q01N ) - fEq( q01N ))
      outState(?SAVE?(q011,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( q011 ) - omega_tot*(pdfTmp( q011 ) - fEq( q011 ))
      outState(?SAVE?(qN0N,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( qN0N ) - omega_tot*(pdfTmp( qN0N ) - fEq( qN0N ))
      outState(?SAVE?(q10N,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( q10N ) - omega_tot*(pdfTmp( q10N ) - fEq( q10N ))
      outState(?SAVE?(qN01,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( qN01 ) - omega_tot*(pdfTmp( qN01 ) - fEq( qN01 ))
      outState(?SAVE?(q101,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( q101 ) - omega_tot*(pdfTmp( q101 ) - fEq( q101 ))
      outState(?SAVE?(qNN0,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( qNN0 ) - omega_tot*(pdfTmp( qNN0 ) - fEq( qNN0 ))
      outState(?SAVE?(qN10,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( qN10 ) - omega_tot*(pdfTmp( qN10 ) - fEq( qN10 ))
      outState(?SAVE?(q1N0,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( q1N0 ) - omega_tot*(pdfTmp( q1N0 ) - fEq( q1N0 ))
      outState(?SAVE?(q110,1,iElem,QQ,QQ,nElems,neigh))  =                     &
        &      pdfTmp( q110 ) - omega_tot*(pdfTmp( q110 ) - fEq( q110 ))
      outState(?SAVE?(q000,1,iElem,QQ,QQ,nElems,neigh)) =                      &
        &      pdfTmp( q000 ) - omega_tot*(pdfTmp( q000 ) - fEq( q000 ))
   enddo nodeloop2
!$omp end do nowait

  end subroutine bgk_advRel_d3q19_LES_new
! ****************************************************************************** !

! ****************************************************************************** !
  !> Advection relaxation routine for the MRT model.
  !! This routine has roughly 392 FLOPS per element.
  !! 
?? copy :: compute_routineHeader( mrt_advRel_d3q19_LES_new )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem
?? copy :: pdfTmp19( f )
    real(kind=rk) :: rho     ! local density
    real(kind=rk) :: u_x     ! local x-velocity
    real(kind=rk) :: u_y     ! local y-velocity
    real(kind=rk) :: u_z     ! local z-velocity
    real(kind=rk) :: usq
    ! MRT Variables
    real(kind=rk) :: meq10, meq12
    real(kind=rk) :: s_mrt( QQ )
    real(kind=rk) :: m( QQ )
    real(kind=rk) :: mout( 2:QQ )
    real(kind=rk) :: c0, c1, c2, c3, c4, c5, c6, c7
    ! Variables required for LES
    real(kind=rk) :: Q
    real(kind=rk) :: cs2_dt2_18
    real(kind=rk) :: omega_tot, omegaBulk
    integer :: nScalars
    type(mus_varSys_data_type), pointer :: fPtr
    type(mus_scheme_type), pointer :: scheme
    integer :: dens_pos, vel_pos(3), elemOff
    ! ---------------------------------------------------------------------------
    ! access scheme via 1st variable method data which is a state variable
    call C_F_POINTER( varSys%method%val(1)%method_Data, fPtr )
    scheme => fPtr%solverData%scheme
    dens_pos = varSys%method%val(scheme%derVarPos(1)%density) &
      &                     %auxField_varPos(1)
    vel_pos = varSys%method%val(scheme%derVarPos(1)%velocity) &
      &                    %auxField_varPos(1:3)

    omegaBulk = fieldProp(1)%fluid%omLvl_bulk(level) 
    ! MRT omegas
    ! overwrite omegaKine term in the element loop
    s_mrt = fieldProp(1)%fluid%mrtPtr(omegaKine=1.0_rk, omegaBulk=omegaBulk, &
      &                               QQ=QQ)
    s_mrt( 5)     = s_mrt( 5) * div1_24
    s_mrt( 7)     = s_mrt( 7) * div1_24
    s_mrt( 9)     = s_mrt( 9) * div1_24
    s_mrt(17:19)  = s_mrt(17:19) * div1_8

    nScalars = varSys%nScalars

    !NEC$ ivdep
?? IF (SOA) THEN
?? copy :: dir_vector
?? ELSE
?? copy :: dir_novec
?? END IF
    nodeloop: do iElem = 1, nSolve
      !> First load all local values into temp array
      fN00 = inState( ?FETCH?(  1, 1, iElem, QQ, nScalars, nElems,neigh ))
      f0N0 = inState( ?FETCH?(  2, 1, iElem, QQ, nScalars, nElems,neigh ))
      f00N = inState( ?FETCH?(  3, 1, iElem, QQ, nScalars, nElems,neigh ))
      f100 = inState( ?FETCH?(  4, 1, iElem, QQ, nScalars, nElems,neigh ))
      f010 = inState( ?FETCH?(  5, 1, iElem, QQ, nScalars, nElems,neigh ))
      f001 = inState( ?FETCH?(  6, 1, iElem, QQ, nScalars, nElems,neigh ))
      f0NN = inState( ?FETCH?(  7, 1, iElem, QQ, nScalars, nElems,neigh ))
      f0N1 = inState( ?FETCH?(  8, 1, iElem, QQ, nScalars, nElems,neigh ))
      f01N = inState( ?FETCH?(  9, 1, iElem, QQ, nScalars, nElems,neigh ))
      f011 = inState( ?FETCH?( 10, 1, iElem, QQ, nScalars, nElems,neigh ))
      fN0N = inState( ?FETCH?( 11, 1, iElem, QQ, nScalars, nElems,neigh ))
      f10N = inState( ?FETCH?( 12, 1, iElem, QQ, nScalars, nElems,neigh ))
      fN01 = inState( ?FETCH?( 13, 1, iElem, QQ, nScalars, nElems,neigh ))
      f101 = inState( ?FETCH?( 14, 1, iElem, QQ, nScalars, nElems,neigh ))
      fNN0 = inState( ?FETCH?( 15, 1, iElem, QQ, nScalars, nElems,neigh ))
      fN10 = inState( ?FETCH?( 16, 1, iElem, QQ, nScalars, nElems,neigh ))
      f1N0 = inState( ?FETCH?( 17, 1, iElem, QQ, nScalars, nElems,neigh ))
      f110 = inState( ?FETCH?( 18, 1, iElem, QQ, nScalars, nElems,neigh ))
      f000 = inState( ?FETCH?( 19, 1, iElem, QQ, nScalars, nElems,neigh ))

      !pxy
      m(14) = f110+ fNN0- f1N0- fN10
      !pyz
      m(15) = f011+ f0NN- f01N- f0N1
      !pxz
      m(16) = f101+ fN0N- f10N- fN01


m(1) = f010 + f0N0
m(4) = f001 + f00N
c7   = 2._rk * ( f100+ fN00 )
c0   = f110 + fNN0 + f1N0 + fN10
m(6) = f101 + fN0N + f10N + fN01
m(8) = f011 + f0NN + f01N + f0N1

      elemOff = (iElem-1)*varSys%nAuxScalars 
      ! local density
      rho = auxField(elemOff + dens_pos)

      ! local x-, y- and z-velocity
      u_x = auxField(elemOff + vel_pos(1))
      u_y = auxField(elemOff + vel_pos(2))
      u_z = auxField(elemOff + vel_pos(3))

      ! square velocity and derived constants
      usq = u_x*u_x + u_y*u_y + u_z*u_z

      ! meq2  = rho*usq
      meq10 = rho*(3.0_rk*u_x*u_x - usq)
      meq12 = rho*( u_y*u_y - u_z*u_z )

      m(2) =  - f000 + c0 + m(6) + m(8)

      !epsilon
      m(3) =  f000- 2._rk*( m(1) + m(4) ) -c7 + c0 + m(6) + m(8)

      !3pxx
      m(10) = - m(1) - m(4) + c0 + m(6) + c7 - m(8) * 2.0_rk
      !3pixx
      m(11) = + m(1) + m(4) + c0 + m(6) - c7 - m(8) * 2.0_rk
      !pww
      m(12) =   m(1) - m(4) + c0 - m(6)
      !piww
      m(13) = - m(1) + m(4) + c0 - m(6)

c1 = f110 - fNN0
c2 = f1N0 - fN10
c3 = f101 - fN0N
c4 = f10N - fN01

      !qx
      m(5) =  -2.0_rk*(f100- fN00) + c1 + c2 + c3 + c4
      !mx
      m(17) = c1 + c2 - c3 - c4

c5 = f011 - f0NN
c6 = f01N - f0N1

      !qy
      m(7) =  -2.0_rk*(f010- f0N0) + c1 - c2 + c5 + c6
      !my
      m(18) =-c1 + c2 + c5 + c6

      !qz
      m(9) =  -2.0_rk*(f001- f00N) + c3 - c4 + c5 - c6
      !mz
      m(19) = c3 - c4 - c5 + c6

      ! ---------------------- MRT Part -----------------------------------------
      ! effective omega
      omega_tot = fieldProp(1)%fluid%viscKine%omLvl(level)%val(iElem) 
      s_mrt(10) = omega_tot
      ! s_mrt(12) = s_mrt(10)
      s_mrt(14) = div1_4 * omega_tot 
      ! s_mrt(15) = s_mrt(16) = s_mrt(14)
      ! Now the relaxation parameters are changed based on the strain rate
      ! ---------------------- MRT Part -----------------------------------------

      mout( 2) = s_mrt( 2) * (m( 2) - rho*usq)
      mout( 3) = s_mrt( 3) *  m( 3)
      mout( 5) = s_mrt( 5) *  m( 5)
      mout( 7) = s_mrt( 7) *  m( 7)
      mout( 9) = s_mrt( 9) *  m( 9)
      mout(10) = s_mrt(10) * (m(10) - meq10)
      mout(11) = s_mrt(11) *  m(11)
      mout(12) = s_mrt(10) * (m(12) - meq12) ! s_mrt(12) = s_mrt(10)
      mout(13) = s_mrt(13) *  m(13)
      mout(14) = s_mrt(14) * (m(14) - rho*u_x*u_y)
      mout(15) = s_mrt(14) * (m(15) - rho*u_y*u_z)! s_mrt(15) = s_mrt(14)
      mout(16) = s_mrt(14) * (m(16) - rho*u_x*u_z)! s_mrt(16) = s_mrt(14)
      mout(17) = s_mrt(17) *  m(17)
      mout(18) = s_mrt(18) *  m(18)
      mout(19) = s_mrt(19) *  m(19)

      ! Transformation back to PDF
      outstate( ?SAVE?( 19,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &             f000 -( -div1_2*mout(2) + div1_6*mout(3))

! -------------------------------------------------------------------------------
c0 = - div1_18*mout(3) + div1_12*(mout(10) - mout(11))
      outstate( ?SAVE?(  4,1,iElem,QQ,nScalars,nElems,neigh )) = f100 -( c0 - 4.0_rk *mout(5) )
      outstate( ?SAVE?(  1,1,iElem,QQ,nScalars,nElems,neigh )) = fN00 -( c0 + 4.0_rk *mout(5) )
! -------------------------------------------------------------------------------

! -------------------------------------------------------------------------------
c1 =  - div1_18*mout(3)  - div1_24*(mout(10) - mout(11))
c2 =    div1_8 * (mout(12) - mout(13))

      outstate( ?SAVE?(  5,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &             f010 -( c1 + c2 - 4.0_rk*mout(7) )

      outstate( ?SAVE?(  2,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &             f0N0 -( c1 + c2 + 4.0_rk*mout(7) )

      outstate( ?SAVE?(  6,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &             f001 -( c1 - c2 - 4.0_rk*mout(9) )

      outstate( ?SAVE?(  3,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &             f00N -( c1 - c2 + 4.0_rk*mout(9) )
! -------------------------------------------------------------------------------

! -------------------------------------------------------------------------------
c3 =   div1_24*mout(2)  + div1_72*mout(3)  &
 &   + div1_48 * ( mout(10) + mout(11) )  &
 &   + div1_16 * ( mout(12) + mout(13) )

      outstate( ?SAVE?( 18,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &  f110 -(c3 + mout(5) + mout(7) + mout(14) + mout(17) - mout(18))

      outstate( ?SAVE?( 15,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &  fNN0 -(c3 - mout(5) - mout(7) + mout(14) - mout(17) + mout(18))

      outstate( ?SAVE?( 17,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &  f1N0 -(c3 + mout(5) - mout(7) - mout(14) + mout(17) + mout(18))

      outstate( ?SAVE?( 16,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &  fN10 -(c3 - mout(5) + mout(7) - mout(14) - mout(17) - mout(18))
! -------------------------------------------------------------------------------

! -------------------------------------------------------------------------------
c4 =   div1_24*mout(2)  + div1_72*mout(3)  &
 &   + div1_48 * ( mout(10) + mout(11) ) &
 &   - div1_16 * ( mout(12) + mout(13) )

      outstate( ?SAVE?( 14,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &  f101 -( c4 + mout(5) + mout(9) + mout(16) - mout(17)+mout(19))

      outstate( ?SAVE?( 11,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &  fN0N -( c4 - mout(5) - mout(9) + mout(16) + mout(17)-mout(19))

      outstate( ?SAVE?( 12,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &  f10N -( c4 + mout(5) - mout(9) - mout(16) - mout(17)-mout(19))

      outstate( ?SAVE?( 13,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        &  fN01 -( c4 - mout(5) + mout(9) - mout(16) + mout(17)+mout(19))
! -------------------------------------------------------------------------------

! -------------------------------------------------------------------------------
c5 = div1_24*(mout(2)-mout(10)-mout(11)) + div1_72*mout(3)

      outstate( ?SAVE?( 10,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        & f011 -( c5 + mout(7) + mout(9) + mout(15) + mout(18) - mout(19))

      outstate( ?SAVE?(  7,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        & f0NN -( c5 - mout(7) - mout(9) + mout(15) - mout(18) + mout(19))

      outstate( ?SAVE?(  9,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        & f01N -( c5 + mout(7) - mout(9) - mout(15) + mout(18) + mout(19))

      outstate( ?SAVE?(  8,1,iElem,QQ,nScalars,nElems,neigh )) =                       &
        & f0N1 -( c5 - mout(7) + mout(9) - mout(15) - mout(18) - mout(19))
! -------------------------------------------------------------------------------

    enddo nodeloop

  end subroutine mrt_advRel_d3q19_LES_new
! ****************************************************************************** !

! ****************************************************************************** !
  !> Advection relaxation routine for the D3Q19 model with BGK incompressible 
  !! model and Smagorinsky Subgrid-scale model for large eddy simulations (LES.
  !!
?? copy :: compute_routineHeader( bgk_advRel_d3q19_incomp_LES_new )
?? copy :: compute_routineParams
    ! ---------------------------------------------------------------------------
    integer :: iElem,iDir
?? copy :: pdfTmp19( f )
    real(kind=rk) :: rho     ! local density
    real(kind=rk) :: u_x     ! local x-velocity
    real(kind=rk) :: u_y     ! local y-velocity
    real(kind=rk) :: u_z     ! local z-velocity
    real(kind=rk) :: inv_rho0 ! inverse local density
    ! derived constants
    real(kind=rk) :: usq, ucx
    real(kind=rk) :: fEq(19) ! equilibrium distribution
    real(kind=rk) :: omega_tot
    ! ---------------------------------------------------------------------------

    inv_rho0 = 1._rk / rho0

    !$omp do schedule(static)

?? copy :: dir_novec
    nodeloop: do iElem = 1, nSolve
      ! First load all local values into temp array
      fN00 = inState(?FETCH?( qN00, 1, iElem, QQ, QQ, nElems,neigh ))
      f0N0 = inState(?FETCH?( q0N0, 1, iElem, QQ, QQ, nElems,neigh ))
      f00N = inState(?FETCH?( q00N, 1, iElem, QQ, QQ, nElems,neigh ))
      f100 = inState(?FETCH?( q100, 1, iElem, QQ, QQ, nElems,neigh ))
      f010 = inState(?FETCH?( q010, 1, iElem, QQ, QQ, nElems,neigh ))
      f001 = inState(?FETCH?( q001, 1, iElem, QQ, QQ, nElems,neigh ))
      f0NN = inState(?FETCH?( q0NN, 1, iElem, QQ, QQ, nElems,neigh ))
      f0N1 = inState(?FETCH?( q0N1, 1, iElem, QQ, QQ, nElems,neigh ))
      f01N = inState(?FETCH?( q01N, 1, iElem, QQ, QQ, nElems,neigh ))
      f011 = inState(?FETCH?( q011, 1, iElem, QQ, QQ, nElems,neigh ))
      fN0N = inState(?FETCH?( qN0N, 1, iElem, QQ, QQ, nElems,neigh ))
      f10N = inState(?FETCH?( q10N, 1, iElem, QQ, QQ, nElems,neigh ))
      fN01 = inState(?FETCH?( qN01, 1, iElem, QQ, QQ, nElems,neigh ))
      f101 = inState(?FETCH?( q101, 1, iElem, QQ, QQ, nElems,neigh ))
      fNN0 = inState(?FETCH?( qNN0, 1, iElem, QQ, QQ, nElems,neigh ))
      fN10 = inState(?FETCH?( qN10, 1, iElem, QQ, QQ, nElems,neigh ))
      f1N0 = inState(?FETCH?( q1N0, 1, iElem, QQ, QQ, nElems,neigh ))
      f110 = inState(?FETCH?( q110, 1, iElem, QQ, QQ, nElems,neigh ))
      f000 = inState(?FETCH?( q000, 1, iElem, QQ, QQ, nElems,neigh ))

      ! local density
?? copy :: rho_d3q19( rho, f )

?? copy :: ux_d3q19( u_x, f, inv_rho0 )
?? copy :: uy_d3q19( u_y, f, inv_rho0 )
?? copy :: uz_d3q19( u_z, f, inv_rho0 )

      ! Calculate the equilibrium distribution function using
      ! incompressible formulation
      ! square of velocity
      usq = u_x*u_x + u_y*u_y + u_z*u_z
      do iDir = 1, layout%fStencil%QQ

        ! velocity times lattice unit velocity
        ucx =   real( layout%fStencil%cxDir( 1, iDir ), kind=rk) * u_x       &
          &   + real( layout%fStencil%cxDir( 2, iDir ), kind=rk) * u_y       &
          &   + real( layout%fStencil%cxDir( 3, iDir ), kind=rk) * u_z

        ! calculate equilibrium density
        fEq( iDir ) = layout%weight( iDir ) * ( rho + rho0 * ( ucx*cs2inv      &
          &         + ucx*ucx*t2cs4inv - usq*t2cs2inv ) )

      enddo

      ! effective omega
      omega_tot = fieldProp(1)%fluid%viscKine%omLvl(level)%val(iElem) 

      ! relaxation
      outState(?SAVE?( q000,1,iElem,QQ,QQ, nElems,neigh)) =  &
        &      f000 - omega_tot*(f000 - fEq(q000))
      outState(?SAVE?(q110,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      f110 - omega_tot*(f110 - fEq(q110))
      outState(?SAVE?(qNN0,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      fNN0 - omega_tot*(fNN0 - fEq(qNN0))
      outState(?SAVE?(qN10,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      fN10 - omega_tot*(fN10 - fEq(qN10))
      outState(?SAVE?(q1N0,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      f1N0 - omega_tot*(f1N0 - fEq(q1N0))
      outState(?SAVE?(q101,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      f101 - omega_tot*(f101 - fEq(q101))
      outState(?SAVE?(qN0N,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      fN0N - omega_tot*(fN0N - fEq(qN0N))
      outState(?SAVE?(qN01,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      fN01 - omega_tot*(fN01 - fEq(qN01))
      outState(?SAVE?(q10N,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      f10N - omega_tot*(f10N - fEq(q10N))
      outState(?SAVE?(q011,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      f011 - omega_tot*(f011 - fEq(q011))
      outState(?SAVE?(q0NN,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      f0NN - omega_tot*(f0NN - fEq(q0NN))
      outState(?SAVE?(q0N1,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      f0N1 - omega_tot*(f0N1 - fEq(q0N1))
      outState(?SAVE?(q01N,1,iElem,QQ,QQ,nElems,neigh))  =   &
        &      f01N - omega_tot*(f01N - fEq(q01N))
      outState(?SAVE?(q010,1,iElem,QQ,QQ,nElems,neigh )) =   &
        &      f010 - omega_tot*(f010 - fEq(q010))
      outState(?SAVE?(q0N0,1,iElem,QQ,QQ,nElems,neigh )) =   &
        &      f0N0 - omega_tot*(f0N0 - fEq(q0N0))
      outState(?SAVE?(q100,1,iElem,QQ,QQ,nElems,neigh )) =   &
        &      f100 - omega_tot*(f100 - fEq(q100))
      outState(?SAVE?(qN00,1,iElem,QQ,QQ,nElems,neigh )) =   &
        &      fN00 - omega_tot*(fN00 - fEq(qN00))
      outState(?SAVE?(q00N,1,iElem,QQ,QQ,nElems,neigh )) =   &
        &      f00N - omega_tot*(f00N - fEq(q00N))
      outState(?SAVE?(q001,1,iElem,QQ,QQ,nElems,neigh )) =   &
        &      f001 - omega_tot*(f001 - fEq(q001))

    enddo nodeloop
!$omp end do nowait

  end subroutine bgk_advRel_d3q19_incomp_LES_new
! ****************************************************************************** !


end module mus_d3q19_turb_module
! ****************************************************************************** !
