! Copyright (c) 2015-2016 Jiaxing Qi <jiaxing.qi@uni-siegen.de>
! Copyright (c) 2016 Tobias Schneider <tobias1.schneider@student.uni-siegen.de>
! Copyright (c) 2016 Verena Krupp <verena.krupp@uni-siegen.de>
! Copyright (c) 2018-2020 Kannan Masilamani <kannan.masilamani@uni-siegen.de>
! Copyright (c) 2018 Raphael Haupt <raphael.haupt@uni-siegen.de>
! Copyright (c) 2019 Harald Klimach <harald.klimach@uni-siegen.de>
! Copyright (c) 2020 Peter Vitt <peter.vitt2@uni-siegen.de>
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions are met:
!
! 1. Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
!
! 2. Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
!
! THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF SIEGEN “AS IS” AND ANY EXPRESS
! OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
! OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
! IN NO EVENT SHALL UNIVERSITY OF SIEGEN OR CONTRIBUTORS BE LIABLE FOR ANY
! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
! ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
! ****************************************************************************** !
?? include 'header/lbm_macros.inc'
?? include 'header/lbm_d3q19Macros.inc'
?? include 'treelm/source/logMacros.inc'
!> summary: Interpolation of flow quantities between different grid levels
!!
!! Ghost elements are employed at grid level interfaces to provide valid
!! pdf values to the neighboring fluid elements. This way, the solvers can
!! act on elements of the same size only, treating the levels successively.
!! Target elements are the ghost elements, which have to be filled with
!! valid values.
!! Source elements are the fluid elements from other levels, from where to
!! take the input values for the interpolation.
!! The target ghost elements on the target level have corresponding source
!! fluid elements on the source level.
!!
!! [[tem_topology_module]] For a detailed description of the grid
!!
module mus_interpolate_d3q19_module
  use iso_c_binding, only: c_loc, c_ptr, c_f_pointer

  ! include treelm modules
  use env_module,              only: rk
  use tem_param_module,        only: cs2inv, cs2, div1_2, &
    &                                div1_36, div1_8, div3_4h, div1_4, div2_3, &
    &                                rho0
  use tem_element_module,      only: eT_ghostFromFiner
  use tem_construction_module, only: tem_levelDesc_type
  use tem_debug_module,        only: dbgUnit
  use tem_logging_module,      only: logUnit
  use tem_varSys_module,       only: tem_varSys_type
  use tem_time_module,         only: tem_time_type

  ! include musubi modules
  use mus_scheme_layout_module,      only: mus_scheme_layout_type
  use mus_derVarPos_module,          only: mus_derVarPos_type
  use mus_physics_module,            only: mus_physics_type
  use mus_interpolate_header_module, only: mus_interpolation_method_type
  use mus_field_prop_module,         only: mus_field_prop_type
  use mus_fluid_module,              only: mus_fluid_type
  use mus_derVarPos_module,          only: mus_derVarPos_type
  ! use mus_interpolate_tools_module,  only: get_fNeqFac_f2c, &
    ! &                                      get_fNeqFac_c2f
  ! use mus_derivedQuantities_module2, only: secondMom
  ! use mus_moments_module,            only: get_moment

  implicit none

  private

  public ::  get_polynomial_3D
  public :: eval_polynomial_3D
  public :: fillCoarser_compactD3Q19

  integer, parameter :: QQ = 19

contains

! ****************************************************************************** !
  pure function eval_polynomial_3D( a, b, c, coord ) result( res )
    ! ---------------------------------------------------------------------------
    real(kind=rk), intent(in) :: a(10), b(10), c(10)
    ! coordinates(x,y,z) of target position
    real(kind=rk), intent(in) :: coord(3)
    real(kind=rk)             :: res(3)
    real(kind=rk)             :: xx, yy, zz, xy, yz, xz
    ! ---------------------------------------------------------------------------

    xx = coord(1)*coord(1)
    yy = coord(2)*coord(2)
    zz = coord(3)*coord(3)
    xy = coord(1)*coord(2)
    yz = coord(2)*coord(3)
    xz = coord(1)*coord(3)

    ! Evaluate polynomial
    res(1) = a(1) + a(2)*coord(1) + a(3)*coord(2) + a(4)*coord(3) &
      &           + a(5)*xx       + a(6)*yy       + a(7)*zz &
      &           + a(8)*xy       + a(9)*yz       + a(10)*xz
    res(2) = b(1) + b(2)*coord(1) + b(3)*coord(2) + b(4)*coord(3) &
      &           + b(5)*xx       + b(6)*yy       + b(7)*zz &
      &           + b(8)*xy       + b(9)*yz       + b(10)*xz
    res(3) = c(1) + c(2)*coord(1) + c(3)*coord(2) + c(4)*coord(3) &
      &           + c(5)*xx       + c(6)*yy       + c(7)*zz &
      &           + c(8)*xy       + c(9)*yz       + c(10)*xz

  end function eval_polynomial_3D
! ****************************************************************************** !

! ****************************************************************************** !
  !> Construct polynomial and interpolate at target position.
  !! To construct polynomial, velocity and strain rate of 4 source locations
  !! are needed:
  !! H(0,0,0), K(1,1,0), M(1,0,1), N(0,1,1)
  subroutine get_polynomial_3D( u, s, a, b, c )
    ! ---------------------------------------------------------------------------
    ! Velocity (Ux, Uy, Uz)
    real(kind=rk), intent(in) :: u(3,4)
    ! Strain rate (Sxx,Syy,Szz,Sxy,Syz,Sxz)
    real(kind=rk), intent(in) :: s(6,4)
    real(kind=rk), intent(out) :: a(10), b(10), c(10)
    ! ---------------------------------------------------------------------------
    real(kind=rk) :: a0,ax,ay,az,axx,ayy,azz,axy,ayz,axz
    real(kind=rk) :: b0,bx,by,bz,bxx,byy,bzz,bxy,byz,bxz
    real(kind=rk) :: c0,cx,cy,cz,cxx,cyy,czz,cxy,cyz,cxz
    real(kind=rk) :: P, Q, R
    real(kind=rk) :: Pa, Qa, Ra
    real(kind=rk) :: Pb, Qb, Rb
    real(kind=rk) :: Pc, Qc, Rc
    ! ---------------------------------------------------------------------------

    a0 = u(1,1)
    b0 = u(2,1)
    c0 = u(3,1)

    axx = ( -s(1,1) + s(1,2) + s(1,3) - s(1,4) ) * div1_4
    axy = ( -s(1,1) + s(1,2) - s(1,3) + s(1,4) ) * div1_2
    axz = ( -s(1,1) - s(1,2) + s(1,3) + s(1,4) ) * div1_2

    bxy = ( -s(2,1) + s(2,2) + s(2,3) - s(2,4) ) * div1_2
    byy = ( -s(2,1) + s(2,2) - s(2,3) + s(2,4) ) * div1_4
    byz = ( -s(2,1) - s(2,2) + s(2,3) + s(2,4) ) * div1_2

    cxz = ( -s(3,1) + s(3,2) + s(3,3) - s(3,4) ) * div1_2
    cyz = ( -s(3,1) + s(3,2) - s(3,3) + s(3,4) ) * div1_2
    czz = ( -s(3,1) - s(3,2) + s(3,3) + s(3,4) ) * div1_4

    !  axy + 2bxx = ( -s(4,1) + s(4,2) + s(4,3) - s(4,4) )
              bxx = ( -s(4,1) + s(4,2) + s(4,3) - s(4,4) - axy ) * div1_2
    ! 2ayy +  bxy = ( -s(4,1) + s(4,2) - s(4,3) + s(4,4) )
       ayy        = ( -s(4,1) + s(4,2) - s(4,3) + s(4,4) - bxy ) * div1_2
                P =   -s(4,1) - s(4,2) + s(4,3) + s(4,4)

    !  byz + 2cyy = ( -s(5,1) + s(5,2) - s(5,3) + s(5,4) )
              cyy = ( -s(5,1) + s(5,2) - s(5,3) + s(5,4) - byz ) * div1_2
    ! 2bzz +  cyz = ( -s(5,1) - s(5,2) + s(5,3) + s(5,4) )
       bzz        = ( -s(5,1) - s(5,2) + s(5,3) + s(5,4) - cyz ) * div1_2
                Q =   -s(5,1) + s(5,2) + s(5,3) - s(5,4)

    !  axz + 2cxx = ( -s(6,1) + s(6,2) + s(6,3) - s(6,4) )
              cxx = ( -s(6,1) + s(6,2) + s(6,3) - s(6,4) - axz ) * div1_2
    ! 2azz +  cxz = ( -s(6,1) - s(6,2) + s(6,3) + s(6,4) )
       azz        = ( -s(6,1) - s(6,2) + s(6,3) + s(6,4) - cxz ) * div1_2
                R =   -s(6,1) + s(6,2) - s(6,3) + s(6,4)

    ! P == ayz + bzx
    ! Q == bzx + cxy
    ! R == ayz + cxy
    ayz = (   P - Q + R ) * div1_2
    bxz = (   P + Q - R ) * div1_2
    cxy = ( - P + Q + R ) * div1_2

    Pa = u(1,2) - a0 - axx - ayy - axy
    Qa = u(1,3) - a0 - axx - azz - axz
    Ra = u(1,4) - a0 - ayy - azz - ayz
    ax = (  Pa + Qa - Ra ) * div1_2
    ay = (  Pa - Qa + Ra ) * div1_2
    az = ( -Pa + Qa + Ra ) * div1_2

    Pb = u(2,2) - b0 - bxx - byy - bxy
    Qb = u(2,3) - b0 - bxx - bzz - bxz
    Rb = u(2,4) - b0 - byy - bzz - byz
    bx = (  Pb + Qb - Rb ) * div1_2
    by = (  Pb - Qb + Rb ) * div1_2
    bz = ( -Pb + Qb + Rb ) * div1_2

    Pc = u(3,2) - c0 - cxx - cyy - cxy
    Qc = u(3,3) - c0 - cxx - czz - cxz
    Rc = u(3,4) - c0 - cyy - czz - cyz
    cx = (  Pc + Qc - Rc ) * div1_2
    cy = (  Pc - Qc + Rc ) * div1_2
    cz = ( -Pc + Qc + Rc ) * div1_2

    a = [a0,ax,ay,az,axx,ayy,azz,axy,ayz,axz]
    b = [b0,bx,by,bz,bxx,byy,bzz,bxy,byz,bxz]
    c = [c0,cx,cy,cz,cxx,cyy,czz,cxy,cyz,cxz]

  end subroutine get_polynomial_3D
! ****************************************************************************** !

! ****************************************************************************** !
  !> No comment yet!
  !!
  !! @TODO add comment
  !!
  !! This subroutine's interface must match the abstract interface definition
  !! [[intpRoutine]] in intp/[[mus_interpolate_header_module]].f90 in order to
  !! be callable via [[mus_interpolation_method_type:do_intp]] function pointer.
  subroutine fillCoarser_compactd3q19( method, fieldProp, tLevelDesc, level, &
    &                                  sState, snSize, tState, tnSize,       &
    &                                  tAuxField, layout, nTargets,          &
    &                                  targetList, physics, time, varSys,    &
    &                                  derVarPos                             )
    ! -------------------------------------------------------------------- !
    class(mus_interpolation_method_type), intent(in) :: method

    !> Array of field properties (fluid or species)
    type(mus_field_prop_type), target, intent(in) :: fieldProp(:)

    !> level descriptor on target level
    type( tem_levelDesc_type ), intent(in) :: tLevelDesc

    !> my refinement level
    integer, intent(in) :: level

    !> State vector of SOURCE FLUID elements
    real(kind=rk), intent(in) :: sState(:)
    ! integer, intent(in) :: sNeigh(:)
    integer, intent(in) :: snSize

    !> State vector of TARGET GHOST elements
    real(kind=rk), intent(inout) :: tState(:)
    ! integer, intent(in) :: tNeigh(:)
    integer, intent(in) :: tnSize

    !> AuxField variable to fill on target GHOST elements
    real(kind=rk), intent(inout) :: tAuxField(:)

    !> the layout used
    type( mus_scheme_layout_type ), intent(in) :: layout

    !> List of target elements ( their position in depSource list )
    integer, intent(in) :: nTargets
    integer, intent(in) :: targetList(nTargets)

    !> physics type to convert lattice to physics SI unit and vice versa
    !! @todo: This can be replaced by scale factor by level
    type( mus_physics_type ), intent(in) :: physics

    !> time required to compute viscosity on target element barycenter
    type(tem_time_type), intent(in) :: time

    !> scheme variable system
    type( tem_varSys_type ), intent(in) :: varSys

    !> position of all derive variable in varSys for all fields
    type(mus_derVarPos_type), intent(in) :: derVarPos(:)
    ! -------------------------------------------------------------------- !
    integer :: sourceLevel    ! level of source elements
    integer :: targetLevel    ! level of target elements
    integer :: pos
    integer :: iElem, indElem, ii
    ! macroscopic velocities for all source elements
    real(kind=rk) :: fEq(QQ), fNeq(QQ)
    real(kind=rk) :: rho, vel(3), S(6)
    real(kind=rk) :: omegaSource, omegaTarget
    real(kind=rk) :: pFac, vFac, sFac, toS, toNeq, sss
    !real(kind=rk) :: a(10), b(10), c(10)
    real(kind=rk) :: mom(1:10,4)
    integer :: src, iS
?? copy :: fEq_d3q19_var
    real(kind=rk) :: a0,axx,ayy,azz,axy,ayz,axz !,ax,ay,az
    real(kind=rk) :: b0,bxx,byy,bzz,bxy,byz,bxz !,bx,by,bz
    real(kind=rk) :: c0,cxx,cyy,czz,cxy,cyz,cxz !,cx,cy,cz
    real(kind=rk) :: P, Q, R
    real(kind=rk) :: Pa, Qa, Ra
    real(kind=rk) :: Pb, Qb, Rb
    real(kind=rk) :: Pc, Qc, Rc
    type(mus_fluid_type), pointer :: fluid
    integer :: nScalars
    real(kind=rk), allocatable :: momBuf(:,:)
    ! ---------------------------------------------------------------------------
    fluid => fieldProp(1)%fluid
    nScalars = varSys%nScalars

    sourceLevel = level + 1
    targetLevel = level
    !KM: assuming viscosity is constant
    omegaSource = fluid%viscKine%omLvl( sourceLevel )%val(1)
    omegaTarget = fluid%viscKine%omLvl( targetLevel )%val(1)
    ! pressure and velocity conversion factor between levels
    pFac = physics%pfac( sourceLevel, targetLevel )
    vFac = physics%vfac( sourceLevel, targetLevel )
    sFac = physics%sfac( sourceLevel, targetLevel )

    ! factor for strain rate calculation
    toS = omegaSource * cs2inv / ?post2pre?( omegaSource ) * div1_2
    toNeq = cs2inv / omegaTarget * ?pre2post?( omegaTarget )

    allocate( momBuf( 10, tLevelDesc%sourceFromFiner%nVals ) )

    call calc_moments(state = sState, &
      &               nScalars = nScalars, &
      &               nSize = snSize, &
      &               elementList = tLevelDesc%sourceFromFiner%val, &
      &               nElems      = tLevelDesc%sourceFromFiner%nVals, &
      &               buffer = momBuf, &
      &               toS  = toS )

    !$omp do schedule(static)
    !DIR$ IVDEP
    !NEC$ ivdep
    do indElem = 1, nTargets

      iElem = targetList( indElem )

      !NEC$ unroll(4)
      do iS = 1, 4
        src = tLevelDesc%depFromFiner( iElem )%elemBuffer%val(iS)
        !NEC$ unroll(4)
        mom(1:10, iS) = momBuf(1:10, src )
      end do

    ! Compact INTP -----------------------------------------------------------

    ! call get_polynomial_3D( mom(2:4,1:4), mom(5:10,1:4), a, b, c)

    a0 = mom(2,1)
    b0 = mom(3,1)
    c0 = mom(4,1)

    axx = ( -mom(5,1) + mom(5,2) + mom(5,3) - mom(5,4) ) * div1_4
    axy = ( -mom(5,1) + mom(5,2) - mom(5,3) + mom(5,4) ) * div1_2
    axz = ( -mom(5,1) - mom(5,2) + mom(5,3) + mom(5,4) ) * div1_2

    bxy = ( -mom(6,1) + mom(6,2) + mom(6,3) - mom(6,4) ) * div1_2
    byy = ( -mom(6,1) + mom(6,2) - mom(6,3) + mom(6,4) ) * div1_4
    byz = ( -mom(6,1) - mom(6,2) + mom(6,3) + mom(6,4) ) * div1_2

    cxz = ( -mom(7,1) + mom(7,2) + mom(7,3) - mom(7,4) ) * div1_2
    cyz = ( -mom(7,1) + mom(7,2) - mom(7,3) + mom(7,4) ) * div1_2
    czz = ( -mom(7,1) - mom(7,2) + mom(7,3) + mom(7,4) ) * div1_4

    bxx = ( -mom( 8,1) + mom( 8,2) + mom( 8,3) - mom( 8,4) - axy ) * div1_2
    cyy = ( -mom( 9,1) + mom( 9,2) - mom( 9,3) + mom( 9,4) - byz ) * div1_2
    cxx = ( -mom(10,1) + mom(10,2) + mom(10,3) - mom(10,4) - axz ) * div1_2

    ayy = ( -mom( 8,1) + mom( 8,2) - mom( 8,3) + mom( 8,4) - bxy ) * div1_2
    bzz = ( -mom( 9,1) - mom( 9,2) + mom( 9,3) + mom( 9,4) - cyz ) * div1_2
    azz = ( -mom(10,1) - mom(10,2) + mom(10,3) + mom(10,4) - cxz ) * div1_2

    P =   -mom( 8,1) - mom( 8,2) + mom( 8,3) + mom( 8,4)
    Q =   -mom( 9,1) + mom( 9,2) + mom( 9,3) - mom( 9,4)
    R =   -mom(10,1) + mom(10,2) - mom(10,3) + mom(10,4)

    ! P == ayz + bzx
    ! Q == bzx + cxy
    ! R == ayz + cxy
    ayz = (   P - Q + R ) * div1_2
    bxz = (   P + Q - R ) * div1_2
    cxy = ( - P + Q + R ) * div1_2

    Pa = mom(2,2) - a0 - axx - ayy - axy
    Qa = mom(2,3) - a0 - axx - azz - axz
    Ra = mom(2,4) - a0 - ayy - azz - ayz
    ! ax = (  Pa + Qa - Ra ) * div1_2
    ! ay = (  Pa - Qa + Ra ) * div1_2
    ! az = ( -Pa + Qa + Ra ) * div1_2

    Pb = mom(3,2) - b0 - bxx - byy - bxy
    Qb = mom(3,3) - b0 - bxx - bzz - bxz
    Rb = mom(3,4) - b0 - byy - bzz - byz
    ! bx = (  Pb + Qb - Rb ) * div1_2
    ! by = (  Pb - Qb + Rb ) * div1_2
    ! bz = ( -Pb + Qb + Rb ) * div1_2

    Pc = mom(4,2) - c0 - cxx - cyy - cxy
    Qc = mom(4,3) - c0 - cxx - czz - cxz
    Rc = mom(4,4) - c0 - cyy - czz - cyz
    ! cx = (  Pc + Qc - Rc ) * div1_2
    ! cy = (  Pc - Qc + Rc ) * div1_2
    ! cz = ( -Pc + Qc + Rc ) * div1_2

    ! vel(1) = ( a0 + (ax+ay+az)*div1_2 + (axx+ayy+azz+axy+ayz+axz)*div1_4 ) * vFac
    ! vel(2) = ( b0 + (bx+by+bz)*div1_2 + (bxx+byy+bzz+bxy+byz+bxz)*div1_4 ) * vFac
    ! vel(3) = ( c0 + (cx+cy+cz)*div1_2 + (cxx+cyy+czz+cxy+cyz+cxz)*div1_4 ) * vFac
    ! ax+ay+az = ( Pa + Qa + Ra ) * div1_2
    vel(1) = ( a0 + (Pa+Qa+Ra+axx+ayy+azz+axy+ayz+axz)*div1_4 ) * vFac
    vel(2) = ( b0 + (Pb+Qb+Rb+bxx+byy+bzz+bxy+byz+bxz)*div1_4 ) * vFac
    vel(3) = ( c0 + (Pc+Qc+Rc+cxx+cyy+czz+cxy+cyz+cxz)*div1_4 ) * vFac
    ! Compact INTP -----------------------------------------------------------

      ! vel(1) = (a(1) + sum(a(2:4))*div1_2 + sum(a(5:10))*div1_4) * vFac
      ! vel(2) = (b(1) + sum(b(2:4))*div1_2 + sum(b(5:10))*div1_4) * vFac
      ! vel(3) = (c(1) + sum(c(2:4))*div1_2 + sum(c(5:10))*div1_4) * vFac

      ! Linear intp p and S
      rho = (mom(1,1)+mom(1,2)+mom(1,3)+mom(1,4)) * div1_4 * pFac + rho0

      !NEC$ unroll(4)
      S(1:6) = (mom(5:10,1)+mom(5:10,2)+mom(5:10,3)+mom(5:10,4)) * div1_4 * sFac

?? copy :: fEq_d3q19_a( fEq, vel(1), vel(2), vel(3), rho, rho0 )
?? copy :: second_moment_d3q19( fNeq, S )

      pos = iElem + tLevelDesc%offset( 1, eT_ghostFromFiner)
      !NEC$ unroll(4)
      do ii = 1, 19
        tState(?IDX?(ii, pos, nScalars, tnSize)) = fEq(ii) - layout%weight(ii)*fNeq(ii)*toNeq
      end do

    end do ! indElem
    !$omp end do nowait

?? IF (loglvl >= 1) THEN
write(dbgUnit(1), *) '---- Leave fillCoarser_compactd3q19  -----------'
write(dbgUnit(1), *) ''
?? ENDIF

  end subroutine fillCoarser_compactd3q19
! ****************************************************************************** !

! ****************************************************************************** !
  pure function getStrain( f, rho, vel, omega, cxDir ) result( S )
    ! ---------------------------------------------------------------------------
    !> stencil layout
    integer, intent(in) :: cxDir(3,QQ)
    !> pdf array ( post-collision value )
    real(kind=rk), intent(in) :: f(QQ), rho, vel(3)
    !> relaxation parameter
    real(kind=rk), intent(in) :: omega
    !> output array: strain rate tensor
    real(kind=rk) :: S(3,3)
    ! ---------------------------------------------------------------------------
    real(kind=rk) :: fac
    ! ---------------------------------------------------------------------------

    S(1,1) = - f(1) - f(4) - sum(f(11:18))                + vel(1) * vel(1) + cs2 * rho
    S(2,2) = - f(2) - f(5) - sum(f(7:10)) - sum(f(15:18)) + vel(2) * vel(2) + cs2 * rho
    S(3,3) = - f(3) - f(6) - sum(f(7:10)) - sum(f(11:14)) + vel(3) * vel(3) + cs2 * rho

    S(2,1) = - f(15) + f(16) + f(17) - f(18) + vel(2) * vel(1)
    S(3,2) = - f( 7) + f( 8) + f( 9) - f(10) + vel(3) * vel(2)
    S(3,1) = - f(11) + f(12) + f(13) - f(14) + vel(3) * vel(1)
    S(1,2) = S(2,1)
    S(2,3) = S(3,2)
    S(1,3) = S(3,1)

    ! Multiply factor and convert to pre-collision value
    fac = omega * cs2inv / ?post2pre?( omega ) * div1_2
    s = s * fac

  end function getStrain
! ****************************************************************************** !

! ****************************************************************************** !
  pure function getNEq( S, omega, cxDirRK, weight ) result( nEq )
    ! ---------------------------------------------------------------------------
    !> Strain rate tensor. It is a symmetric 3x3 matrix
    real(kind=rk), intent(in) :: S(3,3)
    real(kind=rk), intent(in) :: cxDirRK(3,QQ)
    real(kind=rk), intent(in) :: weight(QQ)
    real(kind=rk), intent(in) :: omega
    real(kind=rk) :: nEq(QQ)
    ! ---------------------------------------------------------------------------
    integer :: iDir
    real(kind=rk) :: fac
    ! ---------------------------------------------------------------------------

    do iDir = 1, QQ
      nEq(iDir) =   s(1,1) * ( cxDirRK(1,iDir) * cxDirRK(1,iDir) - cs2 ) &
        &         + s(2,2) * ( cxDirRK(2,iDir) * cxDirRK(2,iDir) - cs2 ) &
        &         + s(3,3) * ( cxDirRK(3,iDir) * cxDirRK(3,iDir) - cs2 ) &
        &         + s(1,2) *   cxDirRK(1,iDir) * cxDirRK(2,iDir) * 2.0_rk &
        &         + s(1,3) *   cxDirRK(1,iDir) * cxDirRK(3,iDir) * 2.0_rk &
        &         + s(2,3) *   cxDirRK(2,iDir) * cxDirRK(3,iDir) * 2.0_rk
    end do
    fac = cs2inv / omega * ?pre2post?( omega )
    nEq(:) = - weight(:) * nEq(:) * fac

  end function getNEq
! ****************************************************************************** !

! ****************************************************************************** !
  subroutine calc_moments(state, nScalars, nSize, elementList, nElems, buffer, toS )
    real(kind=rk), intent(in) :: state(:)
    integer, intent(in) :: nScalars
    integer, intent(in) :: nSize
    integer, intent(in) :: nElems
    integer, intent(in) :: elementList(nElems)
    real(kind=rk), intent(in) :: toS
    real(kind=rk), intent(out) :: buffer(10,nElems)

    real(kind=rk) :: cc1, cc2, cc3
    real(kind=rk) :: f(QQ), rho, vel(3), S(6)
    integer :: iElem, sourceElem, ii

    !$omp do schedule(static)
    !DIR$ IVDEP
    !NEC$ ivdep
    do iElem = 1, nElems

      sourceElem = elementList(iElem)
      !NEC$ unroll(4)
      do ii = 1, 10
        f(ii) = state( ?IDX?(  ii, sourceElem, nScalars, nSize ))
      end do
      !NEC$ unroll(4)
      do ii = 11, QQ
        f(ii) = state( ?IDX?(  ii, sourceElem, nScalars, nSize ))
      end do

      cc1 = f( 7) + f( 8) + f( 9) + f(10)
      cc2 = f(11) + f(12) + f(13) + f(14)
      cc3 = f(15) + f(16) + f(17) + f(18)

      rho = cc1 + cc2 + cc3 + f(19) + f(1) + f(2) + f(3) + f(4) + f(5) + f(6)

      ! ux uy uz
?? copy :: vel_d3q19( vel(1), vel(2), vel(3), f, rho0 )

      S(1) = - f(1) - f(4) - cc2 - cc3 + vel(1) * vel(1) + cs2 * rho
      S(2) = - f(2) - f(5) - cc1 - cc3 + vel(2) * vel(2) + cs2 * rho
      S(3) = - f(3) - f(6) - cc1 - cc2 + vel(3) * vel(3) + cs2 * rho
      S(4) = - f(15) + f(16) + f(17) - f(18) + vel(2) * vel(1)
      S(5) = - f( 7) + f( 8) + f( 9) - f(10) + vel(3) * vel(2)
      S(6) = - f(11) + f(12) + f(13) - f(14) + vel(3) * vel(1)

      buffer(1,iElem) = rho - rho0
      buffer(2,iElem) = vel(1)
      buffer(3,iElem) = vel(2)
      buffer(4,iElem) = vel(3)
      !NEC$ unroll(4)
      buffer(5:10,iElem) = S(1:6) * toS

    end do
!$omp end do
! Here is a implicit barrier, because we can go further until momBuf is complete

  end subroutine calc_moments
! ****************************************************************************** !

end module mus_interpolate_d3q19_module
