! Copyright (c) 2011-2013 Manuel Hasert <m.hasert@grs-sim.de>
! Copyright (c) 2011-2014 Simon Zimny <s.zimny@grs-sim.de>
! Copyright (c) 2011, 2013 Harald Klimach <harald.klimach@uni-siegen.de>
! Copyright (c) 2011-2016, 2019-2020 Kannan Masilamani <kannan.masilamani@uni-siegen.de>
! Copyright (c) 2011 Jan Hueckelheim <j.hueckelheim@grs-sim.de>
! Copyright (c) 2012-2016 Jiaxing Qi <jiaxing.qi@uni-siegen.de>
! Copyright (c) 2012, 2014 Kartik Jain <kartik.jain@uni-siegen.de>
! Copyright (c) 2016 Tobias Schneider <tobias1.schneider@student.uni-siegen.de>
! Copyright (c) 2018 Raphael Haupt <raphael.haupt@uni-siegen.de>
! Copyright (c) 2020 Peter Vitt <peter.vitt2@uni-siegen.de>
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions are met:
!
! 1. Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
!
! 2. Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
!
! THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF SIEGEN “AS IS” AND ANY EXPRESS
! OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
! OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
! IN NO EVENT SHALL UNIVERSITY OF SIEGEN OR CONTRIBUTORS BE LIABLE FOR ANY
! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
! ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
?? include 'header/lbm_macros.inc'
?? include 'treelm/source/logMacros.inc'
! ****************************************************************************** !
!> This module provides the definition and methods for
!! BGK advection relaxation scheme.
module mus_bgk_module
  use iso_c_binding, only: c_f_pointer

  ! include treelm modules
  use env_module,               only: rk
  use tem_varSys_module,        only: tem_varSys_type !, tem_varSys_op_type
  use tem_param_module,         only: div1_3, div1_36, div1_8, div3_4h, div1_4,&
    &                                 div3_8, div9_16, div3_16, cs2inv, cs4inv,&
    &                                 rho0
  ! use tem_spacetime_fun_module, only: tem_st_fun_listElem_type,                &
  !   &                                 tem_spacetime_for
  ! use tem_logging_module,       only: logUnit
  ! use tem_dyn_array_module,     only: PositionOfVal
  ! use tem_aux_module,           only: tem_abort

  ! include musubi modules
  use mus_field_prop_module,    only: mus_field_prop_type
  use mus_scheme_layout_module, only: mus_scheme_layout_type
  use mus_scheme_type_module,   only: mus_scheme_type
  use mus_derVarPos_module,     only: mus_derVarPos_type
  use mus_param_module,         only: mus_param_type
  use mus_varSys_module,        only: mus_varSys_data_type

  implicit none

  private

  public :: bgk_advRel_generic
  public :: bgk_advRel_generic_incomp
  public :: bgk_advRel_forcing
  public :: bgk_advRel_flekkoy
  public :: bgk_advRel_flekkoy_noFluid

contains

! ****************************************************************************** !
  !> Advection relaxation routine for the BGK model.
  !!
  !! This subroutine interface must match the abstract interface definition
  !! [[kernel]] in scheme/[[mus_scheme_type_module]].f90 in order to be callable
  !! via [[mus_scheme_type:compute]] function pointer.
  subroutine bgk_advRel_forcing( fieldProp, inState, outState, auxField, &
    &                            neigh, nElems, nSolve, level, layout,   &
    &                            params, varSys, derVarPos               )
    ! -------------------------------------------------------------------- !
    !> Array of field properties (fluid or species)
    type(mus_field_prop_type), intent(in) :: fieldProp(:)
    !> variable system definition
    type(tem_varSys_type), intent(in) :: varSys
    !> current layout
    type(mus_scheme_layout_type), intent(in) :: layout
    !> number of elements in state Array
    integer, intent(in) :: nElems
    !> input  pdf vector
    real(kind=rk), intent(in)  ::  inState(nElems * varSys%nScalars)
    !> output pdf vector
    real(kind=rk), intent(out) :: outState(nElems * varSys%nScalars)
    !> Auxiliary field computed from pre-collision state
    !! Is updated with correct velocity field for multicomponent models
    real(kind=rk), intent(inout) :: auxField(nElems * varSys%nAuxScalars)
    !> connectivity vector
    integer, intent(in) :: neigh(nElems * layout%fStencil%QQ)
    !> number of elements solved in kernel
    integer, intent(in) :: nSolve
    !> current level
    integer,intent(in) :: level
    !> global parameters
    type(mus_param_type),intent(in) :: params
    !> position of derived quantities in varsys for all fields
    type( mus_derVarPos_type ), intent(in) :: derVarPos(:)
    ! -------------------------------------------------------------------- !
    integer :: iElem, iDir, QQ, nScalars
    real(kind=rk) pdfTmp( layout%fStencil%QQ ) ! temporary local pdf values
    real(kind=rk) rho     ! local density
    real(kind=rk) force( layout%fStencil%QQ )  ! local x-velocity
    real(kind=rk) u_x     ! local x-velocity
    real(kind=rk) u_y     ! local y-velocity
    real(kind=rk) u_z     ! local z-velocity
    real(kind=rk) usq     ! square velocity
    ! derived constants
    real(kind=rk) usqn, usqn_o1, usqn_o2
    real(kind=rk) omega_2, cmpl_o, omega
    real(kind=rk) coeff_1, coeff_2
    real(kind=rk) ui1, ui3, ui10, ui11, ui12, ui13
    real(kind=rk) fac_1, fac_2, fac_3, fac_4, fac_9, fac_10, fac_11, fac_12,   &
      &           fac_13
    real(kind=rk) sum1_1, sum1_2, sum2_1, sum2_2, sum3_1, sum3_2, sum4_1,      &
      &           sum4_2, sum9_1, sum9_2, sum10_1, sum10_2, sum11_1, sum11_2,  &
      &           sum12_1, sum12_2, sum13_1, sum13_2
    integer :: dens_pos, vel_pos(3), elemOff
    ! ---------------------------------------------------------------------------
    dens_pos = varSys%method%val(derVarPos(1)%density)%auxField_varPos(1)
    vel_pos = varSys%method%val(derVarPos(1)%velocity)%auxField_varPos(1:3)

    ! some global variables
    QQ = layout%fStencil%QQ
    ! nElems = size(neigh)/QQ
    nScalars = varSys%nScalars

    !NEC$ ivdep
!$omp do schedule(static)
    nodeloop: do iElem=1,nSolve
      omega = fieldProp(1)%fluid%viscKine%omLvl(level)%val(iElem)
      omega_2 = 2._rk * omega
      cmpl_o  = 1._rk - omega

      ! First load all local values into temp array
      ! Generic! PUSH+PULL is possible
      pdfTmp(  1 ) = inState( ?FETCH?(  1, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  2 ) = inState( ?FETCH?(  2, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  3 ) = inState( ?FETCH?(  3, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  4 ) = inState( ?FETCH?(  4, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  5 ) = inState( ?FETCH?(  5, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  6 ) = inState( ?FETCH?(  6, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  7 ) = inState( ?FETCH?(  7, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  8 ) = inState( ?FETCH?(  8, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp(  9 ) = inState( ?FETCH?(  9, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 10 ) = inState( ?FETCH?( 10, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 11 ) = inState( ?FETCH?( 11, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 12 ) = inState( ?FETCH?( 12, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 13 ) = inState( ?FETCH?( 13, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 14 ) = inState( ?FETCH?( 14, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 15 ) = inState( ?FETCH?( 15, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 16 ) = inState( ?FETCH?( 16, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 17 ) = inState( ?FETCH?( 17, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 18 ) = inState( ?FETCH?( 18, 1, iElem, QQ, nScalars, nElems,neigh ))
      pdfTmp( 19 ) = inState( ?FETCH?( 19, 1, iElem, QQ, nScalars, nElems,neigh ))

      elemOff = (iElem-1)*varSys%nAuxScalars
      ! local density
      rho = auxField(elemOff + dens_pos)
      ! local x-, y- and z-velocity
      u_x = auxField(elemOff + vel_pos(1))
      u_y = auxField(elemOff + vel_pos(2))
      u_z = auxField(elemOff + vel_pos(3))

      ! square velocity and derived constants
      usq = u_x*u_x + u_y*u_y + u_z*u_z
      usqn = div1_36 * (1._rk - 1.5_rk * usq) * rho

      ! Calculate the force for all directions
      do iDir = 1, layout%fStencil%QQ
        force( iDir ) = ( 1._rk -0.5_rk*omega )*layout%Weight( iDir )          &
          &           * ((( layout%fStencil%cxDir( 1, iDir ) - u_x  )        &
          &           * cs2inv + ( layout%fStencil%cxDir( 1, iDir ) * u_x    &
          &           +            layout%fStencil%cxDir( 2, iDir ) * u_y    &
          &           +            layout%fStencil%cxDir( 3, iDir ) * u_z )  &
          &           *     cs4inv*layout%fStencil%cxDir( 1, iDir ))         &
          &           * fieldProp(1)%fluid%force( 1 )                          &
          &           + (( layout%fStencil%cxDir( 2, iDir ) - u_y ) * cs2inv &
          &           +  ( layout%fStencil%cxDir( 2, iDir ) * u_x            &
          &           +    layout%fStencil%cxDir( 2, iDir ) * u_y            &
          &           +    layout%fStencil%cxDir( 3, iDir ) * u_z )          &
          &           * cs4inv*layout%fStencil%cxDir( 2, iDir ))             &
          &           * fieldProp(1)%fluid%force( 2 )                        &
          &           + (( layout%fStencil%cxDir( 3, iDir ) - u_z ) * cs2inv &
          &           +  ( layout%fStencil%cxDir( 1, iDir ) * u_x            &
          &           +    layout%fStencil%cxDir( 2, iDir ) * u_y            &
          &           +    layout%fStencil%cxDir( 3, iDir ) * u_z )          &
          &           * cs4inv*layout%fStencil%cxDir( 3, iDir ))             &
          &           * fieldProp(1)%fluid%force( 3 ))
      end do

      outState( ?SAVE?( 19,1, iElem, QQ, nScalars,nElems,neigh ))          &
        &   = pdfTmp(19)*cmpl_o+omega*rho*( div1_3-0.5_rk*usq ) + force( 19 )

      coeff_1 = div1_8 * omega * rho

      usqn_o1 = omega * usqn

      ui1     =  u_x + u_y
      fac_1   = coeff_1 * ui1
      sum1_1  = fac_1 * div3_4h
      sum1_2  = fac_1 * ui1 + usqn_o1

      outState( ?SAVE?( 18, 1, iElem, QQ, nScalars,nElems,neigh )) &
        &   = pdfTmp(18) *cmpl_o+sum1_1 +sum1_2  + force( 18 )
      outState( ?SAVE?( 15, 1, iElem, QQ, nScalars,nElems,neigh )) &
        &   = pdfTmp(15) *cmpl_o-sum1_1 +sum1_2  + force( 15 )

      ui3     = -u_x + u_y
      fac_3   = coeff_1 * ui3
      sum3_1  = fac_3 * div3_4h
      sum3_2  = fac_3 * ui3 + usqn_o1

      outState( ?SAVE?( 16, 1, iElem, QQ, nScalars,nElems,neigh )) &
        &   = pdfTmp(16) *cmpl_o+sum3_1 +sum3_2 + force( 16 )

      outState( ?SAVE?( 17, 1, iElem, QQ, nScalars,nElems,neigh )) &
        &   = pdfTmp(17) *cmpl_o-sum3_1 +sum3_2 + force( 17 )


      ui10    =  u_x + u_z
      fac_10  = coeff_1 * ui10
      sum10_1 = fac_10 * div3_4h
      sum10_2 = fac_10 * ui10 + usqn_o1

      outState( ?SAVE?( 14, 1, iElem, QQ, nScalars,nElems,neigh )) &
        &   = pdfTmp(14)*cmpl_o+sum10_1+sum10_2  + force( 14 )

      outState( ?SAVE?( 11, 1, iElem, QQ, nScalars,nElems,neigh )) &
        &   = pdfTmp(11)*cmpl_o-sum10_1+sum10_2  + force( 11 )


      ui12    = -u_x + u_z
      fac_12  = coeff_1 * ui12
      sum12_1 = fac_12 * div3_4h
      sum12_2 = fac_12 * ui12 + usqn_o1

      outState( ?SAVE?( 13, 1, iElem, QQ, nScalars,nElems,neigh )) &
        &   = pdfTmp(13)*cmpl_o+sum12_1+sum12_2  + force( 13 )

      outState( ?SAVE?( 12, 1, iElem, QQ, nScalars,nElems,neigh )) &
        &   = pdfTmp(12)*cmpl_o-sum12_1+sum12_2  + force( 12 )


      ui11    =  u_y + u_z
      fac_11  = coeff_1 * ui11
      sum11_1 = fac_11 * div3_4h
      sum11_2 = fac_11 * ui11 + usqn_o1

      outState( ?SAVE?( 10, 1, iElem, QQ, nScalars,nElems,neigh )) &
        &   = pdfTmp(10)*cmpl_o+sum11_1+sum11_2 + force( 10 )

      outState( ?SAVE?( 7, 1, iElem, QQ, nScalars,nElems,neigh ))  &
        &   = pdfTmp(7)*cmpl_o-sum11_1+sum11_2  + force( 7 )


      ui13    = -u_y + u_z
      fac_13  = coeff_1 * ui13
      sum13_1 = fac_13 * div3_4h
      sum13_2 = fac_13 * ui13 + usqn_o1

      outState( ?SAVE?( 8, 1, iElem, QQ, nScalars,nElems,neigh ))  &
        &   = pdfTmp(8)*cmpl_o+sum13_1+sum13_2  + force( 8 )

      outState( ?SAVE?( 9, 1, iElem, QQ, nScalars,nElems,neigh ))  &
        &  = pdfTmp(9)*cmpl_o-sum13_1+sum13_2  + force( 9 )


      coeff_2 = div1_8 * omega_2 * rho
      usqn_o2 = omega_2 * usqn

      fac_2   = coeff_2 * u_y
      sum2_1  = fac_2 * div3_4h
      sum2_2  = fac_2 * u_y + usqn_o2

      outState( ?SAVE?( 5, 1, iElem, QQ, nScalars,nElems,neigh ))  &
        &  = pdfTmp(5) *cmpl_o+sum2_1 +sum2_2  + force( 5 )

      outState( ?SAVE?( 2, 1, iElem, QQ, nScalars,nElems,neigh ))  &
        &  = pdfTmp(2) *cmpl_o-sum2_1 +sum2_2  + force( 2 )


      fac_4   = coeff_2 * u_x
      sum4_1  = fac_4 * div3_4h
      sum4_2  = fac_4 * u_x + usqn_o2

      outState( ?SAVE?( 1, 1, iElem, QQ, nScalars,nElems,neigh ))  &
        &    = pdfTmp(1) *cmpl_o-sum4_1 +sum4_2 + force( 1 )

      outState( ?SAVE?( 4, 1, iElem, QQ, nScalars,nElems,neigh ))  &
        &   = pdfTmp(4) *cmpl_o+sum4_1 +sum4_2  + force( 4 )


      fac_9   = coeff_2 * u_z
      sum9_1  = fac_9 * div3_4h
      sum9_2  = fac_9 * u_z + usqn_o2

      outState( ?SAVE?( 6, 1, iElem, QQ, nScalars,nElems,neigh ))  &
        &   = pdfTmp(6) *cmpl_o+sum9_1 +sum9_2  + force( 6 )

      outState( ?SAVE?( 3, 1, iElem, QQ, nScalars,nElems,neigh ))  &
        &   = pdfTmp(3) *cmpl_o-sum9_1 +sum9_2  + force( 3 )


    end do nodeloop
!$omp end do nowait

  end subroutine bgk_advRel_forcing
! **************************************************************************** !


! **************************************************************************** !
  !> Advection relaxation routine for the flekkoy diffusion model.
  !!
  !! This subroutine interface must match the abstract interface definition
  !! [[kernel]] in scheme/[[mus_scheme_type_module]].f90 in order to be callable
  !! via [[mus_scheme_type:compute]] function pointer.
  subroutine bgk_advRel_flekkoy( fieldProp, inState, outState, auxField, &
    &                            neigh, nElems, nSolve, level, layout,   &
    &                            params, varSys, derVarPos               )
    ! -------------------------------------------------------------------- !
    !> Array of field properties (fluid or species)
    type(mus_field_prop_type), intent(in) :: fieldProp(:)
    !> variable system definition
    type(tem_varSys_type), intent(in) :: varSys
    !> current layout
    type(mus_scheme_layout_type), intent(in) :: layout
    !> number of elements in state Array
    integer, intent(in) :: nElems
    !> input  pdf vector
    real(kind=rk), intent(in)  ::  inState(nElems * varSys%nScalars)
    !> output pdf vector
    real(kind=rk), intent(out) :: outState(nElems * varSys%nScalars)
    !> Auxiliary field computed from pre-collision state
    !! Is updated with correct velocity field for multicomponent models
    real(kind=rk), intent(inout) :: auxField(nElems * varSys%nAuxScalars)
    !> connectivity vector
    integer, intent(in) :: neigh(nElems * layout%fStencil%QQ)
    !> number of elements solved in kernel
    integer, intent(in) :: nSolve
    !> current level
    integer,intent(in) :: level
    !> global parameters
    type(mus_param_type),intent(in) :: params
    !> position of derived quantities in varsys for all fields
    type( mus_derVarPos_type ), intent(in) :: derVarPos(:)
    ! -------------------------------------------------------------------- !
    integer :: iElem, iDir
    type(mus_varSys_data_type), pointer :: fPtr
    type(mus_scheme_type), pointer :: scheme
    real(kind=rk) :: pdfTmp( layout%fStencil%QQ ) ! temporary local pdf values
    real(kind=rk) :: rho, feq
    real(kind=rk) :: d_omega
    real(kind=rk) :: transVel( nSolve*3 ) ! velocity from the transport field
    real(kind=rk) :: uc ! u_i,fluid * c_i
    integer :: vel_varPos ! position of transport velocity variable in varSys
    real(kind=rk) :: inv_vel, u_fluid(3)
    ! --------------------------------------------------------------------------
    ! access scheme via 1st variable method data which is a state variable
    call C_F_POINTER( varSys%method%val(derVarPos(1)%pdf)%method_Data, fPtr )
    scheme => fPtr%solverData%scheme

    ! passive scalar has only one transport Variable
    vel_varPos = scheme%transVar%method(1)%data_varPos
    ! Get velocity field
    call varSys%method%val(vel_varPos)%get_valOfIndex( &
      & varSys  = varSys,                              &
      & time    = params%general%simControl%now,       &
      & iLevel  = level,                               &
      & idx     = scheme%transVar%method(1)            &
      &           %pntIndex%indexLvl(level)            &
      &           %val(1:nSolve),                      &
      & nVals   = nSolve,                              &
      & res     = transVel                             )

    ! convert physical velocity into LB velocity
    inv_vel = 1.0_rk / params%physics%fac( level )%vel
    transVel = transVel * inv_vel

    ! initialize and define some field wise parameters
    d_omega = 2._rk / ( 1._rk + 6._rk                               &
      &                       * fieldProp(1)%species%diff_coeff( 1 ))

    nodeloop: do iElem = 1, nSolve
      ! x-, y- and z-velocity from transport field
      u_fluid = transVel( (iElem-1)*3+1 : iElem*3 )

      do iDir = 1, layout%fStencil%QQ
        pdfTmp( iDir ) = &
& instate( ?FETCH?( iDir, 1, iElem, layout%fStencil%QQ, varSys%nScalars, nElems,neigh ) )
      end do
      rho = sum( pdfTmp )

      do iDir = 1, layout%fStencil%QQ
        ! compute c_i * u
        uc = dble( layout%fStencil%cxDir(1, iDir)) * u_fluid(1) + &
          &  dble( layout%fStencil%cxDir(2, iDir)) * u_fluid(2) + &
          &  dble( layout%fStencil%cxDir(3, iDir)) * u_fluid(3)

        ! compute the equilibrium (fi_eq = weight_i * rho * ( 1+c_i*u / cs^2))
        feq = rho * layout%weight( iDir ) * (1._rk+3._rk*uc)

        outstate(                                                            &
& ?SAVE?( iDir, 1, iElem, layout%fStencil%QQ, varSys%nScalars, nElems,neigh ) ) =     &
          &                pdfTmp( iDir ) + d_omega * ( feq - pdfTmp( idir ))
      end do

    end do nodeloop

  end subroutine bgk_advRel_flekkoy
! ****************************************************************************** !


! ****************************************************************************** !
  !> Advection relaxation routine for the flekkoy diffusion model without prior
  !! definition of a dependent fluid scheme. Velocity is read from the depend
  !! table.
  !!
  !! This subroutine interface must match the abstract interface definition
  !! [[kernel]] in scheme/[[mus_scheme_type_module]].f90 in order to be callable
  !! via [[mus_scheme_type:compute]] function pointer.
  subroutine bgk_advRel_flekkoy_noFluid( fieldProp, inState, outState,    &
    &                                    auxField, neigh, nElems, nSolve, &
    &                                    level, layout, params, varSys,   &
    &                                    derVarPos                        )
    ! -------------------------------------------------------------------- !
    !> Array of field properties (fluid or species)
    type(mus_field_prop_type), intent(in) :: fieldProp(:)
    !> variable system definition
    type(tem_varSys_type), intent(in) :: varSys
    !> current layout
    type(mus_scheme_layout_type), intent(in) :: layout
    !> number of elements in state Array
    integer, intent(in) :: nElems
    !> input  pdf vector
    real(kind=rk), intent(in)  ::  inState(nElems * varSys%nScalars)
    !> output pdf vector
    real(kind=rk), intent(out) :: outState(nElems * varSys%nScalars)
    !> Auxiliary field computed from pre-collision state
    !! Is updated with correct velocity field for multicomponent models
    real(kind=rk), intent(inout) :: auxField(nElems * varSys%nAuxScalars)
    !> connectivity vector
    integer, intent(in) :: neigh(nElems * layout%fStencil%QQ)
    !> number of elements solved in kernel
    integer, intent(in) :: nSolve
    !> current level
    integer,intent(in) :: level
    !> global parameters
    type(mus_param_type),intent(in) :: params
    !> position of derived quantities in varsys for all fields
    type( mus_derVarPos_type ), intent(in) :: derVarPos(:)
    ! -------------------------------------------------------------------- !
    integer :: iElem
    integer :: iDir
    integer :: QQ, nScalars
    real(kind=rk) :: pdfTmp( layout%fStencil%QQ ) ! temporary local pdf values
    real(kind=rk) :: rho
    real(kind=rk) :: feq
    real(kind=rk) :: d_omega
    ! ---------------------------------------------------------------------------
    QQ       = layout%fStencil%QQ
    nScalars = varSys%nScalars

    ! initialize and define some field wise parameters
    d_omega = 2._rk / ( 1._rk + 6._rk * fieldProp(1)%species%diff_coeff(1) )

    nodeloop: do iElem = 1, nSolve

      rho = 0._rk
      do iDir = 1, QQ
        ! store the pdfs for this element and compute the density locally
        pdfTmp( iDir ) = &
          & instate( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh ))
        rho = rho + pdfTmp( iDir )
      end do

      directionloop: do iDir = 1, QQ
        ! compute the equilibrium
        ! fi_eq = weight_i * rho * ( 1+c_i*u / cs^2))
        !       = weight_i * rho
        feq = rho * layout%weight( iDir )

        outstate( ?SAVE?( iDir, 1, iElem, QQ, nScalars, nElems,neigh ) ) =  &
          &                pdfTmp( iDir ) + d_omega * ( feq - pdfTmp( idir ))
      end do directionloop
    end do nodeloop

  end subroutine bgk_advRel_flekkoy_noFluid
! ****************************************************************************** !


! ****************************************************************************** !
  !> Advection relaxation routine for the
  !! BGK model with an explicit calculation of all equilibrium
  !! quantities. Slow and simple. This routine should only be
  !! used for testing purposes
  !!
  !! This subroutine interface must match the abstract interface definition
  !! [[kernel]] in scheme/[[mus_scheme_type_module]].f90 in order to be callable
  !! via [[mus_scheme_type:compute]] function pointer.
  subroutine bgk_advRel_generic( fieldProp, inState, outState, auxField, &
    &                            neigh, nElems, nSolve, level, layout,   &
    &                            params, varSys, derVarPos               )
    ! -------------------------------------------------------------------- !
    !> Array of field properties (fluid or species)
    type(mus_field_prop_type), intent(in) :: fieldProp(:)
    !> variable system definition
    type(tem_varSys_type), intent(in) :: varSys
    !> current layout
    type(mus_scheme_layout_type), intent(in) :: layout
    !> number of elements in state Array
    integer, intent(in) :: nElems
    !> input  pdf vector
    real(kind=rk), intent(in)  ::  inState(nElems * varSys%nScalars)
    !> output pdf vector
    real(kind=rk), intent(out) :: outState(nElems * varSys%nScalars)
    !> Auxiliary field computed from pre-collision state
    !! Is updated with correct velocity field for multicomponent models
    real(kind=rk), intent(inout) :: auxField(nElems * varSys%nAuxScalars)
    !> connectivity vector
    integer, intent(in) :: neigh(nElems * layout%fStencil%QQ)
    !> number of elements solved in kernel
    integer, intent(in) :: nSolve
    !> current level
    integer,intent(in) :: level
    !> global parameters
    type(mus_param_type),intent(in) :: params
    !> position of derived quantities in varsys for all fields
    type( mus_derVarPos_type ), intent(in) :: derVarPos(:)
    ! -------------------------------------------------------------------- !
    integer :: iElem, iDir                       ! voxel element counter
    integer :: QQ, nScalars
    ! temporary distribution variables
    real(kind=rk) pdfTmp(layout%fStencil%QQ)
    real(kind=rk) ux(3)   ! local velocity
    real(kind=rk) rho     ! local density
    real(kind=rk) usq     ! square velocity
    ! derived constants
    ! equilibrium calculation variables
    real(kind=rk) ucx
    real(kind=rk) eqState(layout%fStencil%QQ)
    real(kind=rk) omega
    integer :: dens_pos, vel_pos(3), elemOff
    ! ---------------------------------------------------------------------------
    dens_pos = varSys%method%val(derVarPos(1)%density)%auxField_varPos(1)
    vel_pos = varSys%method%val(derVarPos(1)%velocity)%auxField_varPos(1:3)

    QQ = layout%fStencil%QQ
    ! nElems = size(neigh)/QQ
    nScalars = varSys%nScalars

    nodeloop: do iElem=1,nSolve
      !> Generic fetching step:
      !! Streaming for pull
      !! Local copy for push
      ux = 0._rk
      rho = 0._rk
      do iDir = 1, QQ
        pdfTmp( iDir ) = inState( ?FETCH?( iDir, 1, iElem, QQ, nScalars, nElems,neigh))
      end do

      ! element offset for auxField array
      elemOff = (iElem-1)*varSys%nAuxScalars
      ! local density
      rho = auxField(elemOff + dens_pos)
      ! local x-, y- and z-velocity
      ux(1) = auxField(elemOff + vel_pos(1))
      ux(2) = auxField(elemOff + vel_pos(2))
      ux(3) = auxField(elemOff + vel_pos(3))

      ! square velocity and derived constants
      usq = ux(1)*ux(1) + ux(2)*ux(2) + ux(3)*ux(3)

      !> relaxation parameter
      omega = fieldProp(1)%fluid%viscKine%omLvl(level)%val(iElem)

      do iDir = 1, QQ

        !> Pre-calculate velocitiy terms
        ucx = layout%fStencil%cxDirRK( 1, iDir )*ux(1) &
          & + layout%fStencil%cxDirRK( 2, iDir )*ux(2) &
          & + layout%fStencil%cxDirRK( 3, iDir )*ux(3)

        !> Calculate equilibrium distribution functions fEq
        eqState(iDir) = layout%weight(iDir)*rho*( 1.0_rk &
          &           +  ucx*cs2inv                      &
          &           +  ucx*ucx*cs2inv*cs2inv*0.5_rk     &
          &           -  usq*0.5_rk*cs2inv )

        !> Relaxation
        outState( ?SAVE?( iDir, 1, iElem, QQ, nScalars, nElems,neigh )) =     &
          &            pdfTmp( iDir ) - omega*( pdfTmp(iDir ) - eqState( iDir ))
      end do  !< iDir

    end do nodeloop

  end subroutine bgk_advRel_generic
! ****************************************************************************** !

! ****************************************************************************** !
  !> Advection relaxation routine for the
  !! BGK model with an explicit calculation of all equilibrium
  !! quantities. Slow and simple. This routine should only be
  !! used for testing purposes
  !!
  !! This subroutine interface must match the abstract interface definition
  !! [[kernel]] in scheme/[[mus_scheme_type_module]].f90 in order to be callable
  !! via [[mus_scheme_type:compute]] function pointer.
  subroutine bgk_advRel_generic_incomp( fieldProp, inState, outState,    &
    &                                   auxField, neigh, nElems, nSolve, &
    &                                   level, layout, params, varSys,   &
    &                                   derVarPos                        )
    ! -------------------------------------------------------------------- !
    !> Array of field properties (fluid or species)
    type(mus_field_prop_type), intent(in) :: fieldProp(:)
    !> variable system definition
    type(tem_varSys_type), intent(in) :: varSys
    !> current layout
    type(mus_scheme_layout_type), intent(in) :: layout
    !> number of elements in state Array
    integer, intent(in) :: nElems
    !> input  pdf vector
    real(kind=rk), intent(in)  ::  inState(nElems * varSys%nScalars)
    !> output pdf vector
    real(kind=rk), intent(out) :: outState(nElems * varSys%nScalars)
    !> Auxiliary field computed from pre-collision state
    !! Is updated with correct velocity field for multicomponent models
    real(kind=rk), intent(inout) :: auxField(nElems * varSys%nAuxScalars)
    !> connectivity vector
    integer, intent(in) :: neigh(nElems * layout%fStencil%QQ)
    !> number of elements solved in kernel
    integer, intent(in) :: nSolve
    !> current level
    integer,intent(in) :: level
    !> global parameters
    type(mus_param_type),intent(in) :: params
    !> position of derived quantities in varsys for all fields
    type( mus_derVarPos_type ), intent(in) :: derVarPos(:)
    ! -------------------------------------------------------------------- !
    integer :: iElem, iDir                       ! voxel element counter
    integer :: QQ, nScalars
    ! temporary distribution variables
    real(kind=rk) pdfTmp(layout%fStencil%QQ)
    real(kind=rk) ux(3)   ! local velocity
    real(kind=rk) rho     ! local density
    real(kind=rk) usq      ! square velocity
    ! derived constants
    ! equilibrium calculation variables
    real(kind=rk) ucx
    real(kind=rk) eqState(layout%fStencil%QQ)
    real(kind=rk) omega
    integer :: dens_pos, vel_pos(3), elemOff
    ! ---------------------------------------------------------------------------
    dens_pos = varSys%method%val(derVarPos(1)%density)%auxField_varPos(1)
    vel_pos = varSys%method%val(derVarPos(1)%velocity)%auxField_varPos(1:3)

    QQ = layout%fStencil%QQ
    ! nElems = size(neigh)/QQ
    nScalars = varSys%nScalars

    nodeloop: do iElem = 1, nSolve
      !> Generic fetching step:
      !! Streaming for pull
      !! Local copy for push
      ux = 0._rk
      rho = 0._rk
      do iDir = 1, QQ
        pdfTmp( iDir ) = inState( ?FETCH?(iDir, 1, iElem, QQ, nScalars, nElems,neigh))
      end do

      ! element offset for auxField array
      elemOff = (iElem-1)*varSys%nAuxScalars
      ! local density
      rho = auxField(elemOff + dens_pos)
      ! local x-, y- and z-velocity
      ux(1) = auxField(elemOff + vel_pos(1))
      ux(2) = auxField(elemOff + vel_pos(2))
      ux(3) = auxField(elemOff + vel_pos(3))

      ! square velocity and derived constants
      usq = ux(1)*ux(1) + ux(2)*ux(2) + ux(3)*ux(3)

      !> relaxation parameter
      omega = fieldProp(1)%fluid%viscKine%omLvl(level)%val(iElem)

      do iDir = 1,QQ

        !> Pre-calculate velocitiy terms
        ucx = layout%fStencil%cxDir( 1, iDir )*ux(1) &
          & + layout%fStencil%cxDir( 2, iDir )*ux(2) &
          & + layout%fStencil%cxDir( 3, iDir )*ux(3)

        !> Calculate equilibrium distribution functions fEq
        eqState(iDir) = layout%weight(iDir)*( rho + rho0*(  &
          & +  ucx*cs2inv                                   &
          & +  ucx*ucx*cs2inv*cs2inv*0.5_rk                 &
          & -  usq*0.5_rk*cs2inv ))

        !> Relaxation
        outState( ?SAVE?( iDir,1,iElem,QQ,nScalars,nElems,neigh )) =         &
          &            pdfTmp( iDir ) - omega*( pdfTmp(iDir) - eqState(iDir) )

      end do ! iDir

    end do nodeloop

  end subroutine bgk_advRel_generic_incomp
! ****************************************************************************** !

end module mus_bgk_module
! ****************************************************************************** !
