#include "MAR_pp.def"
subroutine DYNdps_mp(norder)
    ! +------------------------------------------------------------------------+
    ! | MAR DYNAMICS   FAST                                    15-04-2021  MAR |
    ! |   subroutine DYNdps solves the Mass Conservation Equation              |
    ! |                                                                        |
    ! +------------------------------------------------------------------------+
    ! |                                                                        |
    ! |   INPUT/  (via common block)                                           |
    ! |   ^^^^^         iterun          : long  time step counter              |
    ! |                 itFast          : short time step counter              |
    ! |                 norder          : numerical scheme: order of precision |
    ! |                                                                        |
    ! |   INPUT/  (via common block)                                           |
    ! |   OUTPUT        pstDYn(mx,my)   : Pressure Depth p*(t)           (kPa) |
    ! |   ^^^^^^         pstDY(mx,my)   : Pressure Depth p*(t-dt)              |
    ! |                                                                        |
    ! |   INPUT   (via common block)                                           |
    ! |   ^^^^^         uairDY(mx,my,mz): x-Wind Speed                   (m/s) |
    ! |                 vairDY(mx,my,mz): y-Wind Speed                   (m/s) |
    ! |                                                                        |
    ! |   OUTPUT  (via common block)                                           |
    ! |   ^^^^^^  psigDY: p* X Vertical Wind Speed; Sigma Syst.(i.e. p* Ds/Dt) |
    ! |                   psigDY Computed  ON the Sigma Levels   (unstaggered) |
    ! |                                    IN           Layers     (staggered) |
    ! |                                                                        |
    ! |   METHOD: Implicit Time Scheme (pImplc  Switch is .true. )             |
    ! |   ^^^^^^       2th order accurate Time  Scheme (semi-implicit)   .and. |
    ! |                2th order accurate Space Scheme on Arakawa A grid       |
    ! |                                                                        |
    ! |           Explicit Time Scheme (pImplc  Switch is .false.)             |
    ! |            Centered     Scheme (center  Switch is .true. )             |
    ! |                2th order accurate Time  Scheme (leapfrog backw.) .and. |
    ! |   norder.EQ.2 (2th order accurate Space Scheme on Arakawa A grid .OR.  |
    ! |   norder.NE.2  4th order accurate Space Scheme on Arakawa A grid)      |
    ! |                                                                   .OR. |
    ! |            Non-Centered Scheme (center  Switch is .false.)             |
    ! |                0th order accurate Space Scheme (Bott) (norder=0)  .OR. |
    ! |                4th order accurate Space Scheme (Bott) (norder=4)       |
    ! |                                                                        |
    ! |            Robert Time Filter may be used to remove computational mode |
    ! |                                                                        |
    ! |   REFER.:  Use of A grid: Purser and Leslie, MWR 116, 2069--2080, 1988 |
    ! |   ^^^^^^   Time Scheme  : Lin    and Rood    MWR 124, 2046--2070, 1996 |
    ! |                                                                        |
    ! +------------------------------------------------------------------------+

    use marctr
    use marphy
    use mardim
    use margrd
    use mar_ge
    use mar_dy
    use mar_wk
#ifdef ON
    use mar_te
#endif

    implicit none

    integer i, j, k, m
    integer norder

    logical pImplc, locorr

    integer mxx, myy
    parameter(mxx=mx + 1, myy=my + 1)
    real vecx(0:mxx), flux(0:mxx)
    real aa0x(0:mxx), aa1x(0:mxx), aa2x(0:mxx)
    real aa3x(0:mxx), aa4x(0:mxx)
    real cnpx(0:mxx), cnmx(0:mxx)
    real sipx(0:mxx), simx(0:mxx), sidx(0:mxx)
    real vecy(0:myy), fluy(0:myy)
    real aa0y(0:myy), aa1y(0:myy), aa2y(0:myy)
    real aa3y(0:myy), aa4y(0:myy)
    real cnpy(0:myy), cnmy(0:myy)
    real sipy(0:myy), simy(0:myy), sidy(0:myy)

    integer it_pst, nt_pst, idir_x, jdir_y
    integer i1_dps, i2_dps, j1_dps, j2_dps, k1_dps, k2_dps
    real alphpp, betapp, Fp__pp, Fpa_pp, Fpb_pp, facovr
    real CorArg, CorrNH, SRes_0, SRes_1, SRes10, pst_n1
    real dtcorr, dtxfas, dtyfas, uuface, vvface

    integer numdps, ntpsig
    common / DYNdps_int / numdps, ntpsig

    real psigad(mx, my, mz)
    common / DYNdps_rea / psigad

    ! +--DATA
    ! +  ====
    ! +... pImplc=.true.  ==> Implicit Scheme is used to damp Lamb Waves
    data pImplc/.false./

    numdps = numdps + 1

    ! +--Save Mass at the Lateral Boundaries
    ! +  ===================================
    do j = 1, my
        do i = 1, mx
            ! + p*(n-1)
            WTxy1(i, j) = pstDY(i, j)
            ! + p*(n)
            WTxy2(i, j) = pstDYn(i, j)
        enddo
    enddo

    do j = 1, my
        pstDY(1, j) = pstDYn(1, j)
        pstDY(mx, j) = pstDYn(mx, j)
    enddo

    if(mmy > 1) then
        do i = 1, mx
            pstDY(i, 1) = pstDYn(i, 1)
            pstDY(i, my) = pstDYn(i, my)
        enddo
    endif

    if(pImplc) then
        ! ++++++++++++++++++++++++++++
        ! +--IMPLICIT SCHEME (BEGIN) +
        ! ++++++++++++++++++++++++++++
        if(it_Mix == 1 .and. itFast == 1) then
            ! +--Horizontal Wind Speed: Average along the Vertical
            ! +  -------------------------------------------------
            do j = 1, my
                do i = 1, mx
                    WTxy3(i, j) = 0.0
                    WTxy4(i, j) = 0.0
                enddo
            enddo

            do k = 1, mz
                do j = 1, my
                    do i = 1, mx
                        WTxy3(i, j) = WTxy3(i, j) + uairDY(i, j, k) * dsigm1(k)
                        WTxy4(i, j) = WTxy4(i, j) + vairDY(i, j, k) * dsigm1(k)
                    enddo
                enddo
            enddo

            ! +--Tridiagonal Matrix Coefficients
            ! +  -------------------------------
            betapp = 0.6
            alphpp = 1.0 - betapp
            Fp__pp = dt / dx
            Fpa_pp = Fp__pp * alphpp
            Fpb_pp = Fp__pp * betapp
            do i = ip11, mx1
                do j = jp11, my1
                    WTxyz1(i, j, 1) = Fpb_pp * WTxy3(ip1(i), j) ! k=1: 3-Diag Matrix, x-Dir
                    WTxyz3(i, j, 1) = -Fpb_pp * WTxy3(im1(i), j) ! k=1: 3-Diag Matrix, x-Dir
                enddo
            enddo

            do j = jp11, my1
                do i = ip11, mx1
                    WTxyz1(i, j, 2) = Fpb_pp * WTxy4(i, jp1(j)) ! k=2: 3-Diag Matrix, y-Dir
                    WTxyz3(i, j, 2) = -Fpb_pp * WTxy4(i, jm1(j)) ! k=2: 3-Diag Matrix, y-Dir
                enddo
            enddo

            ! +--Overrelaxation Starting Block
            ! +  -----------------------------

            ! +--Independant Term: constant contribution ! x-Dir
            ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            do i = ip11, mx1
                do j = jp11, my1
                    WTxyz8(i, j, 1) = pstDY(i, j) &
                                      - Fpa_pp * WTxy3(ip1(i), j) * pstDY(ip1(i), j) &
                                      + Fpa_pp * WTxy3(im1(i), j) * pstDY(im1(i), j)
                enddo
            enddo

            do j = jp11, my1
                do i = ip11, mx1
                    WTxyz8(i, j, 1) = WTxyz8(i, j, 1) &
                                      - Fpa_pp * WTxy4(i, jp1(j)) * pstDY(i, jp1(j)) &
                                      + Fpa_pp * WTxy4(i, jm1(j)) * pstDY(i, jm1(j))
                enddo
            enddo

            ! +--Dirichlet Condition x-LBC ! x-Dir
            ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~
            i = mx1
            do j = jp11, my1
                WTxyz8(i, j, 1) = WTxyz8(i, j, 1) - Fpb_pp * WTxy3(ip1(i), j) * pstDY(ip1(i), j)
                WTxyz1(i, j, 1) = 0.0
#ifdef WR
                write(6, *) ip1(i), ' ', pstDY(ip1(i), j)
#endif
            enddo

            i = ip11
            do j = jp11, my1
                WTxyz8(i, j, 1) = WTxyz8(i, j, 1) &
                                  + Fpb_pp * WTxy3(im1(i), j) * pstDY(im1(i), j)
                WTxyz3(i, j, 1) = 0.0
#ifdef WR
                write(6, *) im1(i), ' ', pstDY(im1(i), j)
#endif
            enddo

            ! +--Dirichlet Condition y-LBC
            ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~
            if(mmy > 1) then
                j = my1
                do i = ip11, mx1
                    WTxyz8(i, j, 1) = WTxyz8(i, j, 1) &
                                      - Fpb_pp * WTxy3(i, jp1(j)) * pstDY(i, jp1(j))
                    WTxyz1(i, j, 2) = 0.0
#ifdef WR
                    write(6, *) jp1(j), ' ', pstDY(i, jp1(j))
#endif
                enddo
                ! +
                j = jp11
                do i = ip11, mx1
                    WTxyz8(i, j, 1) = WTxyz8(i, j, 1) &
                                      + Fpb_pp * WTxy3(i, jm1(j)) * pstDY(i, jm1(j))
                    WTxyz3(i, j, 2) = 0.0
#ifdef WR
                    write(6, *) jm1(j), ' ', pstDY(i, jm1(j))
#endif
                enddo
            endif

            ! +--First Estimate
            ! +  ~~~~~~~~~~~~~~
            do j = jp11, my1
                do i = ip11, mx1
                    WTxyz7(i, j, 1) = pstDY(i, j)              ! Previous       Estimate
                    WTxyz7(i, j, 2) = 0.0                     ! Half-Iteration Estimate
                    WTxyz7(i, j, 3) = pstDY(i, j)              ! Next to update Estimate
                enddo
            enddo

            ! +--Recurrence
            ! +  ~~~~~~~~~~
            facovr = 1.1
            nt_pst = 4
            it_pst = 0
            SRes_1 = 0.0
1000        continue
            it_pst = it_pst + 1

            ! +--Resolution along the x-Direction
            ! +  --------------------------------

            ! +--Tridiagonal Matrix Coefficients
            ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            do j = jp11, my1
                do i = ip11, mx1
                    WTxyz1(i, j, 3) = WTxyz1(i, j, 1)        ! Index 1 ==>      x-Dir
                    WTxyz2(i, j, 3) = 1.0d+0               !
                    WTxyz3(i, j, 3) = WTxyz3(i, j, 1)        !
                enddo
            enddo

            ! +--Independant Term
            ! +  ~~~~~~~~~~~~~~~~
            do j = jp11, my1
                do i = ip11, mx1
                    ! Index 1 ==>    ALL-Dir
                    WTxyz4(i, j, 3) = WTxyz8(i, j, 1) &
                                      - WTxyz1(i, j, 2) * WTxyz7(i, jp1(j), 1) &
                                      - WTxyz3(i, j, 2) * WTxyz7(i, jm1(j), 1)
                enddo
            enddo

            ! +--Tridiagonal Matrix Inversion ! OUTPUT is WTxyz7(i,j,3)
            ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            i1_dps = ip11
            i2_dps = mx1
            j1_dps = jp11
            j2_dps = my1
            k1_dps = 3
            k2_dps = 3

            ! +  ********
            call MARgau_x(i1_dps, i2_dps, j1_dps, j2_dps, k1_dps, k2_dps)
            ! +  ********

            ! +--Resolution along the y-Direction
            ! +  --------------------------------

            if(mmy > 1) then
                ! +--Tridiagonal Matrix Coefficients ! Index 2 ==> y-Dir
                ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                do j = jp11, my1
                    do i = ip11, mx1
                        WTxyz1(i, j, 3) = WTxyz1(i, j, 2)
                        WTxyz2(i, j, 3) = 1.
                        WTxyz3(i, j, 3) = WTxyz3(i, j, 2)
                        ! Half-Iteration Estimate
                        WTxyz7(i, j, 2) = WTxyz7(i, j, 3)
                    enddo
                enddo

                ! +--Independant Term ! y-Dir
                ! +  ~~~~~~~~~~~~~~~~
                do i = ip11, mx1
                    do j = jp11, my1
                        ! Index 1 ==> ALL-Dir
                        WTxyz4(i, j, 3) = WTxyz8(i, j, 1) &
                                          - WTxyz1(i, j, 1) * WTxyz7(ip1(i), j, 2) &
                                          - WTxyz3(i, j, 1) * WTxyz7(im1(i), j, 2)
                    enddo
                enddo

                ! +--Tridiagonal Matrix Inversion ! OUTPUT is WTxyz7(i,j,3)
                ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                i1_dps = ip11
                i2_dps = mx1
                j1_dps = jp11
                j2_dps = my1
                k1_dps = 3
                k2_dps = 3

                ! +  ********
                call MARgau_y(i1_dps, i2_dps, j1_dps, j2_dps, k1_dps, k2_dps)
                ! +  ********

            endif

            ! +--Residual is obtained by substracting next from former estimated Equation
            ! +  ------------------------------------------------------------------------

            do i = ip11, mx1
                do j = jp11, my1
                    WTxyz6(i, j, 1) = WTxyz1(i, j, 1) * WTxyz7(ip1(i), j, 3) &
                                      + WTxyz3(i, j, 1) * WTxyz7(im1(i), j, 3)
                enddo
            enddo

            do j = jp11, my1
                do i = ip11, mx1
                    WTxyz6(i, j, 1) = WTxyz6(i, j, 1) &
                                      + WTxyz1(i, j, 2) * WTxyz7(i, jp1(j), 3) &
                                      + WTxyz3(i, j, 2) * WTxyz7(i, jm1(j), 3) &
                                      + WTxyz7(i, j, 3) &
                                      - WTxyz8(i, j, 1)
                enddo
            enddo

            SRes_1 = 0.0
            do j = jp11, my1
                do i = ip11, mx1
                    SRes_1 = SRes_1 + abs(WTxyz6(i, j, 1))
                enddo
            enddo

            if(it_pst > 1) then
                SRes10 = SRes_1 / SRes_0
            else
                SRes_0 = SRes_1
                SRes10 = 1.0
            endif

            ! +--New Estimate
            ! +  ------------

            if(SRes10 > 0.1 .and. it_pst < nt_pst &
               .and. mmy > 1) then
                do j = jp11, my1
                    do i = ip11, mx1
                        WTxyz7(i, j, 1) = WTxyz7(i, j, 3) &
                                          - facovr * WTxyz6(i, j, 1) / WTxyz2(i, j, 1)
                        WTxyz7(i, j, 3) = WTxyz7(i, j, 1)
                    enddo
                enddo
            endif
            ! +
#ifdef WR
            write(6, 1001) iterun, it_pst, SRes10
1001        format(2i9, f21.15)
#endif
            ! +
            if(SRes10 > 1.0d-1 .and. it_pst < nt_pst &
               .and. mmy > 1) go to 1000

            ! +--Final Estimate
            ! +  --------------

            do j = jp11, my1
                do i = ip11, mx1
                    pstDY(i, j) = WTxyz7(i, j, 3)
                    pstDYn(i, j) = WTxyz7(i, j, 3)
                enddo
            enddo

            ! +--Lateral Boundary Conditions
            ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~~~
            do j = jp11, my1
                pstDY(1, j) = WTxy1(1, j)
                pstDY(mx, j) = WTxy1(mx, j)
                pstDYn(1, j) = WTxy1(1, j)
                pstDYn(mx, j) = WTxy1(mx, j)
            enddo

            if(mmy > 1) then
                do i = ip11, mx1
                    pstDY(i, 1) = WTxy1(i, 1)
                    pstDY(i, my) = WTxy1(i, my)
                    pstDYn(i, 1) = WTxy1(i, 1)
                    pstDYn(i, my) = WTxy1(i, my)
                enddo
            endif

            ! +--Contribution to Vertical Material Speed
            ! +  ---------------------------------------
            do k = 1, mz
                do j = 1, my
                    do i = 1, mx
                        WTxyz3(i, j, k) = uairDY(i, j, k) * pstDYn(i, j)
                        WTxyz4(i, j, k) = vairDY(i, j, k) * pstDYn(i, j)
                        WTxyz8(i, j, k) = 0.
                    enddo
                enddo

                do j = 1, my
                    do i = 1, mx
                        WTxyz7(i, j, k) = (WTxyz3(im1(i), j, k) - WTxyz3(ip1(i), j, k) &
                                           )
                    enddo
                enddo

                do i = 1, mx
                    do j = 1, my
                        WTxyz7(i, j, k) = (WTxyz7(i, j, k) &
                                           + WTxyz4(i, jm1(j), k) - WTxyz4(i, jp1(j), k) &
                                           ) * dxinv3(i, j)
                    enddo
                enddo
                !! Two previous Loops are the vectorized version of the following Loop
                !!               do j=1,my
                !!               do i=1,mx
                !!                 WTxyz7(i,j,k)=               -dxinv3(i, j)
                !!   .                      *(WTxyz3(ip1(i),j,k)-WTxyz3(im1(i),j,k)
                !!   .                       +WTxyz4(i,jp1(j),k)-WTxyz4(i,jm1(j),k))
                !!               end do
                !!               end do
            enddo
        endif

        ! ++++++++++++++++++++++++++
        ! +--IMPLICIT SCHEME (END) +
        ! ++++++++++++++++++++++++++
    else
        ! ++++++++++++++++++++++++++++
        ! +--EXPLICIT SCHEME (BEGIN) +
        ! ++++++++++++++++++++++++++++

        ! +++++++++++++++++++++++
        ! +--ERROR TEST (BEGIN) +
        ! +++++++++++++++++++++++

        if(norder < 0) then
            stop '++++++++ Horizontal Advection badly conditioned / Order<0 ++'
        else
            if(center) then
                ! ++++++++++++++++++++++++++++++++++
                ! +...Is  Centered Scheme  (BEGIN) +
                ! ++++++++++++++++++++++++++++++++++
                !CAa usual setting go here
                locorr = .false.
                dtcorr = 1.000

                ! +--Mass Divergence / First  Direction
                ! +  ==================================

                ! +--Mass Flux Update / x-Direction
                ! +  ------------------------------

                !#ifdef SP
                ! CAa : SP not activated because weird if / else / end if
                ! if (mod(numdps,2).eq.0.or.mmy.eq.1) then
                !#endif

                do j = 1, my
                    do i = 1, mx
                        WTxy3(i, j) = 0.0
                    enddo
                enddo

                !$OMP PARALLEL do private(i,j,k)
                do k = 1, mz
                    ! +--Mass Flux Update / x-Direction
                    ! +  ------------------------------
                    if(norder == 2) then
                        ! CAa  usually norder==4
                        ! do k=1,mz
                        do j = 1, my
                            do i = 1, mx
                                WTxyz3(i, j, k) = uairDY(i, j, k) * pstDYn(i, j)
#ifdef Z2
                                WTxyz3(i, j, k) = WTxyz3(i, j, k) * clatGE(i, j)
#endif
                            enddo
                        enddo
                        !c #vL end do

                        !c #vL do k=1,mz
                        do i = 1, mx
                            do j = 1, my
                                WTxyz7(i, j, k) = dxinv3(i, j) * ( &
                                                  WTxyz3(im1(i), j, k) - WTxyz3(ip1(i), j, k))
#ifdef Z2
                                WTxyz7(i, j, k) = WTxyz7(i, j, k) / clatGE(i, j)
#endif
                            enddo
                        enddo
                        ! end do
                    else
                        ! CAa usual setting go here
                        ! do k=1,mz
                        do j = 1, my
                            do i = 1, mx
                                WTxyz3(i, j, k) = uairDY(i, j, k) * pstDYn(i, j)
#ifdef Z2
                                WTxyz3(i, j, k) = WTxyz3(i, j, k) * clatGE(i, j)
#endif
                            enddo
                        enddo
                        !c #vL end do

                        !c #vL do k=1,mz
                        do i = 1, mx
                            do j = 1, my
                                WTxyz7(i, j, k) = dxinv3(i, j) * fac43 * ( &
                                                  0.125 * (WTxyz3(ip2(i), j, k) - WTxyz3(im2(i), j, k)) &
                                                  - WTxyz3(ip1(i), j, k) + WTxyz3(im1(i), j, k))
#ifdef Z2
                                WTxyz7(i, j, k) = WTxyz7(i, j, k) / clatGE(i, j)
#endif
                            enddo
                        enddo
                        ! end do
                    endif
                    ! +--Mass Flux Update / y-Direction
                    ! +  ------------------------------
                    !#ifdef SP
                    ! else
                    !#endif
                    if(norder == 2) then
                        ! do k=1,mz
                        do j = 1, my
                            do i = 1, mx
                                WTxyz4(i, j, k) = vairDY(i, j, k) * pstDYn(i, j)
                            enddo
                        enddo
                        !c #vL end do

                        !c #vL do k=1,mz
                        do j = 1, my
                            do i = 1, mx
                                WTxyz8(i, j, k) = dyinv3(i, j) * ( &
                                                  WTxyz4(i, jm1(j), k) - WTxyz4(i, jp1(j), k))
                            enddo
                        enddo
                        !#ifdef SP
                        ! do j=1,my
                        ! do i=1,mx
                        !     WTxyz7(i,j,k)=                WTxyz8(i,j,k)
                        !     WTxyz8(i,j,k)=                0.0
                        ! end do
                        ! end do
                        !#endif
                    else
                        ! do k=1,mz
                        do j = 1, my
                            do i = 1, mx
                                WTxyz4(i, j, k) = vairDY(i, j, k) * pstDYn(i, j)
                            enddo
                        enddo
                        !c #vL end do

                        !c #vL do k=1,mz
                        do j = 1, my
                            do i = 1, mx
                                WTxyz8(i, j, k) = dyinv3(i, j) * fac43 * ( &
                                                  0.125 * (WTxyz4(i, jp2(j), k) - WTxyz4(i, jm2(j), k)) &
                                                  - WTxyz4(i, jp1(j), k) + WTxyz4(i, jm1(j), k))
                            enddo
                        enddo
                        !#ifdef SP
                        ! do j=1,my
                        ! do i=1,mx
                        !     WTxyz7(i,j,k)= WTxyz8(i,j,k)
                        !     WTxyz8(i,j,k)= 0.0
                        ! end do
                        ! end do
                        !#endif

                    endif
                    !#ifdef SP
                    ! end if
                    !#endif
                enddo
                !$OMP END PARALLEL DO

                ! +--Pressure Depth Increment
                ! +  ------------------------
                do k = 1, mz
                    do j = 1, my
                        do i = 1, mx
                            WTxy3(i, j) = WTxy3(i, j) &
                                          - (WTxyz7(i, j, k) + WTxyz8(i, j, k)) * dsigm1(k)
                        enddo
                    enddo
                enddo

                ! +--Pressure Depth Update (Leapfrog-Backward)
                ! +  -----------------------------------------
                if(itFast == 1) then
                    do j = 1, my
                        do i = 1, mx
                            pstDY(i, j) = WTxy2(i, j) - WTxy3(i, j) * dtfast
                            pstDYn(i, j) = WTxy2(i, j) - WTxy3(i, j) * 2.0 * dtfast
                        enddo
                    enddo
                else
                    if(itFast <= ntFast) then
                        do j = 1, my
                            do i = 1, mx
                                pstDY(i, j) = WTxy2(i, j)
                                pstDYn(i, j) = WTxy1(i, j) - WTxy3(i, j) * 2.0 * dtfast
#ifdef rt
                                ! +--Robert Time Filter
                                ! +  ~~~~~~~~~~~~~~~~~~
                                pstDY(i, j) = pstDY(i, j) + &
                                              Robert * (0.5 * (pstDYn(i, j) + WTxy1(i, j)) - pstDY(i, j))
#endif
                            enddo
                        enddo
                    else
                        do j = 1, my
                            do i = 1, mx
                                pstDY(i, j) = WTxy2(i, j)
                                pstDYn(i, j) = WTxy1(i, j) - WTxy3(i, j) * dtfast
                            enddo
                        enddo
                        ! +*** Leapfrog-Backward (e.g. Haltiner and Williams, p.152)
                    endif
                endif

                !#ifdef SP
                ! +--Mass Divergence / Second Direction
                ! +  ==================================
                ! if  (mmy.gt.1) then
                ! +--Mass Flux Update / x-Direction
                ! +  ------------------------------
                !   if (mod(numdps,2).eq.1) then
                !    if(norder   .EQ.2) then
                !     do k=1,mz
                !       do j=1,my
                !       do i=1,mx
                !         WTxyz3(i,j,k)=uairDY(i,j,k) * pstDYn(i,j)
                !       end do
                !       end do
                !       do j=1,my
                !       do i=1,mx
                !         WTxyz8(i,j,k)=                dxinv3(i, j)          *  ( &
                !                    WTxyz3(im1(i),j,k)-WTxyz3(ip1(i),j,k) )
                !       end do
                !       end do
                !     end do
                !    else
                !     do k=1,mz
                !       do j=1,my
                !       do i=1,mx
                !         WTxyz3(i,j,k)=uairDY(i,j,k) * pstDYn(i,j)
                !       end do
                !       end do
                !       do j=1,my
                !       do i=1,mx
                !         WTxyz8(i,j,k)=                dxinv3(i, j) * fac43  *  ( &
                !             0.125*(WTxyz3(ip2(i),j,k)-WTxyz3(im2(i),j,k)) &
                !                   -WTxyz3(ip1(i),j,k)+WTxyz3(im1(i),j,k) )
                !       end do
                !       end do
                !     end do
                !    end if
                ! +--Mass Flux Update / y-Direction
                ! +  ------------------------------
                !    else
                !     if(norder   .EQ.2) then
                !      do k=1,mz
                !        do j=1,my
                !        do i=1,mx
                !          WTxyz4(i,j,k)=vairDY(i,j,k) * pstDYn(i,j)
                !        end do
                !        end do
                !        do j=1,my
                !        do i=1,mx
                !          WTxyz8(i,j,k)=                dyinv3(i, j)          *  ( &
                !                     WTxyz4(i,jm1(j),k)-WTxyz4(i,jp1(j),k) )
                !        end do
                !        end do
                !      end do
                !     else
                !      do k=1,mz
                !        do j=1,my
                !        do i=1,mx
                !          WTxyz4(i,j,k)=vairDY(i,j,k) * pstDYn(i,j)
                !        end do
                !        end do
                !        do j=1,my
                !        do i=1,mx
                !          WTxyz8(i,j,k)=                dyinv3(i, j) * fac43  *  ( &
                !              0.125*(WTxyz4(i,jp2(j),k)-WTxyz4(i,jm2(j),k)) &
                !                    -WTxyz4(i,jp1(j),k)+WTxyz4(i,jm1(j),k) )
                !        end do
                !        end do
                !      end do
                !     end if
                !    end if
                !#endif

                ! +--Pressure Depth Increment
                ! +  ------------------------

                do j = 1, my
                    do i = 1, mx
                        WTxy4(i, j) = 0.0
                    enddo
                enddo
                !#ifdef SP
                !    do k=1,mz
                !    do j=1,my
                !    do i=1,mx
                !      WTxy4(i,j)=WTxy4(i,j)-WTxyz8(i,j,k)*dsigm1(k)
                !    end do
                !    end do
                !    end do
                !    do j=1,my
                !    do i=1,mx
                !      WTxy3(i,j)=WTxy3(i,j)+ WTxy4(i,j)
                !    end do
                !    end do
                ! +--Pressure Depth    Update (Leapfrog-Backward)
                ! +  --------------------------------------------
                !    if  (itFast.eq.1) then
                !        do j=1,my
                !        do i=1,mx
                !          pstDY( i,j) = pstDY( i,j) - WTxy4(i,j)     *dtfast
                !          pstDYn(i,j) = pstDYn(i,j) - WTxy4(i,j) *2.0*dtfast
                !        end do
                !        end do
                !    else
                !      if (itFast.le.ntFast) then
                !        do j=1,my
                !        do i=1,mx
                !          pstDY( i,j) = WTxy2( i,j)
                !          pst_n1      = pstDYn(i,j)
                !          pstDYn(i,j) = pstDYn(i,j) - WTxy4(i,j) *2.0*dtfast
                !#endif
#ifdef rt
                ! +--Robert Time Filter
                ! +  ~~~~~~~~~~~~~~~~~~
                pstDY(i, j) = pstDY(i, j) + &
                              Robert * (0.5 * (pstDYn(i, j) + pst_n1) - pstDY(i, j))
#endif
                !#ifdef SP
                !       end do
                !       end do
                !     else
                !       do j=1,my
                !       do i=1,mx
                !         pstDY( i,j) = WTxy2( i,j)
                !         pstDYn(i,j) = pstDYn(i,j) - WTxy4(i,j)      *dtfast
                !       end do
                !       end do
                ! +***            Leapfrog-Backward (e.g. Haltiner and Williams, p.152)
                !     end if
                !   end if
                ! end if
                !#endif
                norder = -1

                ! ++++++++++++++++++++++++++++++++++
                ! +...Is  Centered Scheme    (END) +
                ! ++++++++++++++++++++++++++++++++++
            else
                ! ++++++++++++++++++++++++++++++++++
                ! +...Non Centered Schemes (BEGIN) +
                ! ++++++++++++++++++++++++++++++++++

                locorr = .true.
                dtcorr = dtfast

                ! +--Vector for Positive Definite Variables
                ! +  ======================================

                dtxfas = dtx / (ntFast + 1)
                dtyfas = dty / (ntFast + 1)

                do k = 1, mz

                    do j = 1, my
                        do i = 1, mx
                            WTxyz1(i, j, k) = -uairDY(i, j, k) * dtxfas
                            WTxyz2(i, j, k) = -vairDY(i, j, k) * dtyfas
                            uuface = 0.5 * (uairDY(i, j, k) + uairDY(ip1(i), j, k))
                            vvface = 0.5 * (vairDY(i, j, k) + vairDY(i, jp1(j), k))
                            WTxyz3(i, j, k) = 0.5 * (abs(uuface) + uuface) * dtxfas
                            WTxyz5(i, j, k) = 0.5 * (abs(uuface) - uuface) * dtxfas
                            WTxyz4(i, j, k) = 0.5 * (abs(vvface) + vvface) * dtyfas
                            WTxyz6(i, j, k) = 0.5 * (abs(vvface) - vvface) * dtyfas
                        enddo
                    enddo

                enddo

                ! +--Advection (Time Splitting)
                ! +  ==========================

                ! +--Mass and Mass Flux
                ! +  ~~~~~~~~~~~~~~~~~~
                do j = 1, my
                    do i = 1, mx
                        WTxy5(i, j) = pstDY(i, j)
                        WTxy1(i, j) = WTxy5(i, j)
                        WTxy2(i, j) = WTxy5(i, j)
                        WTxy3(i, j) = 0.
                    enddo
                enddo

                ! +--Conservative Scheme Order 0
                ! +  ===========================
                ! +
                if(norder == 0) then
                    norder = -1

                    ! +--Time Splitting
                    ! +  --------------

                    ! +--x-Direction First
                    ! +  ~~~~~~~~~~~~~~~~~
                    if(mod(numdps, 2) == 0) then

                        do k = 1, mz
                            do j = 1, my
                                do i = 1, mx
                                    WTxyz7(i, j, k) = &
                                        -WTxyz3(i, j, k) * WTxy5(i, j) &
                                        + WTxyz5(i, j, k) * WTxy5(ip1(i), j) &
                                        + WTxyz3(im1(i), j, k) * WTxy5(im1(i), j) &
                                        - WTxyz5(im1(i), j, k) * WTxy5(i, j)

                                    WTxy3(i, j) = WTxy3(i, j) &
                                                  + WTxyz7(i, j, k) * dsigm1(k)
                                enddo
                            enddo
                        enddo

                        do k = 1, mz
                            do j = 1, my
                                do i = 1, mx
                                    WTxyz8(i, j, k) = &
                                        -WTxyz4(i, j, k) * WTxy3(i, j) &
                                        + WTxyz6(i, j, k) * WTxy3(i, jp1(j)) &
                                        + WTxyz4(i, jm1(j), k) * WTxy3(i, jm1(j)) &
                                        - WTxyz6(i, jm1(j), k) * WTxy3(i, j)

                                    WTxy3(i, j) = WTxy3(i, j) &
                                                  + WTxyz8(i, j, k) * dsigm1(k)
                                enddo
                            enddo
                        enddo

                        ! +--y-Direction First
                        ! +  ~~~~~~~~~~~~~~~~~
                    else

                        do k = 1, mz
                            do j = 1, my
                                do i = 1, mx
                                    WTxyz7(i, j, k) = &
                                        -WTxyz4(i, j, k) * WTxy5(i, j) &
                                        + WTxyz6(i, j, k) * WTxy5(i, jp1(j)) &
                                        + WTxyz4(i, jm1(j), k) * WTxy5(i, jm1(j)) &
                                        - WTxyz6(i, jm1(j), k) * WTxy5(i, j)

                                    WTxy3(i, j) = WTxy3(i, j) &
                                                  + WTxyz7(i, j, k) * dsigm1(k)
                                enddo
                            enddo
                        enddo

                        do k = 1, mz
                            do j = 1, my
                                do i = 1, mx
                                    WTxyz8(i, j, k) = &
                                        -WTxyz3(i, j, k) * WTxy3(i, j) &
                                        + WTxyz5(i, j, k) * WTxy3(ip1(i), j) &
                                        + WTxyz3(im1(i), j, k) * WTxy3(im1(i), j) &
                                        - WTxyz5(im1(i), j, k) * WTxy3(i, j)

                                    WTxy3(i, j) = WTxy3(i, j) &
                                                  + WTxyz8(i, j, k) * dsigm1(k)
                                enddo
                            enddo
                        enddo

                    endif

                endif

                ! +--Conservative Scheme order 4
                ! +  ===========================

                if(norder == 4) then
                    norder = -1

                    ! +--Time Splitting
                    ! +  --------------

                    ! +--Parameters
                    ! +  ~~~~~~~~~~
                    if(mod(numdps, 2) == 0) then
                        idir_x = 2
                        jdir_y = 3
                    else
                        idir_x = 3
                        jdir_y = 2
                    endif

                    ! +--Auxiliary  Variables
                    ! +  ~~~~~~~~~~~~~~~~~~~~
                    do j = 1, my
                        do i = 1, mx
                            WTxy3(i, j) = WTxy5(i, j)
                            WTxy4(i, j) = WTxy5(i, j)
                        enddo
                    enddo

                    ! +--1D Computation
                    ! +  --------------

401                 continue

                    if(idir_x == 0 .or. &
                       jdir_y == 3) go to 402

                    ! +--x-Direction  here
                    ! +  ~~~~~~~~~~~~~~~~~
                    do k = 1, mz
                        do j = 1, my
                            do i = 1, mx
                                cnpx(i) = WTxyz3(i, j, k)
                                cnmx(i) = WTxyz5(i, j, k)
                                vecx(i) = WTxy2(i, j)
                            enddo
                            cnpx(0) = WTxyz3(1, j, k)
                            cnpx(mxx) = WTxyz3(mx, j, k)
                            cnmx(0) = WTxyz5(1, j, k)
                            cnmx(mxx) = WTxyz5(mx, j, k)
                            vecx(0) = WTxy2(1, j)
                            vecx(mxx) = WTxy2(mx, j)

                            ! +  ********
                            call ADVbot_4(flux, vecx, aa0x, aa1x, aa2x, aa3x, aa4x, &
                                          cnpx, cnmx, sipx, simx, sidx, mxx, 1)
                            ! +  ********

                            do i = 1, mx
                                WTxyz7(i, j, k) = -flux(i) + flux(im1(i))
                            enddo

                            do i = 1, mx
                                WTxy3(i, j) = WTxy3(i, j) + WTxyz7(i, j, k) * dsigm1(k)
                            enddo
                        enddo
                    enddo

                    ! +--Assignation in case of Time Splitting
                    ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    if(idir_x > 1 .or. &
                       jdir_y > 1) then
                        do j = 1, my
                            do i = 1, mx
                                WTxy1(i, j) = WTxy3(i, j)
                                WTxy4(i, j) = WTxy3(i, j)
                            enddo
                        enddo
                    endif

                    idir_x = 0

402                 continue

                    if(idir_x == 0 .and. &
                       jdir_y == 0) go to 403

                    ! +--y-Direction  here
                    ! +  ~~~~~~~~~~~~~~~~~
                    do k = 1, mz
                        do i = 1, mx
                            do j = 1, my
                                cnpy(j) = WTxyz4(i, j, k)
                                cnmy(j) = WTxyz6(i, j, k)
                                vecy(j) = WTxy1(i, j)
                            enddo
                            cnpy(0) = WTxyz4(i, 1, k)
                            cnpy(myy) = WTxyz4(i, my, k)
                            cnmy(0) = WTxyz6(i, 1, k)
                            cnmy(myy) = WTxyz6(i, my, k)
                            vecy(0) = WTxy1(i, 1)
                            vecy(myy) = WTxy1(i, my)

                            ! +  ********
                            call ADVbot_4(fluy, vecy, aa0y, aa1y, aa2y, aa3y, aa4y, &
                                          cnpy, cnmy, sipy, simy, sidy, myy, 1)
                            ! +  ********

                            do j = 1, my
                                WTxyz8(i, j, k) = -fluy(j) + fluy(jm1(j))
                            enddo

                            do j = 1, my
                                WTxy4(i, j) = WTxy4(i, j) + WTxyz8(i, j, k) * dsigm1(k)
                            enddo
                        enddo
                    enddo

                    ! +--Assignation in case of Time Splitting
                    ! +  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                    if(idir_x > 1 .or. &
                       jdir_y > 1) then
                        do j = 1, my
                            do i = 1, mx
                                WTxy2(i, j) = WTxy4(i, j)
                                WTxy3(i, j) = WTxy4(i, j)
                            enddo
                        enddo
                    endif

                    jdir_y = 0

                    go to 401
403                 continue
                endif

                ! +--Update
                ! +  ======
                do j = 1, my
                    do i = 1, mx
                        ! New Pressure Thickness
                        pstDYn(i, j) = WTxy3(i, j)

                        ! New Pressure Thickness Increment
                        WTxy3(i, j) = pstDY(i, j) - WTxy3(i, j)

                    enddo
                enddo
                ! ++++++++++++++++++++++++++++++++++
                ! +...Non Centered Schemes   (END) +
                ! ++++++++++++++++++++++++++++++++++
            endif
            ! +++++++++++++++++++++
            ! +--ERROR TEST (END) +
            ! +++++++++++++++++++++
        endif
        if(norder >= 0) &
            stop '++++++++ Horizontal Advection badly conditioned ++++++++++++'
        ! ++++++++++++++++++++++++++
        ! +--EXPLICIT SCHEME (END) +
        ! ++++++++++++++++++++++++++
    endif

    ! +--Vertical Wind Speed (sigma coordinate)
    ! +  ======================================

    ! +--Staggered Vertical Grid
    ! +  -----------------------

    if(itFast == 1) ntpsig = 0

    ntpsig = 1 + ntpsig

    CFLzDY = 0.0

    !$OMP PARALLEL do private (i,j,k) reduction (+:CFLzDY)
    do j = 1, my
        if(itFast == 1) then
            do k = 1, mz
                ! do j=1,my
                do i = 1, mx
                    psigad(i, j, k) = 0.
                enddo
                ! end do
            enddo
        endif

        if(staggr) then
            ! CAa usual setting go here
            ! do j=1,my
            do i = 1, mx
                WTxyz3(i, j, 1) = sigmid(2) * WTxy3(i, j) &
                                  + dsigm1(1) * (WTxyz7(i, j, 1) + WTxyz8(i, j, 1))
            enddo
            ! end do

            do k = kp1(1), mmz
                ! do j=1,my
                do i = 1, mx
                    ! +... Computation of p*Sigma. BETWEEN Sigma Levels
                    WTxyz3(i, j, k) = dsigm1(k) * ( &
                                      WTxy3(i, j) + (WTxyz7(i, j, k) + WTxyz8(i, j, k))) + WTxyz3(i, j, k - 1)
                enddo
                ! end do
            enddo
        else
            ! +--Unstaggered Vertical Grid
            ! +  -------------------------
            ! do j=1,my
            do i = 1, mx
                ! +... Open Upper Boundary Condition: WTxyz7(i,j,0)=WTxyz7(i,j,1)
                ! +                                   WTxyz8(i,j,0)=WTxyz8(i,j,1)
                WTxyz3(i, j, 1) = sigma(1) * WTxy3(i, j) &
                                  + sigma(1) * (WTxyz7(i, j, 1) + WTxyz8(i, j, 1))
            enddo
            ! end do

            do k = kp1(1), mz
                ! do j=1,my
                do i = 1, mx
                    WTxyz3(i, j, k) = dsig_1(k - 1) * (WTxy3(i, j) &
                                                       + 0.50 * (WTxyz7(i, j, k) + WTxyz7(i, j, k - 1) &
                                                                 + WTxyz8(i, j, k) + WTxyz8(i, j, k - 1))) &
                                      + WTxyz3(i, j, k - 1)
                    ! +...        Computation of p*Sigma. ON      Sigma Levels

                enddo
                ! end do
            enddo
        endif

        do k = 1, mz
            ! do j=1,my
            do i = 1, mx
                psigad(i, j, k) = psigad(i, j, k) + WTxyz3(i, j, k)
            enddo
        enddo
        ! end do

        if(((itFast == ntFast + 1) .and. .not. pImplc) .OR. &
           ((itFast == 1) .and. pImplc)) then

            do k = 1, mz
                ! do j=1,my
                do i = 1, mx
                    psigDY(i, j, k) = psigad(i, j, k) / ntpsig
                enddo
                ! end do
            enddo

            if(locorr) then
                do k = 1, mz
                    ! do j=1,my
                    do i = 1, mx
                        psigDY(i, j, k) = psigDY(i, j, k) / dtcorr
                    enddo
                    ! end do
                enddo
            endif
        endif

#ifdef ON
        ! +--Simple non-hydrostatic Correction
        ! +  =================================

        ! +--Filtering
        ! +  ~~~~~~~~~
        do j = 1, my
            do i = 1, mx
                WTxy4(i, j) = &
                    zi__TE(im1(i), jp1(j)) + 2.0 * zi__TE(i, jp1(j)) &
                    + zi__TE(ip1(i), jp1(j)) &
                    + 2.0 * zi__TE(im1(i), j) + 4.0 * zi__TE(i, j) &
                    + 2.0 * zi__TE(ip1(i), j) &
                    + zi__TE(im1(i), jm1(j)) + 2.0 * zi__TE(i, jm1(j)) &
                    + zi__TE(ip1(i), jm1(j))
            enddo
        enddo
        do j = 1, my
            do i = 1, mx
                WTxy4(i, j) = 0.0625 * WTxy4(i, j)
            enddo
        enddo

        ! +--Correction
        ! +  ~~~~~~~~~~
        do j = 1, my
            do i = 1, mx
                ! Weisman et al., 1997, MWR125, p.541
                CorArg = 1.0 + WTxy4(i, j) * WTxy4(i, j) / (4.0 * dx * dx)
                CorrNH = 1.0 / sqrt(CorArg)
                do k = 1, mz
                    psigDY(i, j, k) = psigDY(i, j, k) * CorrNH
                enddo
            enddo
        enddo
#endif

        ! +--Vertical Velocity (sigma coordinates)
        ! +  =====================================

        if(staggr) then
            do k = 1, mz
                ! do j=jp11,my1
                do i = ip11, mx1
                    wsigDY(i, j, k) = psigDY(i, j, k) / pstDYn(i, j)
                enddo
                ! end do
            enddo
        else
            do k = 1, mz1
                ! do j=jp11,my1
                do i = ip11, mx1
                    wsigDY(i, j, k) = (psigDY(i, j, k) + psigDY(i, j, kp1(k))) * 0.5 &
                                      / pstDYn(i, j)
                enddo
                ! end do
            enddo
            k = mz
            ! do j=jp11,my1
            do i = ip11, mx1
                wsigDY(i, j, k) = psigDY(i, j, k) * 0.5 &
                                  / pstDYn(i, j)
            enddo
            ! end do
        endif

        ! +--Maximum Vertical Courant Number
        ! +  ===============================

        do k = 1, mz
            !       do j=1,my
            do i = 1, mx
                CFLzDY = max(CFLzDY &
                             , abs(wsigDY(i, j, k)) * 2.0 * dt / dsigm1(k))
            enddo
            !       end do
        enddo

    enddo
    !$OMP END PARALLEL DO

#ifdef OM
    ! +--Slip condition for Mountain Wave Experiments
    ! +  ============================================
    do j = 1, my
        do i = 1, mx
            psigDY(i, j, mz) = 0.0
        enddo
    enddo
#endif
    return
endsubroutine DYNdps_mp
