From dc130e8c97e9b1b76525450c8e8d051e0614ab7b Mon Sep 17 00:00:00 2001
From: Pierre Kestener <pierre.kestener@cea.fr>
Date: Mon, 9 Sep 2024 18:10:54 +0200
Subject: [PATCH] back port MHD implementation from euler_kokkos.

---
 settings/test_mhd_orszag_tang_2D.ini |    8 +-
 src/muscl/MHDBaseFunctor2D.h         |  398 +++--
 src/muscl/MHDBaseFunctor3D.h         |  364 +++--
 src/muscl/MHDRunFunctors2D.h         | 1188 ++++++++++++++-
 src/muscl/MHDRunFunctors3D.h         | 2046 ++++++++++++++++++++++----
 src/muscl/SolverMHDMuscl.cpp         |  201 ++-
 src/muscl/SolverMHDMuscl.h           |  127 +-
 src/shared/HydroParams.cpp           |    4 +-
 src/shared/mhd_utils.h               |   13 +
 9 files changed, 3736 insertions(+), 613 deletions(-)
diff --git a/settings/test_mhd_orszag_tang_2D.ini b/settings/test_mhd_orszag_tang_2D.ini
index 56d8b07..f331d11 100644
--- a/settings/test_mhd_orszag_tang_2D.ini
+++ b/settings/test_mhd_orszag_tang_2D.ini
@@ -1,8 +1,8 @@
 [run]
 solver_name=MHD_Muscl_2D
 tEnd=1.0
-nStepmax=100
-nOutput=-1
+nStepmax=1000
+nOutput=10
 
 [mesh]
 nx=256
@@ -26,7 +26,7 @@ smallc=1e-7
 [output]
 outputPrefix=orszag_tang_2d
 outputVtkAscii=false
+hdf5_enabled=yes
 
 [other]
-implementationVersion=0
-
+implementationVersion=2
diff --git a/src/muscl/MHDBaseFunctor2D.h b/src/muscl/MHDBaseFunctor2D.h
index 173b215..1cebea3 100644
--- a/src/muscl/MHDBaseFunctor2D.h
+++ b/src/muscl/MHDBaseFunctor2D.h
@@ -48,7 +48,7 @@ class MHDBaseFunctor2D
    */
   KOKKOS_INLINE_FUNCTION
   void
-  get_state(DataArray data, int i, int j, MHDState & q) const
+  get_state(DataArray const & data, int i, int j, MHDState & q) const
   {
 
     q[ID] = data(i, j, ID);
@@ -56,9 +56,9 @@ class MHDBaseFunctor2D
     q[IU] = data(i, j, IU);
     q[IV] = data(i, j, IV);
     q[IW] = data(i, j, IW);
-    q[IBX] = data(i, j, IBX);
-    q[IBY] = data(i, j, IBY);
-    q[IBZ] = data(i, j, IBZ);
+    q[IA] = data(i, j, IA);
+    q[IB] = data(i, j, IB);
+    q[IC] = data(i, j, IC);
 
   } // get_state
 
@@ -75,9 +75,9 @@ class MHDBaseFunctor2D
     data(i, j, IU) = q[IU];
     data(i, j, IV) = q[IV];
     data(i, j, IW) = q[IW];
-    data(i, j, IBX) = q[IBX];
-    data(i, j, IBY) = q[IBY];
-    data(i, j, IBZ) = q[IBZ];
+    data(i, j, IA) = q[IA];
+    data(i, j, IB) = q[IB];
+    data(i, j, IC) = q[IC];
 
   } // set_state
 
@@ -89,12 +89,71 @@ class MHDBaseFunctor2D
   get_magField(const DataArray & data, int i, int j, BField & b) const
   {
 
-    b[IBFX] = data(i, j, IBX);
-    b[IBFY] = data(i, j, IBY);
-    b[IBFZ] = data(i, j, IBZ);
+    b[IBFX] = data(i, j, IA);
+    b[IBFY] = data(i, j, IB);
+    b[IBFZ] = data(i, j, IC);
 
   } // get_magField
 
+  /**
+   * Compute slopes of face-centered magnetic field along normal direction
+   *
+   * \param[in] data is Conservative data array (which is the one containing face-centered magnetic
+   * field components)
+   * \param[in] i x-coordinate
+   * \param[in] j y-coordinate
+   *
+   * \return db magnetic slopes
+   */
+  KOKKOS_INLINE_FUNCTION
+  BField
+  compute_normal_mag_field_slopes(const DataArray & data, int i, int j) const
+  {
+
+    BField db;
+
+    // clang-format off
+    db[IX] = data(i + 1, j    , IA) - data(i, j, IA);
+    db[IY] = data(i    , j + 1, IB) - data(i, j, IB);
+    db[IZ] = 0.0;
+    // clang-format on
+
+    return db;
+  } // compute_normal_mag_field_slopes
+
+  /**
+   * Compute limited slope of face centered magnetic field component.
+   *
+   * \param[in] udata is a conservative variable array
+   */
+  template <Direction dir>
+  KOKKOS_INLINE_FUNCTION real_t
+  compute_limited_slope(DataArray udata, int i, int j, int component) const
+  {
+    KOKKOS_ASSERT(((component == IA) or (component == IB) or (component == IC)) &&
+                  "Wrong component index for a magnetic field.");
+
+    constexpr auto delta_i = dir == DIR_X ? 1 : 0;
+    constexpr auto delta_j = dir == DIR_Y ? 1 : 0;
+
+    // clang-format off
+    const auto b       = udata(i          , j          , component);
+    const auto b_plus  = udata(i + delta_i, j + delta_j, component);
+    const auto b_minus = udata(i - delta_i, j - delta_j, component);
+    // clang-format on
+
+    const real_t dlft = params.settings.slope_type * (b - b_minus);
+    const real_t drgt = params.settings.slope_type * (b_plus - b);
+    const real_t dcen = HALF_F * (b_plus - b_minus);
+    const real_t dsgn = (dcen >= ZERO_F) ? ONE_F : -ONE_F;
+    const real_t slop = fmin(fabs(dlft), fabs(drgt));
+    real_t       dlim = slop;
+    if ((dlft * drgt) <= ZERO_F)
+      dlim = ZERO_F;
+    return dsgn * fmin(dlim, fabs(dcen));
+
+  } // compute_limited_slope
+
   /**
    * Equation of state:
    * compute pressure p and speed of sound c, from density rho and
@@ -148,13 +207,13 @@ class MHDBaseFunctor2D
     q[IW] = u[IW] / q[ID];
 
     // compute cell-centered magnetic field
-    q[IBX] = 0.5 * (u[IBX] + magFieldNeighbors[0]);
-    q[IBY] = 0.5 * (u[IBY] + magFieldNeighbors[1]);
-    q[IBZ] = 0.5 * (u[IBZ] + magFieldNeighbors[2]);
+    q[IA] = 0.5 * (u[IA] + magFieldNeighbors[0]);
+    q[IB] = 0.5 * (u[IB] + magFieldNeighbors[1]);
+    q[IC] = 0.5 * (u[IC] + magFieldNeighbors[2]);
 
     // compute specific kinetic energy and magnetic energy
     real_t eken = 0.5 * (q[IU] * q[IU] + q[IV] * q[IV] + q[IW] * q[IW]);
-    real_t emag = 0.5 * (q[IBX] * q[IBX] + q[IBY] * q[IBY] + q[IBZ] * q[IBZ]);
+    real_t emag = 0.5 * (q[IA] * q[IA] + q[IB] * q[IB] + q[IC] * q[IC]);
 
     // compute pressure
 
@@ -235,6 +294,38 @@ class MHDBaseFunctor2D
 
   } // slope_unsplit_hydro_2d_scalar
 
+  /**
+   * Compute primitive variables slopes (dqX,dqY) for one component from q and its neighbors.
+   * This routine is only used in the 2D UNSPLIT integration and slope_type = 0,1 and 2.
+   *
+   * Only slope_type 1 and 2 are supported.
+   *
+   * \param[in]  q      : current primitive variable
+   * \param[in]  qPlus  : value in the next neighbor cell
+   * \param[in]  qMinus : value in the previous neighbor cell
+   *
+   */
+  KOKKOS_INLINE_FUNCTION
+  real_t
+  slope_unsplit_hydro_2d_scalar(real_t q, real_t qPlus, real_t qMinus) const
+  {
+    real_t slope_type = params.settings.slope_type;
+
+    real_t dlft, drgt, dcen, dsgn, slop, dlim;
+
+    // slopes in first coordinate direction
+    dlft = slope_type * (q - qMinus);
+    drgt = slope_type * (qPlus - q);
+    dcen = 0.5 * (qPlus - qMinus);
+    dsgn = (dcen >= ZERO_F) ? ONE_F : -ONE_F;
+    slop = fmin(fabs(dlft), fabs(drgt));
+    dlim = slop;
+    if ((dlft * drgt) <= ZERO_F)
+      dlim = ZERO_F;
+    return dsgn * fmin(dlim, fabs(dcen));
+
+  } // slope_unsplit_hydro_2d_scalar
+
   /**
    * Compute primitive variables slope (vector dq) from q and its neighbors.
    * This routine is only used in the 2D UNSPLIT integration and slope_type = 0,1,2 and 3.
@@ -264,11 +355,13 @@ class MHDBaseFunctor2D
     };
 
     // aliases to input qState neighbors
-    const MHDState & q = qNb[CENTER][CENTER];
-    const MHDState & qPlusX = qNb[CENTER + 1][CENTER];
-    const MHDState & qMinusX = qNb[CENTER - 1][CENTER];
-    const MHDState & qPlusY = qNb[CENTER][CENTER + 1];
-    const MHDState & qMinusY = qNb[CENTER][CENTER - 1];
+    // clang-format off
+    const MHDState & q       = qNb[CENTER    ][CENTER    ];
+    const MHDState & qPlusX  = qNb[CENTER + 1][CENTER    ];
+    const MHDState & qMinusX = qNb[CENTER - 1][CENTER    ];
+    const MHDState & qPlusY  = qNb[CENTER    ][CENTER + 1];
+    const MHDState & qMinusY = qNb[CENTER    ][CENTER - 1];
+    // clang-format on
 
     MHDState & dqX = dq[IX];
     MHDState & dqY = dq[IY];
@@ -287,52 +380,55 @@ class MHDBaseFunctor2D
       slope_unsplit_hydro_2d_scalar(
         q[IW], qPlusX[IW], qMinusX[IW], qPlusY[IW], qMinusY[IW], &(dqX[IW]), &(dqY[IW]));
       slope_unsplit_hydro_2d_scalar(
-        q[IBX], qPlusX[IBX], qMinusX[IBX], qPlusY[IBX], qMinusY[IBX], &(dqX[IBX]), &(dqY[IBX]));
+        q[IA], qPlusX[IA], qMinusX[IA], qPlusY[IA], qMinusY[IA], &(dqX[IA]), &(dqY[IA]));
       slope_unsplit_hydro_2d_scalar(
-        q[IBY], qPlusX[IBY], qMinusX[IBY], qPlusY[IBY], qMinusY[IBY], &(dqX[IBY]), &(dqY[IBY]));
+        q[IB], qPlusX[IB], qMinusX[IB], qPlusY[IB], qMinusY[IB], &(dqX[IB]), &(dqY[IB]));
       slope_unsplit_hydro_2d_scalar(
-        q[IBZ], qPlusX[IBZ], qMinusX[IBZ], qPlusY[IBZ], qMinusY[IBZ], &(dqX[IBZ]), &(dqY[IBZ]));
+        q[IC], qPlusX[IC], qMinusX[IC], qPlusY[IC], qMinusY[IC], &(dqX[IC]), &(dqY[IC]));
     }
-    // else if (::gParams.slope_type == 3) {
 
-    //   real_t slop, dlim;
-    //   real_t dfll, dflm, dflr, dfml, dfmm, dfmr, dfrl, dfrm, dfrr;
-    //   real_t vmin, vmax;
-    //   real_t dfx, dfy, dff;
-
-    //   for (int nVar=0; nVar<NVAR_MHD; ++nVar) {
-
-    // 	dfll = qNb[CENTER-1][CENTER-1][nVar]-qNb[CENTER][CENTER][nVar];
-    // 	dflm = qNb[CENTER-1][CENTER  ][nVar]-qNb[CENTER][CENTER][nVar];
-    // 	dflr = qNb[CENTER-1][CENTER+1][nVar]-qNb[CENTER][CENTER][nVar];
-    // 	dfml = qNb[CENTER  ][CENTER-1][nVar]-qNb[CENTER][CENTER][nVar];
-    // 	dfmm = qNb[CENTER  ][CENTER  ][nVar]-qNb[CENTER][CENTER][nVar];
-    // 	dfmr = qNb[CENTER  ][CENTER+1][nVar]-qNb[CENTER][CENTER][nVar];
-    // 	dfrl = qNb[CENTER+1][CENTER-1][nVar]-qNb[CENTER][CENTER][nVar];
-    // 	dfrm = qNb[CENTER+1][CENTER  ][nVar]-qNb[CENTER][CENTER][nVar];
-    // 	dfrr = qNb[CENTER+1][CENTER+1][nVar]-qNb[CENTER][CENTER][nVar];
-
-    // 	vmin = FMIN9_(dfll,dflm,dflr,dfml,dfmm,dfmr,dfrl,dfrm,dfrr);
-    // 	vmax = FMAX9_(dfll,dflm,dflr,dfml,dfmm,dfmr,dfrl,dfrm,dfrr);
-
-    // 	dfx  = HALF_F * (qNb[CENTER+1][CENTER  ][nVar] - qNb[CENTER-1][CENTER  ][nVar]);
-    // 	dfy  = HALF_F * (qNb[CENTER  ][CENTER+1][nVar] - qNb[CENTER  ][CENTER-1][nVar]);
-    // 	dff  = HALF_F * (fabs(dfx) + fabs(dfy));
+  } // slope_unsplit_hydro_2d
 
-    // 	if (dff>ZERO_F) {
-    // 	  slop = fmin(ONE_F, fmin(fabs(vmin), fabs(vmax))/dff);
-    // 	} else {
-    // 	  slop = ONE_F;
-    // 	}
+  /**
+   * Compute primitive variables slopes (vector dq) from q and its neighbors using cell-centered
+   * values.
+   * This routine is only used in the 2D UNSPLIT integration and slope_type = 1 or 2.
+   *
+   * Only slope_type 1 and 2 are supported.
+   *
+   * \param[in]  q     : primitive variable state in center cell
+   * \param[in]  qp    : primitive variable state in center cell plus one
+   * \param[in]  qm    : primitive variable state in center cell minus one
+   * \param[out] dq    : reference to a slope state
+   *
+   */
+  KOKKOS_INLINE_FUNCTION void
+  slope_unsplit_hydro_2d(MHDState const & q,
+                         MHDState const & qp,
+                         MHDState const & qm,
+                         MHDState &       dq) const
+  {
+    real_t slope_type = params.settings.slope_type;
 
-    // 	dlim = slop;
+    // index of current cell in the neighborhood
+    enum
+    {
+      CENTER = 1
+    };
 
-    // 	dqX[nVar] = dlim*dfx;
-    // 	dqY[nVar] = dlim*dfy;
 
-    //   } // end for nVar
+    if (slope_type == 1 or slope_type == 2)
+    { // minmod or average
 
-    // } // end slope_type
+      dq[ID] = slope_unsplit_hydro_2d_scalar(q[ID], qp[ID], qm[ID]);
+      dq[IP] = slope_unsplit_hydro_2d_scalar(q[IP], qp[IP], qm[IP]);
+      dq[IU] = slope_unsplit_hydro_2d_scalar(q[IU], qp[IU], qm[IU]);
+      dq[IV] = slope_unsplit_hydro_2d_scalar(q[IV], qp[IV], qm[IV]);
+      dq[IW] = slope_unsplit_hydro_2d_scalar(q[IW], qp[IW], qm[IW]);
+      dq[IA] = slope_unsplit_hydro_2d_scalar(q[IA], qp[IA], qm[IA]);
+      dq[IB] = slope_unsplit_hydro_2d_scalar(q[IB], qp[IB], qm[IB]);
+      dq[IC] = slope_unsplit_hydro_2d_scalar(q[IC], qp[IC], qm[IC]);
+    }
 
   } // slope_unsplit_hydro_2d
 
@@ -359,13 +455,16 @@ class MHDBaseFunctor2D
   void
   slope_unsplit_mhd_2d(const real_t (&bfNeighbors)[6], real_t (&dbf)[2][3]) const
   {
-    /* layout for face centered magnetic field */
-    const real_t & bfx = bfNeighbors[0];
-    const real_t & bfx_yplus = bfNeighbors[1];
+    // layout for face centered magnetic field
+
+    // clang-format off
+    const real_t & bfx        = bfNeighbors[0];
+    const real_t & bfx_yplus  = bfNeighbors[1];
     const real_t & bfx_yminus = bfNeighbors[2];
-    const real_t & bfy = bfNeighbors[3];
-    const real_t & bfy_xplus = bfNeighbors[4];
+    const real_t & bfy        = bfNeighbors[3];
+    const real_t & bfy_xplus  = bfNeighbors[4];
     const real_t & bfy_xminus = bfNeighbors[5];
+    // clang-format on
 
     real_t(&dbfX)[3] = dbf[IX];
     real_t(&dbfY)[3] = dbf[IY];
@@ -409,6 +508,39 @@ class MHDBaseFunctor2D
 
   } // slope_unsplit_mhd_2d
 
+  /**
+   * Compute electric field E = U ^ B at mid-edge
+   *
+   * \param[in] conservative data array
+   * \param[in] primitive data array
+   * \param[in] x-coordinate
+   * \param[in] y-coordinate
+   *
+   * \return electric field at mid-edge
+   */
+  KOKKOS_INLINE_FUNCTION real_t
+  compute_electric_field_2d(DataArray const & udata, DataArray const & qdata, int i, int j) const
+  {
+    // clang-format off
+    const real_t u = ONE_FOURTH_F * (qdata(i - 1, j - 1, IU) +
+                                     qdata(i - 1, j    , IU) +
+                                     qdata(i    , j - 1, IU) +
+                                     qdata(i    , j    , IU));
+
+    const real_t v = ONE_FOURTH_F * (qdata(i - 1, j - 1, IV) +
+                                     qdata(i - 1, j    , IV) +
+                                     qdata(i    , j - 1, IV) +
+                                     qdata(i    , j    , IV));
+
+
+    const real_t A = HALF_F * (udata(i,j-1,IA) + udata(i,j,IA));
+
+    const real_t B = HALF_F * (udata(i-1,j,IB) + udata(i,j,IB));
+    // clang-format on
+
+    return u * B - v * A;
+  }
+
   /**
    * Trace computations for unsplit Godunov scheme.
    *
@@ -621,11 +753,21 @@ class MHDBaseFunctor2D
    * computation.
    *
    * \param[in]  qNb        state in neighbor cells (3-by-3 neighborhood indexed as qNb[i][j], for
-   * i,j=0,1,2); current center cell is at index (i=j=1). \param[in]  bfNb       face centered
-   * magnetic field in neighbor cells (4-by-4 neighborhood indexed as bfNb[i][j] for i,j=0,1,2,3);
-   * current cell is located at index (i=j=1) \param[in]  c          local sound speed. \param[in]
-   * dtdx       dt over dx \param[in]  dtdy       dt over dy \param[in]  xPos       x location of
-   * current cell (needed for shear computation) \param[out] qm         qm state (one per dimension)
+   * i,j=0,1,2); current center cell is at index (i=j=1).
+   *
+   * \param[in]  bfNb       face centered
+   * magnetic field in neighbor cells (4-by-4 neighborhood indexed as bfNb[i][j]
+   * for i,j=0,1,2,3); current cell is located at index (i=j=1)
+   *
+   * \param[in]  c          local sound speed.
+   *
+   * \param[in]  dtdx       dt over dx
+   *
+   * \param[in]  dtdy       dt over dy
+   *
+   * \param[in]  xPos       x location of current cell (needed for shear computation)
+   *
+   * \param[out] qm         qm state (one per dimension)
    * \param[out] qp         qp state (one per dimension)
    * \param[out] qEdge      q state on cell edges (qRT, qRB, qLT, qLB)
    */
@@ -673,23 +815,27 @@ class MHDBaseFunctor2D
     // compute u,v,A,B,Ez (electric field)
     real_t Ez[2][2];
     for (int di = 0; di < 2; di++)
+    {
       for (int dj = 0; dj < 2; dj++)
       {
 
-        int    centerX = CENTER + di;
-        int    centerY = CENTER + dj;
+        int centerX = CENTER + di;
+        int centerY = CENTER + dj;
+        // clang-format off
         real_t u = 0.25f * (qNb[centerX - 1][centerY - 1][IU] + qNb[centerX - 1][centerY][IU] +
-                            qNb[centerX][centerY - 1][IU] + qNb[centerX][centerY][IU]);
+                            qNb[centerX    ][centerY - 1][IU] + qNb[centerX    ][centerY][IU]);
 
         real_t v = 0.25f * (qNb[centerX - 1][centerY - 1][IV] + qNb[centerX - 1][centerY][IV] +
-                            qNb[centerX][centerY - 1][IV] + qNb[centerX][centerY][IV]);
+                            qNb[centerX    ][centerY - 1][IV] + qNb[centerX    ][centerY][IV]);
 
-        real_t A = 0.5f * (bfNb[centerX][centerY - 1][IBFX] + bfNb[centerX][centerY][IBFX]);
+        real_t A = 0.5f * (bfNb[centerX    ][centerY - 1][IBFX] + bfNb[centerX][centerY][IBFX]);
 
-        real_t B = 0.5f * (bfNb[centerX - 1][centerY][IBFY] + bfNb[centerX][centerY][IBFY]);
+        real_t B = 0.5f * (bfNb[centerX - 1][centerY    ][IBFY] + bfNb[centerX][centerY][IBFY]);
+        // clang-format on
 
         Ez[di][dj] = u * B - v * A;
       }
+    }
 
     // Electric field
     real_t & ELL = Ez[0][0];
@@ -703,21 +849,23 @@ class MHDBaseFunctor2D
     real_t u = q[IU];
     real_t v = q[IV];
     real_t w = q[IW];
-    real_t A = q[IBX];
-    real_t B = q[IBY];
-    real_t C = q[IBZ];
+    real_t A = q[IA];
+    real_t B = q[IB];
+    real_t C = q[IC];
 
     // Face centered variables
-    real_t AL = bfNb[CENTER][CENTER][IBFX];
-    real_t AR = bfNb[CENTER + 1][CENTER][IBFX];
-    real_t BL = bfNb[CENTER][CENTER][IBFY];
-    real_t BR = bfNb[CENTER][CENTER + 1][IBFY];
+    // clang-format off
+    real_t AL = bfNb[CENTER    ][CENTER    ][IBFX];
+    real_t AR = bfNb[CENTER + 1][CENTER    ][IBFX];
+    real_t BL = bfNb[CENTER    ][CENTER    ][IBFY];
+    real_t BR = bfNb[CENTER    ][CENTER + 1][IBFY];
+    // clang-format on
 
     // TODO LATER : compute xL, xR and xC using ::gParam
     // this is only needed when doing cylindrical or spherical coordinates
 
     /*
-     * compute dq slopes
+     * compute hydro slopes
      */
     MHDState dq[2];
 
@@ -738,9 +886,9 @@ class MHDBaseFunctor2D
     dvx *= 0.5;
     real_t dwx = dq[IX][IW];
     dwx *= 0.5;
-    real_t dCx = dq[IX][IBZ];
+    real_t dCx = dq[IX][IC];
     dCx *= 0.5;
-    real_t dBx = dq[IX][IBY];
+    real_t dBx = dq[IX][IB];
     dBx *= 0.5;
 
     // Cell centered TVD slopes in Y direction
@@ -754,9 +902,9 @@ class MHDBaseFunctor2D
     dvy *= 0.5;
     real_t dwy = dq[IY][IW];
     dwy *= 0.5;
-    real_t dCy = dq[IY][IBZ];
+    real_t dCy = dq[IY][IC];
     dCy *= 0.5;
-    real_t dAy = dq[IY][IBX];
+    real_t dAy = dq[IY][IA];
     dAy *= 0.5;
 
     /*
@@ -767,12 +915,14 @@ class MHDBaseFunctor2D
     real_t(&dbfX)[3] = dbf[IX];
     real_t(&dbfY)[3] = dbf[IY];
 
-    bfNeighbors[0] = bfNb[CENTER][CENTER][IBFX];
-    bfNeighbors[1] = bfNb[CENTER][CENTER + 1][IBFX];
-    bfNeighbors[2] = bfNb[CENTER][CENTER - 1][IBFX];
-    bfNeighbors[3] = bfNb[CENTER][CENTER][IBFY];
-    bfNeighbors[4] = bfNb[CENTER + 1][CENTER][IBFY];
-    bfNeighbors[5] = bfNb[CENTER - 1][CENTER][IBFY];
+    // clang-format off
+    bfNeighbors[0] = bfNb[CENTER    ][CENTER    ][IBFX];
+    bfNeighbors[1] = bfNb[CENTER    ][CENTER + 1][IBFX];
+    bfNeighbors[2] = bfNb[CENTER    ][CENTER - 1][IBFX];
+    bfNeighbors[3] = bfNb[CENTER    ][CENTER    ][IBFY];
+    bfNeighbors[4] = bfNb[CENTER + 1][CENTER    ][IBFY];
+    bfNeighbors[5] = bfNb[CENTER - 1][CENTER    ][IBFY];
+    // clang-format on
 
     slope_unsplit_mhd_2d(bfNeighbors, dbf);
 
@@ -781,24 +931,28 @@ class MHDBaseFunctor2D
     real_t dBLx = 0.5 * dbfX[IY];
 
     // change neighbors to i+1, j and recompute dbf
-    bfNeighbors[0] = bfNb[CENTER + 1][CENTER][IBFX];
+    // clang-format off
+    bfNeighbors[0] = bfNb[CENTER + 1][CENTER    ][IBFX];
     bfNeighbors[1] = bfNb[CENTER + 1][CENTER + 1][IBFX];
     bfNeighbors[2] = bfNb[CENTER + 1][CENTER - 1][IBFX];
-    bfNeighbors[3] = bfNb[CENTER + 1][CENTER][IBFY];
-    bfNeighbors[4] = bfNb[CENTER + 2][CENTER][IBFY];
-    bfNeighbors[5] = bfNb[CENTER][CENTER][IBFY];
+    bfNeighbors[3] = bfNb[CENTER + 1][CENTER    ][IBFY];
+    bfNeighbors[4] = bfNb[CENTER + 2][CENTER    ][IBFY];
+    bfNeighbors[5] = bfNb[CENTER    ][CENTER    ][IBFY];
+    // clang-format on
 
     slope_unsplit_mhd_2d(bfNeighbors, dbf);
 
     real_t dARy = 0.5 * dbfY[IX];
 
     // change neighbors to i, j+1 and recompute dbf
-    bfNeighbors[0] = bfNb[CENTER][CENTER + 1][IBFX];
-    bfNeighbors[1] = bfNb[CENTER][CENTER + 2][IBFX];
-    bfNeighbors[2] = bfNb[CENTER][CENTER][IBFX];
-    bfNeighbors[3] = bfNb[CENTER][CENTER + 1][IBFY];
+    // clang-format off
+    bfNeighbors[0] = bfNb[CENTER    ][CENTER + 1][IBFX];
+    bfNeighbors[1] = bfNb[CENTER    ][CENTER + 2][IBFX];
+    bfNeighbors[2] = bfNb[CENTER    ][CENTER    ][IBFX];
+    bfNeighbors[3] = bfNb[CENTER    ][CENTER + 1][IBFY];
     bfNeighbors[4] = bfNb[CENTER + 1][CENTER + 1][IBFY];
     bfNeighbors[5] = bfNb[CENTER - 1][CENTER + 1][IBFY];
+    // clang-format on
 
     slope_unsplit_mhd_2d(bfNeighbors, dbf);
 
@@ -861,9 +1015,9 @@ class MHDBaseFunctor2D
     qp[0][IV] = v - dvx;
     qp[0][IW] = w - dwx;
     qp[0][IP] = p - dpx;
-    qp[0][IBX] = AL;
-    qp[0][IBY] = B - dBx;
-    qp[0][IBZ] = C - dCx;
+    qp[0][IA] = AL;
+    qp[0][IB] = B - dBx;
+    qp[0][IC] = C - dCx;
     qp[0][ID] = fmax(smallR, qp[0][ID]);
     qp[0][IP] = fmax(smallp * qp[0][ID], qp[0][IP]);
 
@@ -873,9 +1027,9 @@ class MHDBaseFunctor2D
     qm[0][IV] = v + dvx;
     qm[0][IW] = w + dwx;
     qm[0][IP] = p + dpx;
-    qm[0][IBX] = AR;
-    qm[0][IBY] = B + dBx;
-    qm[0][IBZ] = C + dCx;
+    qm[0][IA] = AR;
+    qm[0][IB] = B + dBx;
+    qm[0][IC] = C + dCx;
     qm[0][ID] = fmax(smallR, qm[0][ID]);
     qm[0][IP] = fmax(smallp * qm[0][ID], qm[0][IP]);
 
@@ -885,9 +1039,9 @@ class MHDBaseFunctor2D
     qp[1][IV] = v - dvy;
     qp[1][IW] = w - dwy;
     qp[1][IP] = p - dpy;
-    qp[1][IBX] = A - dAy;
-    qp[1][IBY] = BL;
-    qp[1][IBZ] = C - dCy;
+    qp[1][IA] = A - dAy;
+    qp[1][IB] = BL;
+    qp[1][IC] = C - dCy;
     qp[1][ID] = fmax(smallR, qp[1][ID]);
     qp[1][IP] = fmax(smallp * qp[1][ID], qp[1][IP]);
 
@@ -897,9 +1051,9 @@ class MHDBaseFunctor2D
     qm[1][IV] = v + dvy;
     qm[1][IW] = w + dwy;
     qm[1][IP] = p + dpy;
-    qm[1][IBX] = A + dAy;
-    qm[1][IBY] = BR;
-    qm[1][IBZ] = C + dCy;
+    qm[1][IA] = A + dAy;
+    qm[1][IB] = BR;
+    qm[1][IC] = C + dCy;
     qm[1][ID] = fmax(smallR, qm[1][ID]);
     qm[1][IP] = fmax(smallp * qm[1][ID], qm[1][IP]);
 
@@ -910,9 +1064,9 @@ class MHDBaseFunctor2D
     qRT[IV] = v + (+dvx + dvy);
     qRT[IW] = w + (+dwx + dwy);
     qRT[IP] = p + (+dpx + dpy);
-    qRT[IBX] = AR + (+dARy);
-    qRT[IBY] = BR + (+dBRx);
-    qRT[IBZ] = C + (+dCx + dCy);
+    qRT[IA] = AR + (+dARy);
+    qRT[IB] = BR + (+dBRx);
+    qRT[IC] = C + (+dCx + dCy);
     qRT[ID] = fmax(smallR, qRT[ID]);
     qRT[IP] = fmax(smallp * qRT[ID], qRT[IP]);
 
@@ -922,9 +1076,9 @@ class MHDBaseFunctor2D
     qRB[IV] = v + (+dvx - dvy);
     qRB[IW] = w + (+dwx - dwy);
     qRB[IP] = p + (+dpx - dpy);
-    qRB[IBX] = AR + (-dARy);
-    qRB[IBY] = BL + (+dBLx);
-    qRB[IBZ] = C + (+dCx - dCy);
+    qRB[IA] = AR + (-dARy);
+    qRB[IB] = BL + (+dBLx);
+    qRB[IC] = C + (+dCx - dCy);
     qRB[ID] = fmax(smallR, qRB[ID]);
     qRB[IP] = fmax(smallp * qRB[ID], qRB[IP]);
 
@@ -934,9 +1088,9 @@ class MHDBaseFunctor2D
     qLB[IV] = v + (-dvx - dvy);
     qLB[IW] = w + (-dwx - dwy);
     qLB[IP] = p + (-dpx - dpy);
-    qLB[IBX] = AL + (-dALy);
-    qLB[IBY] = BL + (-dBLx);
-    qLB[IBZ] = C + (-dCx - dCy);
+    qLB[IA] = AL + (-dALy);
+    qLB[IB] = BL + (-dBLx);
+    qLB[IC] = C + (-dCx - dCy);
     qLB[ID] = fmax(smallR, qLB[ID]);
     qLB[IP] = fmax(smallp * qLB[ID], qLB[IP]);
 
@@ -946,9 +1100,9 @@ class MHDBaseFunctor2D
     qLT[IV] = v + (-dvx + dvy);
     qLT[IW] = w + (-dwx + dwy);
     qLT[IP] = p + (-dpx + dpy);
-    qLT[IBX] = AL + (+dALy);
-    qLT[IBY] = BR + (-dBRx);
-    qLT[IBZ] = C + (-dCx + dCy);
+    qLT[IA] = AL + (+dALy);
+    qLT[IB] = BR + (-dBRx);
+    qLT[IC] = C + (-dCx + dCy);
     qLT[ID] = fmax(smallR, qLT[ID]);
     qLT[IP] = fmax(smallp * qLT[ID], qLT[IP]);
 
diff --git a/src/muscl/MHDBaseFunctor3D.h b/src/muscl/MHDBaseFunctor3D.h
index 7f718a2..ce2e8f6 100644
--- a/src/muscl/MHDBaseFunctor3D.h
+++ b/src/muscl/MHDBaseFunctor3D.h
@@ -55,9 +55,9 @@ class MHDBaseFunctor3D
     q[IU] = data(i, j, k, IU);
     q[IV] = data(i, j, k, IV);
     q[IW] = data(i, j, k, IW);
-    q[IBX] = data(i, j, k, IBX);
-    q[IBY] = data(i, j, k, IBY);
-    q[IBZ] = data(i, j, k, IBZ);
+    q[IA] = data(i, j, k, IA);
+    q[IB] = data(i, j, k, IB);
+    q[IC] = data(i, j, k, IC);
 
   } // get_state
 
@@ -74,9 +74,9 @@ class MHDBaseFunctor3D
     data(i, j, k, IU) = q[IU];
     data(i, j, k, IV) = q[IV];
     data(i, j, k, IW) = q[IW];
-    data(i, j, k, IBX) = q[IBX];
-    data(i, j, k, IBY) = q[IBY];
-    data(i, j, k, IBZ) = q[IBZ];
+    data(i, j, k, IA) = q[IA];
+    data(i, j, k, IB) = q[IB];
+    data(i, j, k, IC) = q[IC];
 
   } // set_state
 
@@ -88,12 +88,72 @@ class MHDBaseFunctor3D
   get_magField(const DataArray & data, int i, int j, int k, BField & b) const
   {
 
-    b[IBFX] = data(i, j, k, IBX);
-    b[IBFY] = data(i, j, k, IBY);
-    b[IBFZ] = data(i, j, k, IBZ);
+    b[IBFX] = data(i, j, k, IA);
+    b[IBFY] = data(i, j, k, IB);
+    b[IBFZ] = data(i, j, k, IC);
 
   } // get_magField
 
+  /**
+   * Compute slopes of face-centered magnetic field along normal direction
+   *
+   * \param[in] data is Conservative data array (which is the one containing face-centered magnetic
+   * field components)
+   * \param[in] i x-coordinate
+   * \param[in] j y-coordinate
+   *
+   * \return db magnetic slopes
+   */
+  KOKKOS_INLINE_FUNCTION
+  BField
+  compute_normal_mag_field_slopes(const DataArray & data, int i, int j, int k) const
+  {
+
+    BField db;
+
+    // clang-format off
+    db[IX] = data(i + 1, j    , k    , IA) - data(i, j, k, IA);
+    db[IY] = data(i    , j + 1, k    , IB) - data(i, j, k, IB);
+    db[IZ] = data(i    , j    , k + 1, IC) - data(i, j, k, IC);
+    // clang-format on
+
+    return db;
+  } // compute_normal_mag_field_slopes
+
+  /**
+   * Compute limited slope of face centered magnetic field component.
+   *
+   * \param[in] udata is a conservative variable array
+   */
+  template <Direction dir>
+  KOKKOS_INLINE_FUNCTION real_t
+  compute_limited_slope(DataArray udata, int i, int j, int k, int component) const
+  {
+    KOKKOS_ASSERT(((component == IA) or (component == IB) or (component == IC)) &&
+                  "Wrong component index for a magnetic field.");
+
+    constexpr auto delta_i = dir == DIR_X ? 1 : 0;
+    constexpr auto delta_j = dir == DIR_Y ? 1 : 0;
+    constexpr auto delta_k = dir == DIR_Z ? 1 : 0;
+
+    // clang-format off
+    const auto b       = udata(i          , j          , k          , component);
+    const auto b_plus  = udata(i + delta_i, j + delta_j, k + delta_k, component);
+    const auto b_minus = udata(i - delta_i, j - delta_j, k + delta_k, component);
+    // clang-format on
+
+    const real_t dlft = params.settings.slope_type * (b - b_minus);
+    const real_t drgt = params.settings.slope_type * (b_plus - b);
+    const real_t dcen = HALF_F * (b_plus - b_minus);
+    const real_t dsgn = (dcen >= ZERO_F) ? ONE_F : -ONE_F;
+    const real_t slop = fmin(fabs(dlft), fabs(drgt));
+    real_t       dlim = slop;
+    if ((dlft * drgt) <= ZERO_F)
+      dlim = ZERO_F;
+    return dsgn * fmin(dlim, fabs(dcen));
+
+  } // compute_limited_slope
+
   /**
    * Equation of state:
    * compute pressure p and speed of sound c, from density rho and
@@ -147,13 +207,13 @@ class MHDBaseFunctor3D
     q[IW] = u[IW] / q[ID];
 
     // compute cell-centered magnetic field
-    q[IBX] = 0.5 * (u[IBX] + magFieldNeighbors[0]);
-    q[IBY] = 0.5 * (u[IBY] + magFieldNeighbors[1]);
-    q[IBZ] = 0.5 * (u[IBZ] + magFieldNeighbors[2]);
+    q[IA] = 0.5 * (u[IA] + magFieldNeighbors[0]);
+    q[IB] = 0.5 * (u[IB] + magFieldNeighbors[1]);
+    q[IC] = 0.5 * (u[IC] + magFieldNeighbors[2]);
 
     // compute specific kinetic energy and magnetic energy
     real_t eken = 0.5 * (q[IU] * q[IU] + q[IV] * q[IV] + q[IW] * q[IW]);
-    real_t emag = 0.5 * (q[IBX] * q[IBX] + q[IBY] * q[IBY] + q[IBZ] * q[IBZ]);
+    real_t emag = 0.5 * (q[IA] * q[IA] + q[IB] * q[IB] + q[IC] * q[IC]);
 
     // compute pressure
 
@@ -251,6 +311,37 @@ class MHDBaseFunctor3D
 
   } // slope_unsplit_hydro_3d_scalar
 
+  /**
+   * Compute primitive variables slopes (dqX,dqY,dqZ) for one component from q and its neighbors.
+   * This routine is only used in the 3D UNSPLIT integration and slope_type = 0,1 and 2.
+   *
+   * Only slope_type 1 and 2 are supported.
+   *
+   * \param[in]  q      : current primitive variable
+   * \param[in]  qPlus  : value in the next neighbor cell
+   * \param[in]  qMinus : value in the previous neighbor cell
+   *
+   */
+  KOKKOS_INLINE_FUNCTION
+  real_t
+  slope_unsplit_hydro_3d_scalar(real_t q, real_t qPlus, real_t qMinus) const
+  {
+    real_t slope_type = params.settings.slope_type;
+
+    real_t dlft, drgt, dcen, dsgn, slop, dlim;
+
+    // slopes in first coordinate direction
+    dlft = slope_type * (q - qMinus);
+    drgt = slope_type * (qPlus - q);
+    dcen = 0.5 * (qPlus - qMinus);
+    dsgn = (dcen >= ZERO_F) ? ONE_F : -ONE_F;
+    slop = fmin(fabs(dlft), fabs(drgt));
+    dlim = slop;
+    if ((dlft * drgt) <= ZERO_F)
+      dlim = ZERO_F;
+    return dsgn * fmin(dlim, fabs(dcen));
+
+  } // slope_unsplit_hydro_3d_scalar
 
   /**
    * Compute primitive variables slope (vector dq) from q and its neighbors.
@@ -338,36 +429,36 @@ class MHDBaseFunctor3D
                                     &(dqX[IW]),
                                     &(dqY[IW]),
                                     &(dqZ[IW]));
-      slope_unsplit_hydro_3d_scalar(q[IBX],
-                                    qPlusX[IBX],
-                                    qMinusX[IBX],
-                                    qPlusY[IBX],
-                                    qMinusY[IBX],
-                                    qPlusZ[IBX],
-                                    qMinusZ[IBX],
-                                    &(dqX[IBX]),
-                                    &(dqY[IBX]),
-                                    &(dqZ[IBX]));
-      slope_unsplit_hydro_3d_scalar(q[IBY],
-                                    qPlusX[IBY],
-                                    qMinusX[IBY],
-                                    qPlusY[IBY],
-                                    qMinusY[IBY],
-                                    qPlusZ[IBY],
-                                    qMinusZ[IBY],
-                                    &(dqX[IBY]),
-                                    &(dqY[IBY]),
-                                    &(dqZ[IBY]));
-      slope_unsplit_hydro_3d_scalar(q[IBZ],
-                                    qPlusX[IBZ],
-                                    qMinusX[IBZ],
-                                    qPlusY[IBZ],
-                                    qMinusY[IBZ],
-                                    qPlusZ[IBZ],
-                                    qMinusY[IBZ],
-                                    &(dqX[IBZ]),
-                                    &(dqY[IBZ]),
-                                    &(dqZ[IBZ]));
+      slope_unsplit_hydro_3d_scalar(q[IA],
+                                    qPlusX[IA],
+                                    qMinusX[IA],
+                                    qPlusY[IA],
+                                    qMinusY[IA],
+                                    qPlusZ[IA],
+                                    qMinusZ[IA],
+                                    &(dqX[IA]),
+                                    &(dqY[IA]),
+                                    &(dqZ[IA]));
+      slope_unsplit_hydro_3d_scalar(q[IB],
+                                    qPlusX[IB],
+                                    qMinusX[IB],
+                                    qPlusY[IB],
+                                    qMinusY[IB],
+                                    qPlusZ[IB],
+                                    qMinusZ[IB],
+                                    &(dqX[IB]),
+                                    &(dqY[IB]),
+                                    &(dqZ[IB]));
+      slope_unsplit_hydro_3d_scalar(q[IC],
+                                    qPlusX[IC],
+                                    qMinusX[IC],
+                                    qPlusY[IC],
+                                    qMinusY[IC],
+                                    qPlusZ[IC],
+                                    qMinusY[IC],
+                                    &(dqX[IC]),
+                                    &(dqY[IC]),
+                                    &(dqZ[IC]));
     }
     else
     {
@@ -388,21 +479,64 @@ class MHDBaseFunctor3D
       dqX[IW] = ZERO_F;
       dqY[IW] = ZERO_F;
       dqZ[IW] = ZERO_F;
-      dqX[IBX] = ZERO_F;
-      dqY[IBX] = ZERO_F;
-      dqZ[IBX] = ZERO_F;
-      dqX[IBY] = ZERO_F;
-      dqY[IBY] = ZERO_F;
-      dqZ[IBY] = ZERO_F;
-      dqX[IBZ] = ZERO_F;
-      dqY[IBZ] = ZERO_F;
-      dqZ[IBZ] = ZERO_F;
+      dqX[IA] = ZERO_F;
+      dqY[IA] = ZERO_F;
+      dqZ[IA] = ZERO_F;
+      dqX[IB] = ZERO_F;
+      dqY[IB] = ZERO_F;
+      dqZ[IB] = ZERO_F;
+      dqX[IC] = ZERO_F;
+      dqY[IC] = ZERO_F;
+      dqZ[IC] = ZERO_F;
 
       return;
     }
 
   } // slope_unsplit_hydro_3d
 
+  /**
+   * Compute primitive variables slopes (vector dq) from q and its neighbors using cell-centered
+   * values.
+   * This routine is only used in the 3D UNSPLIT integration and slope_type = 1 or 2.
+   *
+   * Only slope_type 1 and 2 are supported.
+   *
+   * \param[in]  q     : primitive variable state in center cell
+   * \param[in]  qp    : primitive variable state in center cell plus one
+   * \param[in]  qm    : primitive variable state in center cell minus one
+   * \param[out] dq    : reference to a slope state
+   *
+   */
+  KOKKOS_INLINE_FUNCTION void
+  slope_unsplit_hydro_3d(MHDState const & q,
+                         MHDState const & qp,
+                         MHDState const & qm,
+                         MHDState &       dq) const
+  {
+    real_t slope_type = params.settings.slope_type;
+
+    // index of current cell in the neighborhood
+    enum
+    {
+      CENTER = 1
+    };
+
+
+    if (slope_type == 1 or slope_type == 2)
+    { // minmod or average
+
+      dq[ID] = slope_unsplit_hydro_3d_scalar(q[ID], qp[ID], qm[ID]);
+      dq[IP] = slope_unsplit_hydro_3d_scalar(q[IP], qp[IP], qm[IP]);
+      dq[IU] = slope_unsplit_hydro_3d_scalar(q[IU], qp[IU], qm[IU]);
+      dq[IV] = slope_unsplit_hydro_3d_scalar(q[IV], qp[IV], qm[IV]);
+      dq[IW] = slope_unsplit_hydro_3d_scalar(q[IW], qp[IW], qm[IW]);
+      dq[IA] = slope_unsplit_hydro_3d_scalar(q[IA], qp[IA], qm[IA]);
+      dq[IB] = slope_unsplit_hydro_3d_scalar(q[IB], qp[IB], qm[IB]);
+      dq[IC] = slope_unsplit_hydro_3d_scalar(q[IC], qp[IC], qm[IC]);
+    }
+
+  } // slope_unsplit_hydro_3d
+
   /**
    * slope_unsplit_mhd_3d computes only magnetic field slopes in 3D; hydro
    * slopes are always computed in slope_unsplit_hydro_3d.
@@ -618,9 +752,9 @@ class MHDBaseFunctor3D
     real_t u = q[IU];
     real_t v = q[IV];
     real_t w = q[IW];
-    real_t A = q[IBX];
-    real_t B = q[IBY];
-    real_t C = q[IBZ];
+    real_t A = q[IA];
+    real_t B = q[IB];
+    real_t C = q[IC];
 
     // Face centered variables
     real_t AL = bfNb[0];
@@ -641,9 +775,9 @@ class MHDBaseFunctor3D
     dvx *= HALF_F;
     real_t & dwx = dq[IX][IW];
     dwx *= HALF_F;
-    real_t & dCx = dq[IX][IBZ];
+    real_t & dCx = dq[IX][IC];
     dCx *= HALF_F;
-    real_t & dBx = dq[IX][IBY];
+    real_t & dBx = dq[IX][IB];
     dBx *= HALF_F;
 
     // Cell centered TVD slopes in Y direction
@@ -657,9 +791,9 @@ class MHDBaseFunctor3D
     dvy *= HALF_F;
     real_t & dwy = dq[IY][IW];
     dwy *= HALF_F;
-    real_t & dCy = dq[IY][IBZ];
+    real_t & dCy = dq[IY][IC];
     dCy *= HALF_F;
-    real_t & dAy = dq[IY][IBX];
+    real_t & dAy = dq[IY][IA];
     dAy *= HALF_F;
 
     // Cell centered TVD slopes in Z direction
@@ -673,9 +807,9 @@ class MHDBaseFunctor3D
     dvz *= HALF_F;
     real_t & dwz = dq[IZ][IW];
     dwz *= HALF_F;
-    real_t & dAz = dq[IZ][IBX];
+    real_t & dAz = dq[IZ][IA];
     dAz *= HALF_F;
-    real_t & dBz = dq[IZ][IBY];
+    real_t & dBz = dq[IZ][IB];
     dBz *= HALF_F;
 
 
@@ -767,9 +901,9 @@ class MHDBaseFunctor3D
     qp[0][IV] = v - dvx;
     qp[0][IW] = w - dwx;
     qp[0][IP] = p - dpx;
-    qp[0][IBX] = AL;
-    qp[0][IBY] = B - dBx;
-    qp[0][IBZ] = C - dCx;
+    qp[0][IA] = AL;
+    qp[0][IB] = B - dBx;
+    qp[0][IC] = C - dCx;
     qp[0][ID] = fmax(smallR, qp[0][ID]);
     qp[0][IP] = fmax(smallp /** qp[0][ID]*/, qp[0][IP]);
 
@@ -779,9 +913,9 @@ class MHDBaseFunctor3D
     qm[0][IV] = v + dvx;
     qm[0][IW] = w + dwx;
     qm[0][IP] = p + dpx;
-    qm[0][IBX] = AR;
-    qm[0][IBY] = B + dBx;
-    qm[0][IBZ] = C + dCx;
+    qm[0][IA] = AR;
+    qm[0][IB] = B + dBx;
+    qm[0][IC] = C + dCx;
     qm[0][ID] = fmax(smallR, qm[0][ID]);
     qm[0][IP] = fmax(smallp /** qm[0][ID]*/, qm[0][IP]);
 
@@ -791,9 +925,9 @@ class MHDBaseFunctor3D
     qp[1][IV] = v - dvy;
     qp[1][IW] = w - dwy;
     qp[1][IP] = p - dpy;
-    qp[1][IBX] = A - dAy;
-    qp[1][IBY] = BL;
-    qp[1][IBZ] = C - dCy;
+    qp[1][IA] = A - dAy;
+    qp[1][IB] = BL;
+    qp[1][IC] = C - dCy;
     qp[1][ID] = fmax(smallR, qp[1][ID]);
     qp[1][IP] = fmax(smallp /** qp[1][ID]*/, qp[1][IP]);
 
@@ -803,9 +937,9 @@ class MHDBaseFunctor3D
     qm[1][IV] = v + dvy;
     qm[1][IW] = w + dwy;
     qm[1][IP] = p + dpy;
-    qm[1][IBX] = A + dAy;
-    qm[1][IBY] = BR;
-    qm[1][IBZ] = C + dCy;
+    qm[1][IA] = A + dAy;
+    qm[1][IB] = BR;
+    qm[1][IC] = C + dCy;
     qm[1][ID] = fmax(smallR, qm[1][ID]);
     qm[1][IP] = fmax(smallp /** qm[1][ID]*/, qm[1][IP]);
 
@@ -815,9 +949,9 @@ class MHDBaseFunctor3D
     qp[2][IV] = v - dvz;
     qp[2][IW] = w - dwz;
     qp[2][IP] = p - dpz;
-    qp[2][IBX] = A - dAz;
-    qp[2][IBY] = B - dBz;
-    qp[2][IBZ] = CL;
+    qp[2][IA] = A - dAz;
+    qp[2][IB] = B - dBz;
+    qp[2][IC] = CL;
     qp[2][ID] = fmax(smallR, qp[2][ID]);
     qp[2][IP] = fmax(smallp /** qp[2][ID]*/, qp[2][IP]);
 
@@ -827,9 +961,9 @@ class MHDBaseFunctor3D
     qm[2][IV] = v + dvz;
     qm[2][IW] = w + dwz;
     qm[2][IP] = p + dpz;
-    qm[2][IBX] = A + dAz;
-    qm[2][IBY] = B + dBz;
-    qm[2][IBZ] = CR;
+    qm[2][IA] = A + dAz;
+    qm[2][IB] = B + dBz;
+    qm[2][IC] = CR;
     qm[2][ID] = fmax(smallR, qm[2][ID]);
     qm[2][IP] = fmax(smallp /** qm[2][ID]*/, qm[2][IP]);
 
@@ -839,9 +973,9 @@ class MHDBaseFunctor3D
     qRT_X[IV] = v + (+dvy + dvz);
     qRT_X[IW] = w + (+dwy + dwz);
     qRT_X[IP] = p + (+dpy + dpz);
-    qRT_X[IBX] = A + (+dAy + dAz);
-    qRT_X[IBY] = BR + (+dBRz);
-    qRT_X[IBZ] = CR + (+dCRy);
+    qRT_X[IA] = A + (+dAy + dAz);
+    qRT_X[IB] = BR + (+dBRz);
+    qRT_X[IC] = CR + (+dCRy);
     qRT_X[ID] = fmax(smallR, qRT_X[ID]);
     qRT_X[IP] = fmax(smallp /** qRT_X[ID]*/, qRT_X[IP]);
 
@@ -851,9 +985,9 @@ class MHDBaseFunctor3D
     qRB_X[IV] = v + (+dvy - dvz);
     qRB_X[IW] = w + (+dwy - dwz);
     qRB_X[IP] = p + (+dpy - dpz);
-    qRB_X[IBX] = A + (+dAy - dAz);
-    qRB_X[IBY] = BR + (-dBRz);
-    qRB_X[IBZ] = CL + (+dCLy);
+    qRB_X[IA] = A + (+dAy - dAz);
+    qRB_X[IB] = BR + (-dBRz);
+    qRB_X[IC] = CL + (+dCLy);
     qRB_X[ID] = fmax(smallR, qRB_X[ID]);
     qRB_X[IP] = fmax(smallp /** qRB_X[ID]*/, qRB_X[IP]);
 
@@ -863,9 +997,9 @@ class MHDBaseFunctor3D
     qLT_X[IV] = v + (-dvy + dvz);
     qLT_X[IW] = w + (-dwy + dwz);
     qLT_X[IP] = p + (-dpy + dpz);
-    qLT_X[IBX] = A + (-dAy + dAz);
-    qLT_X[IBY] = BL + (+dBLz);
-    qLT_X[IBZ] = CR + (-dCRy);
+    qLT_X[IA] = A + (-dAy + dAz);
+    qLT_X[IB] = BL + (+dBLz);
+    qLT_X[IC] = CR + (-dCRy);
     qLT_X[ID] = fmax(smallR, qLT_X[ID]);
     qLT_X[IP] = fmax(smallp /** qLT_X[ID]*/, qLT_X[IP]);
 
@@ -875,9 +1009,9 @@ class MHDBaseFunctor3D
     qLB_X[IV] = v + (-dvy - dvz);
     qLB_X[IW] = w + (-dwy - dwz);
     qLB_X[IP] = p + (-dpy - dpz);
-    qLB_X[IBX] = A + (-dAy - dAz);
-    qLB_X[IBY] = BL + (-dBLz);
-    qLB_X[IBZ] = CL + (-dCLy);
+    qLB_X[IA] = A + (-dAy - dAz);
+    qLB_X[IB] = BL + (-dBLz);
+    qLB_X[IC] = CL + (-dCLy);
     qLB_X[ID] = fmax(smallR, qLB_X[ID]);
     qLB_X[IP] = fmax(smallp /** qLB_X[ID]*/, qLB_X[IP]);
 
@@ -887,9 +1021,9 @@ class MHDBaseFunctor3D
     qRT_Y[IV] = v + (+dvx + dvz);
     qRT_Y[IW] = w + (+dwx + dwz);
     qRT_Y[IP] = p + (+dpx + dpz);
-    qRT_Y[IBX] = AR + (+dARz);
-    qRT_Y[IBY] = B + (+dBx + dBz);
-    qRT_Y[IBZ] = CR + (+dCRx);
+    qRT_Y[IA] = AR + (+dARz);
+    qRT_Y[IB] = B + (+dBx + dBz);
+    qRT_Y[IC] = CR + (+dCRx);
     qRT_Y[ID] = fmax(smallR, qRT_Y[ID]);
     qRT_Y[IP] = fmax(smallp /** qRT_Y[ID]*/, qRT_Y[IP]);
 
@@ -899,9 +1033,9 @@ class MHDBaseFunctor3D
     qRB_Y[IV] = v + (+dvx - dvz);
     qRB_Y[IW] = w + (+dwx - dwz);
     qRB_Y[IP] = p + (+dpx - dpz);
-    qRB_Y[IBX] = AR + (-dARz);
-    qRB_Y[IBY] = B + (+dBx - dBz);
-    qRB_Y[IBZ] = CL + (+dCLx);
+    qRB_Y[IA] = AR + (-dARz);
+    qRB_Y[IB] = B + (+dBx - dBz);
+    qRB_Y[IC] = CL + (+dCLx);
     qRB_Y[ID] = fmax(smallR, qRB_Y[ID]);
     qRB_Y[IP] = fmax(smallp /** qRB_Y[ID]*/, qRB_Y[IP]);
 
@@ -911,9 +1045,9 @@ class MHDBaseFunctor3D
     qLT_Y[IV] = v + (-dvx + dvz);
     qLT_Y[IW] = w + (-dwx + dwz);
     qLT_Y[IP] = p + (-dpx + dpz);
-    qLT_Y[IBX] = AL + (+dALz);
-    qLT_Y[IBY] = B + (-dBx + dBz);
-    qLT_Y[IBZ] = CR + (-dCRx);
+    qLT_Y[IA] = AL + (+dALz);
+    qLT_Y[IB] = B + (-dBx + dBz);
+    qLT_Y[IC] = CR + (-dCRx);
     qLT_Y[ID] = fmax(smallR, qLT_Y[ID]);
     qLT_Y[IP] = fmax(smallp /** qLT_Y[ID]*/, qLT_Y[IP]);
 
@@ -923,9 +1057,9 @@ class MHDBaseFunctor3D
     qLB_Y[IV] = v + (-dvx - dvz);
     qLB_Y[IW] = w + (-dwx - dwz);
     qLB_Y[IP] = p + (-dpx - dpz);
-    qLB_Y[IBX] = AL + (-dALz);
-    qLB_Y[IBY] = B + (-dBx - dBz);
-    qLB_Y[IBZ] = CL + (-dCLx);
+    qLB_Y[IA] = AL + (-dALz);
+    qLB_Y[IB] = B + (-dBx - dBz);
+    qLB_Y[IC] = CL + (-dCLx);
     qLB_Y[ID] = fmax(smallR, qLB_Y[ID]);
     qLB_Y[IP] = fmax(smallp /** qLB_Y[ID]*/, qLB_Y[IP]);
 
@@ -935,9 +1069,9 @@ class MHDBaseFunctor3D
     qRT_Z[IV] = v + (+dvx + dvy);
     qRT_Z[IW] = w + (+dwx + dwy);
     qRT_Z[IP] = p + (+dpx + dpy);
-    qRT_Z[IBX] = AR + (+dARy);
-    qRT_Z[IBY] = BR + (+dBRx);
-    qRT_Z[IBZ] = C + (+dCx + dCy);
+    qRT_Z[IA] = AR + (+dARy);
+    qRT_Z[IB] = BR + (+dBRx);
+    qRT_Z[IC] = C + (+dCx + dCy);
     qRT_Z[ID] = fmax(smallR, qRT_Z[ID]);
     qRT_Z[IP] = fmax(smallp /** qRT_Z[ID]*/, qRT_Z[IP]);
 
@@ -947,9 +1081,9 @@ class MHDBaseFunctor3D
     qRB_Z[IV] = v + (+dvx - dvy);
     qRB_Z[IW] = w + (+dwx - dwy);
     qRB_Z[IP] = p + (+dpx - dpy);
-    qRB_Z[IBX] = AR + (-dARy);
-    qRB_Z[IBY] = BL + (+dBLx);
-    qRB_Z[IBZ] = C + (+dCx - dCy);
+    qRB_Z[IA] = AR + (-dARy);
+    qRB_Z[IB] = BL + (+dBLx);
+    qRB_Z[IC] = C + (+dCx - dCy);
     qRB_Z[ID] = fmax(smallR, qRB_Z[ID]);
     qRB_Z[IP] = fmax(smallp /** qRB_Z[ID]*/, qRB_Z[IP]);
 
@@ -959,9 +1093,9 @@ class MHDBaseFunctor3D
     qLT_Z[IV] = v + (-dvx + dvy);
     qLT_Z[IW] = w + (-dwx + dwy);
     qLT_Z[IP] = p + (-dpx + dpy);
-    qLT_Z[IBX] = AL + (+dALy);
-    qLT_Z[IBY] = BR + (-dBRx);
-    qLT_Z[IBZ] = C + (-dCx + dCy);
+    qLT_Z[IA] = AL + (+dALy);
+    qLT_Z[IB] = BR + (-dBRx);
+    qLT_Z[IC] = C + (-dCx + dCy);
     qLT_Z[ID] = fmax(smallR, qLT_Z[ID]);
     qLT_Z[IP] = fmax(smallp /** qLT_Z[ID]*/, qLT_Z[IP]);
 
@@ -971,9 +1105,9 @@ class MHDBaseFunctor3D
     qLB_Z[IV] = v + (-dvx - dvy);
     qLB_Z[IW] = w + (-dwx - dwy);
     qLB_Z[IP] = p + (-dpx - dpy);
-    qLB_Z[IBX] = AL + (-dALy);
-    qLB_Z[IBY] = BL + (-dBLx);
-    qLB_Z[IBZ] = C + (-dCx - dCy);
+    qLB_Z[IA] = AL + (-dALy);
+    qLB_Z[IB] = BL + (-dBLx);
+    qLB_Z[IC] = C + (-dCx - dCy);
     qLB_Z[ID] = fmax(smallR, qLB_Z[ID]);
     qLB_Z[IP] = fmax(smallp /** qLB_Z[ID]*/, qLB_Z[IP]);
 
diff --git a/src/muscl/MHDRunFunctors2D.h b/src/muscl/MHDRunFunctors2D.h
index f44f646..29148d3 100644
--- a/src/muscl/MHDRunFunctors2D.h
+++ b/src/muscl/MHDRunFunctors2D.h
@@ -5,10 +5,6 @@
 #include "MHDBaseFunctor2D.h"
 #include "shared/RiemannSolvers_MHD.h"
 
-#ifndef SQR
-#  define SQR(x) ((x) * (x))
-#endif
-
 namespace ppkMHD
 {
 namespace muscl
@@ -49,7 +45,10 @@ class ComputeDtFunctor2D_MHD : public MHDBaseFunctor2D
     const real_t dx = params.dx;
     const real_t dy = params.dy;
 
-    if (j >= ghostWidth and j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth)
+    // clang-format off
+    if (j >= ghostWidth and j < jsize - ghostWidth and
+        i >= ghostWidth and i < isize - ghostWidth)
+    // clang-format on
     {
 
       MHDState qLoc; // primitive    variables in current cell
@@ -60,9 +59,9 @@ class ComputeDtFunctor2D_MHD : public MHDBaseFunctor2D
       qLoc[IU] = Qdata(i, j, IU);
       qLoc[IV] = Qdata(i, j, IV);
       qLoc[IW] = Qdata(i, j, IW);
-      qLoc[IBX] = Qdata(i, j, IBX);
-      qLoc[IBY] = Qdata(i, j, IBY);
-      qLoc[IBZ] = Qdata(i, j, IBZ);
+      qLoc[IA] = Qdata(i, j, IA);
+      qLoc[IB] = Qdata(i, j, IB);
+      qLoc[IC] = Qdata(i, j, IC);
 
       // compute fastest information speeds
       real_t fastInfoSpeed[3];
@@ -114,7 +113,10 @@ class ConvertToPrimitivesFunctor2D_MHD : public MHDBaseFunctor2D
     // magnetic field in neighbor cells
     real_t magFieldNeighbors[3];
 
-    if (j >= 0 and j < jsize - 1 and i >= 0 and i < isize - 1)
+    // clang-format off
+    if (j >= 0 and j < jsize - 1 and
+        i >= 0 and i < isize - 1)
+    // clang-format on
     {
 
       MHDState uLoc; // conservative    variables in current cell
@@ -127,13 +129,13 @@ class ConvertToPrimitivesFunctor2D_MHD : public MHDBaseFunctor2D
       uLoc[IU] = Udata(i, j, IU);
       uLoc[IV] = Udata(i, j, IV);
       uLoc[IW] = Udata(i, j, IW);
-      uLoc[IBX] = Udata(i, j, IBX);
-      uLoc[IBY] = Udata(i, j, IBY);
-      uLoc[IBZ] = Udata(i, j, IBZ);
+      uLoc[IA] = Udata(i, j, IA);
+      uLoc[IB] = Udata(i, j, IB);
+      uLoc[IC] = Udata(i, j, IC);
 
       // get mag field in neighbor cells
-      magFieldNeighbors[IX] = Udata(i + 1, j, IBX);
-      magFieldNeighbors[IY] = Udata(i, j + 1, IBY);
+      magFieldNeighbors[IX] = Udata(i + 1, j, IA);
+      magFieldNeighbors[IY] = Udata(i, j + 1, IB);
       magFieldNeighbors[IZ] = 0.0;
 
       // get primitive variables in current cell
@@ -145,9 +147,9 @@ class ConvertToPrimitivesFunctor2D_MHD : public MHDBaseFunctor2D
       Qdata(i, j, IU) = qLoc[IU];
       Qdata(i, j, IV) = qLoc[IV];
       Qdata(i, j, IW) = qLoc[IW];
-      Qdata(i, j, IBX) = qLoc[IBX];
-      Qdata(i, j, IBY) = qLoc[IBY];
-      Qdata(i, j, IBZ) = qLoc[IBZ];
+      Qdata(i, j, IA) = qLoc[IA];
+      Qdata(i, j, IB) = qLoc[IB];
+      Qdata(i, j, IC) = qLoc[IC];
     }
   }
 
@@ -156,6 +158,217 @@ class ConvertToPrimitivesFunctor2D_MHD : public MHDBaseFunctor2D
 
 }; // ConvertToPrimitivesFunctor2D_MHD
 
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeSlopesFunctor2D_MHD : public MHDBaseFunctor2D
+{
+
+public:
+  /**
+   * Compute limited slopes of primitives variables.
+   *
+   * Magnetic field slopes are computed for transverse component at cell center, and for normal
+   * component at cell faces.
+   *
+   * \note Normal magnetic field component slopes are not limited.
+   *
+   * \param[in] Udata conservative variables
+   * \param[in] Qdata primitive variables
+   * \param[out] Slopes_x limited slopes along direction X
+   * \param[out] Slopes_y limited slopes along direction Y
+   */
+  ComputeSlopesFunctor2D_MHD(HydroParams params,
+                             DataArray2d Udata,
+                             DataArray2d Qdata,
+                             DataArray2d Slopes_x,
+                             DataArray2d Slopes_y)
+    : MHDBaseFunctor2D(params)
+    , Udata(Udata)
+    , Qdata(Qdata)
+    , Slopes_x(Slopes_x)
+    , Slopes_y(Slopes_y){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray2d Udata,
+        DataArray2d Qdata,
+        DataArray2d Slopes_x,
+        DataArray2d Slopes_y)
+  {
+    ComputeSlopesFunctor2D_MHD functor(params, Udata, Qdata, Slopes_x, Slopes_y);
+    Kokkos::parallel_for(
+      "ComputeSlopesFunctor2D_MHD",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    // const int ghostWidth = params.ghostWidth;
+
+    // clang-format off
+    if (j >= 1 and j < jsize - 1 and
+        i >= 1 and i < isize - 1)
+    // clang-format on
+    {
+      MHDState qc, qm, qp;
+      get_state(Qdata, i, j, qc);
+
+      MHDState dq;
+
+      // slopes along X
+      get_state(Qdata, i - 1, j, qm);
+      get_state(Qdata, i + 1, j, qp);
+      slope_unsplit_hydro_2d(qc, qp, qm, dq);
+      set_state(Slopes_x, i, j, dq);
+
+      // modify slopes for normal magnetic field component using face centered value
+      Slopes_x(i, j, IA) = Udata(i + 1, j, IA) - Udata(i, j, IA);
+
+      // slopes along Y
+      get_state(Qdata, i, j - 1, qm);
+      get_state(Qdata, i, j + 1, qp);
+      slope_unsplit_hydro_2d(qc, qp, qm, dq);
+      set_state(Slopes_y, i, j, dq);
+
+      // modify slopes for normal magnetic field component using face centered value
+      Slopes_y(i, j, IB) = Udata(i, j + 1, IB) - Udata(i, j, IB);
+    }
+  } // end operator ()
+
+  DataArray2d Udata, Qdata;
+  DataArray2d Slopes_x, Slopes_y;
+
+}; // ComputeSlopesFunctor2D_MHD
+
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeElecFieldFunctor2D : public MHDBaseFunctor2D
+{
+
+public:
+  ComputeElecFieldFunctor2D(HydroParams params,
+                            DataArray2d Udata,
+                            DataArray2d Qdata,
+                            DataArray2d ElecField)
+    : MHDBaseFunctor2D(params)
+    , Udata(Udata)
+    , Qdata(Qdata)
+    , ElecField(ElecField){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params, DataArray2d Udata, DataArray2d Qdata, DataArray2d ElecField)
+  {
+    ComputeElecFieldFunctor2D functor(params, Udata, Qdata, ElecField);
+    Kokkos::parallel_for(
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }), functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+
+    // clang-format off
+    if (j > 0 and j < jsize  and
+        i > 0 and i < isize)
+    {
+
+      // compute Ez
+
+      // clang-format off
+      const real_t u = ONE_FOURTH_F * (Qdata(i - 1, j - 1, IU) +
+                                       Qdata(i - 1, j    , IU) +
+                                       Qdata(i    , j - 1, IU) +
+                                       Qdata(i    , j    , IU));
+
+      const real_t v = ONE_FOURTH_F * (Qdata(i - 1, j - 1, IV) +
+                                       Qdata(i - 1, j    , IV) +
+                                       Qdata(i    , j - 1, IV) +
+                                       Qdata(i    , j    , IV));
+
+      const real_t A = HALF_F * (Udata(i    , j - 1, IA) + Udata(i, j, IA));
+      const real_t B = HALF_F * (Udata(i - 1, j    , IB) + Udata(i, j, IB));
+      // clang-format on
+
+      ElecField(i, j, 0) = u * B - v * A;
+    }
+    // clang-format on
+
+  } // operator ()
+
+  DataArray2d Udata;
+  DataArray2d Qdata;
+  DataArray2d ElecField;
+
+}; // ComputeElecFieldFunctor2D
+
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeSourceFaceMagFunctor2D : public MHDBaseFunctor2D
+{
+
+public:
+  ComputeSourceFaceMagFunctor2D(HydroParams params,
+                                DataArray2d ElecField,
+                                DataArray2d sFaceMag,
+                                real_t      dtdx,
+                                real_t      dtdy)
+    : MHDBaseFunctor2D(params)
+    , ElecField(ElecField)
+    , sFaceMag(sFaceMag)
+    , dtdx(dtdx)
+    , dtdy(dtdy){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params, DataArray2d ElecField, DataArray2d sFaceMag, real_t dtdx, real_t dtdy)
+  {
+    ComputeSourceFaceMagFunctor2D functor(params, ElecField, sFaceMag, dtdx, dtdy);
+    Kokkos::parallel_for(
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }), functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+
+    // clang-format off
+    if (j > 0 and j < jsize-1  and
+        i > 0 and i < isize-1)
+    {
+
+      // sAL0 = +(ELR - ELL) * 0.5 * dtdy
+      sFaceMag(i, j, IX) = (ElecField(i    , j + 1, 0) - ElecField(i, j, 0)) * 0.5 * dtdy;
+
+      // sBL0 = -(ERL - ELL) * 0.5 * dtdx;
+      sFaceMag(i, j, IY) = -(ElecField(i + 1, j    , 0) - ElecField(i, j, 0)) * 0.5 * dtdx;
+
+    }
+    // clang-format on
+
+  } // operator ()
+
+  DataArray2d ElecField;
+  DataArray2d sFaceMag;
+  real_t      dtdx, dtdy;
+
+}; // ComputeSourceFaceMagFunctor2D
+
 /*************************************************/
 /*************************************************/
 /*************************************************/
@@ -196,8 +409,10 @@ class ComputeFluxesAndStoreFunctor2D_MHD : public MHDBaseFunctor2D
   {
     ComputeFluxesAndStoreFunctor2D_MHD functor(
       params, Qm_x, Qm_y, Qp_x, Qp_y, Flux_x, Flux_y, dtdx, dtdy);
-    Kokkos::parallel_for("ComputeFluxesAndStoreFunctor2D_MHD",
-      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }), functor);
+    Kokkos::parallel_for(
+      "ComputeFluxesAndStoreFunctor2D_MHD",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
   }
 
   KOKKOS_INLINE_FUNCTION
@@ -208,8 +423,9 @@ class ComputeFluxesAndStoreFunctor2D_MHD : public MHDBaseFunctor2D
     const int jsize = params.jsize;
     const int ghostWidth = params.ghostWidth;
 
-    if (j >= ghostWidth and j < jsize - ghostWidth + 1 and i >= ghostWidth and
-        i < isize - ghostWidth + 1)
+    // clang-format off
+    if (j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
     {
 
       MHDState qleft, qright;
@@ -219,7 +435,7 @@ class ComputeFluxesAndStoreFunctor2D_MHD : public MHDBaseFunctor2D
       // Solve Riemann problem at X-interfaces and compute X-fluxes
       //
       get_state(Qm_x, i - 1, j, qleft);
-      get_state(Qp_x, i, j, qright);
+      get_state(Qp_x, i    , j, qright);
 
       // compute hydro flux along X
       riemann_mhd(qleft, qright, flux, params);
@@ -230,13 +446,13 @@ class ComputeFluxesAndStoreFunctor2D_MHD : public MHDBaseFunctor2D
       //
       // Solve Riemann problem at Y-interfaces and compute Y-fluxes
       //
-      get_state(Qm_y, i, j - 1, qleft);
+      get_state(Qm_y, i    , j - 1, qleft);
       swapValues(&(qleft[IU]), &(qleft[IV]));
-      swapValues(&(qleft[IBX]), &(qleft[IBY]));
+      swapValues(&(qleft[IA]), &(qleft[IB]));
 
-      get_state(Qp_y, i, j, qright);
+      get_state(Qp_y, i    , j    , qright);
       swapValues(&(qright[IU]), &(qright[IV]));
-      swapValues(&(qright[IBX]), &(qright[IBY]));
+      swapValues(&(qright[IA]), &(qright[IB]));
 
       // compute hydro flux along Y
       riemann_mhd(qleft, qright, flux, params);
@@ -244,6 +460,7 @@ class ComputeFluxesAndStoreFunctor2D_MHD : public MHDBaseFunctor2D
       // store fluxes
       set_state(Fluxes_y, i, j, flux);
     }
+    // clang-format on
   }
 
   DataArray2d Qm_x, Qm_y, Qp_x, Qp_y;
@@ -252,6 +469,143 @@ class ComputeFluxesAndStoreFunctor2D_MHD : public MHDBaseFunctor2D
 
 }; // ComputeFluxesAndStoreFunctor2D_MHD
 
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeFluxesAndUpdateFunctor2D_MHD : public MHDBaseFunctor2D
+{
+
+public:
+  ComputeFluxesAndUpdateFunctor2D_MHD(HydroParams params,
+                                      DataArray2d Qm_x,
+                                      DataArray2d Qm_y,
+                                      DataArray2d Qp_x,
+                                      DataArray2d Qp_y,
+                                      DataArray2d Udata,
+                                      real_t      dtdx,
+                                      real_t      dtdy)
+    : MHDBaseFunctor2D(params)
+    , Qm_x(Qm_x)
+    , Qm_y(Qm_y)
+    , Qp_x(Qp_x)
+    , Qp_y(Qp_y)
+    , Udata(Udata)
+    , dtdx(dtdx)
+    , dtdy(dtdy){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray2d Qm_x,
+        DataArray2d Qm_y,
+        DataArray2d Qp_x,
+        DataArray2d Qp_y,
+        DataArray2d Udata,
+        real_t      dtdx,
+        real_t      dtdy)
+  {
+    ComputeFluxesAndUpdateFunctor2D_MHD functor(params, Qm_x, Qm_y, Qp_x, Qp_y, Udata, dtdx, dtdy);
+    Kokkos::parallel_for(
+      "ComputeFluxesAndUpdateFunctor2D_MHD",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ghostWidth = params.ghostWidth;
+
+    // clang-format off
+    if (j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format on
+    {
+
+      MHDState qleft, qright;
+      MHDState flux;
+
+      //
+      // Solve Riemann problem at X-interfaces and compute X-fluxes
+      //
+      get_state(Qm_x, i - 1, j, qleft);
+      get_state(Qp_x, i, j, qright);
+
+      // compute hydro flux along X
+      riemann_mhd(qleft, qright, flux, params);
+
+      //
+      // Update with fluxes along X
+      //
+      if (j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_add(&Udata(i, j, ID), flux[ID] * dtdx);
+        Kokkos::atomic_add(&Udata(i, j, IP), flux[IP] * dtdx);
+        Kokkos::atomic_add(&Udata(i, j, IU), flux[IU] * dtdx);
+        Kokkos::atomic_add(&Udata(i, j, IV), flux[IV] * dtdx);
+        Kokkos::atomic_add(&Udata(i, j, IW), flux[IW] * dtdx);
+        Kokkos::atomic_add(&Udata(i, j, IC), flux[IC] * dtdx);
+      }
+
+      if (j < jsize - ghostWidth and i > ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i - 1, j, ID), flux[ID] * dtdx);
+        Kokkos::atomic_sub(&Udata(i - 1, j, IP), flux[IP] * dtdx);
+        Kokkos::atomic_sub(&Udata(i - 1, j, IU), flux[IU] * dtdx);
+        Kokkos::atomic_sub(&Udata(i - 1, j, IV), flux[IV] * dtdx);
+        Kokkos::atomic_sub(&Udata(i - 1, j, IW), flux[IW] * dtdx);
+        Kokkos::atomic_sub(&Udata(i - 1, j, IC), flux[IC] * dtdx);
+      }
+
+      //
+      // Solve Riemann problem at Y-interfaces and compute Y-fluxes
+      //
+      get_state(Qm_y, i, j - 1, qleft);
+      swapValues(&(qleft[IU]), &(qleft[IV]));
+      swapValues(&(qleft[IA]), &(qleft[IB]));
+
+      get_state(Qp_y, i, j, qright);
+      swapValues(&(qright[IU]), &(qright[IV]));
+      swapValues(&(qright[IA]), &(qright[IB]));
+
+      // compute hydro flux along Y
+      riemann_mhd(qleft, qright, flux, params);
+
+      swapValues(&(flux[IU]), &(flux[IV]));
+      swapValues(&(flux[IA]), &(flux[IB]));
+
+      //
+      // update with fluxes Y
+      //
+      if (j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_add(&Udata(i, j, ID), flux[ID] * dtdy);
+        Kokkos::atomic_add(&Udata(i, j, IP), flux[IP] * dtdy);
+        Kokkos::atomic_add(&Udata(i, j, IU), flux[IU] * dtdy);
+        Kokkos::atomic_add(&Udata(i, j, IV), flux[IV] * dtdy);
+        Kokkos::atomic_add(&Udata(i, j, IW), flux[IW] * dtdy);
+        Kokkos::atomic_add(&Udata(i, j, IC), flux[IC] * dtdy);
+      }
+      if (j > ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i, j - 1, ID), flux[ID] * dtdy);
+        Kokkos::atomic_sub(&Udata(i, j - 1, IP), flux[IP] * dtdy);
+        Kokkos::atomic_sub(&Udata(i, j - 1, IU), flux[IU] * dtdy);
+        Kokkos::atomic_sub(&Udata(i, j - 1, IV), flux[IV] * dtdy);
+        Kokkos::atomic_sub(&Udata(i, j - 1, IW), flux[IW] * dtdy);
+        Kokkos::atomic_sub(&Udata(i, j - 1, IC), flux[IC] * dtdy);
+      }
+    }
+  }
+
+  DataArray2d Qm_x, Qm_y, Qp_x, Qp_y, Udata;
+  real_t      dtdx, dtdy;
+
+}; // ComputeFluxesAndUpdateFunctor2D_MHD
+
 /*************************************************/
 /*************************************************/
 /*************************************************/
@@ -289,8 +643,10 @@ class ComputeEmfAndStoreFunctor2D : public MHDBaseFunctor2D
   {
     ComputeEmfAndStoreFunctor2D functor(
       params, QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB, Emf, dtdx, dtdy);
-    Kokkos::parallel_for("ComputeEmfAndStoreFunctor2D",
-      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }), functor);
+    Kokkos::parallel_for(
+      "ComputeEmfAndStoreFunctor2D",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
   }
 
   KOKKOS_INLINE_FUNCTION
@@ -301,22 +657,21 @@ class ComputeEmfAndStoreFunctor2D : public MHDBaseFunctor2D
     const int jsize = params.jsize;
     const int ghostWidth = params.ghostWidth;
 
-    if (j >= ghostWidth and j < jsize - ghostWidth + 1 and i >= ghostWidth and
-        i < isize - ghostWidth + 1)
+    // clang-format off
+    if (j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format on
     {
 
       // in 2D, we only need to compute emfZ
       MHDState qEdge_emfZ[4];
 
-      // preparation for calling compute_emf (equivalent to cmp_mag_flx
-      // in DUMSES)
-      // in the following, the 2 first indexes in qEdge_emf array play
-      // the same offset role as in the calling argument of cmp_mag_flx
-      // in DUMSES (if you see what I mean ?!)
+      // clang-format off
       get_state(QEdge_RT, i - 1, j - 1, qEdge_emfZ[IRT]);
-      get_state(QEdge_RB, i - 1, j, qEdge_emfZ[IRB]);
-      get_state(QEdge_LT, i, j - 1, qEdge_emfZ[ILT]);
-      get_state(QEdge_LB, i, j, qEdge_emfZ[ILB]);
+      get_state(QEdge_RB, i - 1, j    , qEdge_emfZ[IRB]);
+      get_state(QEdge_LT, i    , j - 1, qEdge_emfZ[ILT]);
+      get_state(QEdge_LB, i    , j    , qEdge_emfZ[ILB]);
+      // clang-format on
 
       // actually compute emfZ
       real_t emfZ = compute_emf<EMFZ>(qEdge_emfZ, params);
@@ -330,6 +685,96 @@ class ComputeEmfAndStoreFunctor2D : public MHDBaseFunctor2D
 
 }; // ComputeEmfAndStoreFunctor2D
 
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeEmfAndUpdateFunctor2D : public MHDBaseFunctor2D
+{
+
+public:
+  ComputeEmfAndUpdateFunctor2D(HydroParams params,
+                               DataArray2d QEdge_RT,
+                               DataArray2d QEdge_RB,
+                               DataArray2d QEdge_LT,
+                               DataArray2d QEdge_LB,
+                               DataArray2d Udata,
+                               real_t      dtdx,
+                               real_t      dtdy)
+    : MHDBaseFunctor2D(params)
+    , QEdge_RT(QEdge_RT)
+    , QEdge_RB(QEdge_RB)
+    , QEdge_LT(QEdge_LT)
+    , QEdge_LB(QEdge_LB)
+    , Udata(Udata)
+    , dtdx(dtdx)
+    , dtdy(dtdy){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray2d QEdge_RT,
+        DataArray2d QEdge_RB,
+        DataArray2d QEdge_LT,
+        DataArray2d QEdge_LB,
+        DataArray2d Udata,
+        real_t      dtdx,
+        real_t      dtdy)
+  {
+    ComputeEmfAndUpdateFunctor2D functor(
+      params, QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB, Udata, dtdx, dtdy);
+    Kokkos::parallel_for(
+      "ComputeEmfAndUpdateFunctor2D",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ghostWidth = params.ghostWidth;
+
+    // clang-format off
+    if (j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format on
+    {
+
+      // in 2D, we only need to compute emfZ
+      MHDState qEdge_emfZ[4];
+
+      // preparation for calling compute_emf (equivalent to cmp_mag_flx
+      // in DUMSES)
+      // in the following, the 2 first indexes in qEdge_emf array play
+      // the same offset role as in the calling argument of cmp_mag_flx
+      // in DUMSES (if you see what I mean ?!)
+
+      // clang-format off
+      get_state(QEdge_RT, i - 1, j - 1, qEdge_emfZ[IRT]);
+      get_state(QEdge_RB, i - 1, j    , qEdge_emfZ[IRB]);
+      get_state(QEdge_LT, i    , j - 1, qEdge_emfZ[ILT]);
+      get_state(QEdge_LB, i    , j    , qEdge_emfZ[ILB]);
+      // clang-format on
+
+      // actually compute emfZ
+      const real_t emfZ = compute_emf<EMFZ>(qEdge_emfZ, params);
+
+      // clang-format off
+      Kokkos::atomic_sub(&Udata(i    , j    , IA), emfZ * dtdy);
+      Kokkos::atomic_add(&Udata(i    , j    , IB), emfZ * dtdx);
+
+      Kokkos::atomic_add(&Udata(i    , j - 1, IA), emfZ * dtdy);
+      Kokkos::atomic_sub(&Udata(i - 1, j    , IB), emfZ * dtdx);
+      // clang-format on
+    }
+  }
+
+  DataArray2d QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB, Udata;
+  real_t      dtdx, dtdy;
+
+}; // ComputeEmfAndUpdateFunctor2D
 
 /*************************************************/
 /*************************************************/
@@ -394,8 +839,10 @@ class ComputeTraceFunctor2D_MHD : public MHDBaseFunctor2D
                                       QEdge_LB,
                                       dtdx,
                                       dtdy);
-    Kokkos::parallel_for("ComputeTraceFunctor2D_MHD",
-      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }), functor);
+    Kokkos::parallel_for(
+      "ComputeTraceFunctor2D_MHD",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
   }
 
   KOKKOS_INLINE_FUNCTION
@@ -406,8 +853,10 @@ class ComputeTraceFunctor2D_MHD : public MHDBaseFunctor2D
     const int jsize = params.jsize;
     const int ghostWidth = params.ghostWidth;
 
-    if (j >= ghostWidth - 2 and j < jsize - ghostWidth + 1 and i >= ghostWidth - 2 and
-        i < isize - ghostWidth + 1)
+    // clang-format off
+    if (j >= ghostWidth - 2 and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth - 2 and i < isize - ghostWidth + 1)
+    // clang-format on
     {
 
       MHDState qNb[3][3];
@@ -461,6 +910,620 @@ class ComputeTraceFunctor2D_MHD : public MHDBaseFunctor2D
 
 }; // ComputeTraceFunctor2D_MHD
 
+/*************************************************/
+/*************************************************/
+/*************************************************/
+/**
+ * Compute cell-centered primitive data at t_{n+1/2} (half time step in
+ * Muscl-Hancock).
+ */
+class ComputeUpdatedPrimVarFunctor2D_MHD : public MHDBaseFunctor2D
+{
+
+public:
+  ComputeUpdatedPrimVarFunctor2D_MHD(HydroParams params,
+                                     DataArray2d Udata,
+                                     DataArray2d Qdata,
+                                     DataArray2d Slopes_x,
+                                     DataArray2d Slopes_y,
+                                     DataArray2d Qdata2,
+                                     real_t      dtdx,
+                                     real_t      dtdy)
+    : MHDBaseFunctor2D(params)
+    , Udata(Udata)
+    , Qdata(Qdata)
+    , Slopes_x(Slopes_x)
+    , Slopes_y(Slopes_y)
+    , Qdata2(Qdata2)
+    , dtdx(dtdx)
+    , dtdy(dtdy){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray2d Udata,
+        DataArray2d Qdata,
+        DataArray2d Slopes_x,
+        DataArray2d Slopes_y,
+        DataArray2d Qdata2,
+        real_t      dtdx,
+        real_t      dtdy)
+  {
+    ComputeUpdatedPrimVarFunctor2D_MHD functor(
+      params, Udata, Qdata, Slopes_x, Slopes_y, Qdata2, dtdx, dtdy);
+    Kokkos::parallel_for(
+      "ComputeUpdatedPrimVarFunctor2D_MHD",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j) const
+  {
+    const int    isize = params.isize;
+    const int    jsize = params.jsize;
+    const int    ghostWidth = params.ghostWidth;
+    const real_t gamma = params.settings.gamma0;
+
+    // clang-format off
+    if (j >= ghostWidth - 2 and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth - 2 and i < isize - ghostWidth + 1)
+    // clang-format on
+    {
+      // get primitive variable in current cell
+      MHDState q;
+      get_state(Qdata, i, j, q);
+
+      MHDState dq[2];
+
+      // retrieve hydro slopes along X
+      get_state(Slopes_x, i, j, dq[IX]);
+
+      // retrieve hydro slopes along Y
+      get_state(Slopes_y, i, j, dq[IY]);
+
+      // Cell centered values
+      auto const & r = q[ID];
+      auto const & p = q[IP];
+      auto const & u = q[IU];
+      auto const & v = q[IV];
+      auto const & w = q[IW];
+      auto const & A = q[IA];
+      auto const & B = q[IB];
+      auto const & C = q[IC];
+
+      // Cell centered TVD slopes in X direction
+      auto const & drx = dq[IX][ID];
+      auto const & dpx = dq[IX][IP];
+      auto const & dux = dq[IX][IU];
+      auto const & dvx = dq[IX][IV];
+      auto const & dwx = dq[IX][IW];
+      auto const & dBx = dq[IX][IB];
+      auto const & dCx = dq[IX][IC];
+
+      // Cell centered TVD slopes in Y direction
+      auto const & dry = dq[IY][ID];
+      auto const & dpy = dq[IY][IP];
+      auto const & duy = dq[IY][IU];
+      auto const & dvy = dq[IY][IV];
+      auto const & dwy = dq[IY][IW];
+      auto const & dAy = dq[IY][IA];
+      auto const & dCy = dq[IY][IC];
+
+      auto const   db = compute_normal_mag_field_slopes(Udata, i, j);
+      auto const & dAx = db[IX];
+      auto const & dBy = db[IY];
+
+      real_t sr0, su0, sv0, sw0, sp0, sA0, sB0, sC0;
+      {
+
+        sr0 = (-u * drx - dux * r) * dtdx + (-v * dry - dvy * r) * dtdy;
+        su0 =
+          (-u * dux - dpx / r - B * dBx / r - C * dCx / r) * dtdx + (-v * duy + B * dAy / r) * dtdy;
+        sv0 =
+          (-u * dvx + A * dBx / r) * dtdx + (-v * dvy - dpy / r - A * dAy / r - C * dCy / r) * dtdy;
+        sw0 = (-u * dwx + A * dCx / r) * dtdx + (-v * dwy + B * dCy / r) * dtdy;
+        sp0 = (-u * dpx - dux * gamma * p) * dtdx + (-v * dpy - dvy * gamma * p) * dtdy;
+        sA0 = (u * dBy + B * duy - v * dAy - A * dvy) * dtdy;
+        sB0 = (-u * dBx - B * dux + v * dAx + A * dvx) * dtdx;
+        sC0 = (w * dAx + A * dwx - u * dCx - C * dux) * dtdx +
+              (-v * dCy - C * dvy + w * dBy + B * dwy) * dtdy;
+      }
+
+      // Update in time the primitive variables (half time step)
+      Qdata2(i, j, ID) = r + 0.5 * sr0;
+      Qdata2(i, j, IU) = u + 0.5 * su0;
+      Qdata2(i, j, IV) = v + 0.5 * sv0;
+      Qdata2(i, j, IW) = w + 0.5 * sw0;
+      Qdata2(i, j, IP) = p + 0.5 * sp0;
+      Qdata2(i, j, IA) = A + 0.5 * sA0;
+      Qdata2(i, j, IB) = B + 0.5 * sB0;
+      Qdata2(i, j, IC) = C + 0.5 * sC0;
+    }
+  } // operator ()
+
+  DataArray2d Udata, Qdata; // input
+  DataArray2d Slopes_x, Slopes_y; // input
+  DataArray2d Qdata2;       // output
+  real_t      dtdx, dtdy;
+
+}; // ComputeUpdatedPrimvarFunctor2D_MHD
+
+/*************************************************/
+/*************************************************/
+/*************************************************/
+/**
+ * Reconstruct hydro state at cell face center, solve Riemann problems and update.
+ *
+ * This is done in a direction by direction manner.
+ */
+template <Direction dir>
+class ComputeFluxAndUpdateAlongDirFunctor2D_MHD : public MHDBaseFunctor2D
+{
+
+public:
+  //!
+  //! \param[in] Udata_in is conservative variables at t_n
+  //! \param[in] Udata_out is conservative variables at t_{n+1}
+  //! \param[in] Qdata2 is primitive variables array at t_{n+1/2}
+  //!
+  ComputeFluxAndUpdateAlongDirFunctor2D_MHD(HydroParams params,
+                                            DataArray2d Udata_in,
+                                            DataArray2d Udata_out,
+                                            DataArray2d Qdata2,
+                                            DataArray2d Slopes_x,
+                                            DataArray2d Slopes_y,
+                                            DataArray2d sFaceMag,
+                                            real_t      dtdx,
+                                            real_t      dtdy)
+    : MHDBaseFunctor2D(params)
+    , Udata_in(Udata_in)
+    , Udata_out(Udata_out)
+    , Qdata2(Qdata2)
+    , Slopes_x(Slopes_x)
+    , Slopes_y(Slopes_y)
+    , sFaceMag(sFaceMag)
+    , dtdx(dtdx)
+    , dtdy(dtdy){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray2d Udata_in,
+        DataArray2d Udata_out,
+        DataArray2d Qdata2,
+        DataArray2d Slopes_x,
+        DataArray2d Slopes_y,
+        DataArray2d sFaceMag,
+        real_t      dtdx,
+        real_t      dtdy)
+  {
+    ComputeFluxAndUpdateAlongDirFunctor2D_MHD<dir> functor(
+      params, Udata_in, Udata_out, Qdata2, Slopes_x, Slopes_y, sFaceMag, dtdx, dtdy);
+    Kokkos::parallel_for(
+      "ComputeFluxAndUpdateAlongDirFunctor2D_MHD",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ghostWidth = params.ghostWidth;
+
+    // const real_t gamma = params.settings.gamma0;
+    const real_t smallR = params.settings.smallr;
+    const real_t smallp = params.settings.smallp;
+
+    constexpr auto delta_i = dir == DIR_X ? 1 : 0;
+    constexpr auto delta_j = dir == DIR_Y ? 1 : 0;
+
+    // clang-format off
+    if (j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format on
+    {
+      // left and right reconstructed state (input for Riemann solver)
+      MHDState qR, qL;
+
+      // slopes in left and right cells
+      MHDState dq, dqN;
+
+      //
+      // Right state at left interface (current cell)
+      //
+
+      // load hydro slopes along dir
+      if constexpr (dir == DIR_X)
+      {
+        get_state(Slopes_x, i, j, dq);
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        get_state(Slopes_y, i, j, dq);
+      }
+
+      // get primitive variable in current cell at t_{n+1/2}
+      MHDState q2;
+      get_state(Qdata2, i, j, q2);
+
+      qR[ID] = q2[ID] - 0.5 * dq[ID];
+      qR[IU] = q2[IU] - 0.5 * dq[IU];
+      qR[IV] = q2[IV] - 0.5 * dq[IV];
+      qR[IW] = q2[IW] - 0.5 * dq[IW];
+      qR[IP] = q2[IP] - 0.5 * dq[IP];
+      qR[ID] = fmax(smallR, qR[ID]);
+      qR[IP] = fmax(smallp * qR[ID], qR[IP]);
+      if constexpr (dir == DIR_X)
+      {
+        const real_t AL = Udata_in(i, j, IA) + sFaceMag(i, j, IX);
+        qR[IA] = AL;
+        qR[IB] = q2[IB] - 0.5 * dq[IB];
+        qR[IC] = q2[IC] - 0.5 * dq[IC];
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        const real_t BL = Udata_in(i, j, IB) + sFaceMag(i, j, IY);
+        qR[IA] = q2[IA] - 0.5 * dq[IA];
+        qR[IB] = BL;
+        qR[IC] = q2[IC] - 0.5 * dq[IC];
+      }
+
+      //
+      // Left state at right interface (neighbor cell)
+      //
+
+      // load hydro slopes along dir
+      if constexpr (dir == DIR_X)
+      {
+        get_state(Slopes_x, i - delta_i, j - delta_j, dqN);
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        get_state(Slopes_y, i - delta_i, j - delta_j, dqN);
+      }
+
+      // get primitive variable in neighbor cell at t_{n+1/2}
+      MHDState q2N;
+      get_state(Qdata2, i - delta_i, j - delta_j, q2N);
+
+      qL[ID] = q2N[ID] + 0.5 * dqN[ID];
+      qL[IU] = q2N[IU] + 0.5 * dqN[IU];
+      qL[IV] = q2N[IV] + 0.5 * dqN[IV];
+      qL[IW] = q2N[IW] + 0.5 * dqN[IW];
+      qL[IP] = q2N[IP] + 0.5 * dqN[IP];
+      qL[ID] = fmax(smallR, qL[ID]);
+      qL[IP] = fmax(smallp * qL[ID], qL[IP]);
+      if constexpr (dir == DIR_X)
+      {
+        qL[IA] = qR[IA];
+        qL[IB] = q2N[IB] + 0.5 * dqN[IB];
+        qL[IC] = q2N[IC] + 0.5 * dqN[IC];
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        qL[IA] = q2N[IA] + 0.5 * dqN[IA];
+        qL[IB] = qR[IB];
+        qL[IC] = q2N[IC] + 0.5 * dqN[IC];
+      }
+
+      // now we are ready for computing hydro flux
+      MHDState flux;
+
+      if constexpr (dir == DIR_Y)
+      {
+        swapValues(&(qL[IU]), &(qL[IV]));
+        swapValues(&(qL[IA]), &(qL[IB]));
+        swapValues(&(qR[IU]), &(qR[IV]));
+        swapValues(&(qR[IA]), &(qR[IB]));
+      }
+      riemann_mhd(qL, qR, flux, params);
+      if constexpr (dir == DIR_Y)
+      {
+        swapValues(&(flux[IU]), &(flux[IV]));
+        swapValues(&(flux[IA]), &(flux[IB]));
+      }
+
+      if constexpr (dir == DIR_X)
+      {
+        if (j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_add(&Udata_out(i, j, ID), flux[ID] * dtdx);
+          Kokkos::atomic_add(&Udata_out(i, j, IP), flux[IP] * dtdx);
+          Kokkos::atomic_add(&Udata_out(i, j, IU), flux[IU] * dtdx);
+          Kokkos::atomic_add(&Udata_out(i, j, IV), flux[IV] * dtdx);
+          Kokkos::atomic_add(&Udata_out(i, j, IW), flux[IW] * dtdx);
+          Kokkos::atomic_add(&Udata_out(i, j, IC), flux[IC] * dtdx);
+        }
+
+        if (j < jsize - ghostWidth and i > ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, ID), flux[ID] * dtdx);
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, IP), flux[IP] * dtdx);
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, IU), flux[IU] * dtdx);
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, IV), flux[IV] * dtdx);
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, IW), flux[IW] * dtdx);
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, IC), flux[IC] * dtdx);
+        }
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        if (j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_add(&Udata_out(i, j, ID), flux[ID] * dtdy);
+          Kokkos::atomic_add(&Udata_out(i, j, IP), flux[IP] * dtdy);
+          Kokkos::atomic_add(&Udata_out(i, j, IU), flux[IU] * dtdy);
+          Kokkos::atomic_add(&Udata_out(i, j, IV), flux[IV] * dtdy);
+          Kokkos::atomic_add(&Udata_out(i, j, IW), flux[IW] * dtdy);
+          Kokkos::atomic_add(&Udata_out(i, j, IC), flux[IC] * dtdy);
+        }
+        if (j > ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, ID), flux[ID] * dtdy);
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, IP), flux[IP] * dtdy);
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, IU), flux[IU] * dtdy);
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, IV), flux[IV] * dtdy);
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, IW), flux[IW] * dtdy);
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, IC), flux[IC] * dtdy);
+        }
+      }
+    }
+  } // operator ()
+
+  DataArray2d Udata_in, Udata_out;
+  DataArray2d Qdata2;
+  DataArray2d Slopes_x, Slopes_y;
+  DataArray2d sFaceMag;
+  real_t      dtdx, dtdy;
+
+}; // ComputeFluxAndUpdateAlongDirFunctor2D_MHD
+
+/*************************************************/
+/*************************************************/
+/*************************************************/
+/**
+ * Reconstruct hydrodynamics variables and magnetic field at edge center, compute Emf and update.
+ */
+class ReconstructEdgeComputeEmfAndUpdateFunctor2D : public MHDBaseFunctor2D
+{
+
+public:
+  //!
+  //! \param[in] Udata_in is conservative variables at t_n
+  //! \param[in] Udata_out is conservative variables at t_{n+1}
+  //! \param[in] Qdata2 is primitive variables array at t_{n+1/2}
+  //!
+  ReconstructEdgeComputeEmfAndUpdateFunctor2D(HydroParams params,
+                                              DataArray2d Udata_in,
+                                              DataArray2d Udata_out,
+                                              DataArray2d Qdata2,
+                                              DataArray2d Slopes_x,
+                                              DataArray2d Slopes_y,
+                                              DataArray2d sFaceMag,
+                                              real_t      dtdx,
+                                              real_t      dtdy)
+    : MHDBaseFunctor2D(params)
+    , Udata_in(Udata_in)
+    , Udata_out(Udata_out)
+    , Qdata2(Qdata2)
+    , Slopes_x(Slopes_x)
+    , Slopes_y(Slopes_y)
+    , sFaceMag(sFaceMag)
+    , dtdx(dtdx)
+    , dtdy(dtdy){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray2d Udata_in,
+        DataArray2d Udata_out,
+        DataArray2d Qdata2,
+        DataArray2d Slopes_x,
+        DataArray2d Slopes_y,
+        DataArray2d sFaceMag,
+        real_t      dtdx,
+        real_t      dtdy)
+  {
+    ReconstructEdgeComputeEmfAndUpdateFunctor2D functor(
+      params, Udata_in, Udata_out, Qdata2, Slopes_x, Slopes_y, sFaceMag, dtdx, dtdy);
+    Kokkos::parallel_for(
+      "ReconstructEdgeComputeEmfAndUpdateFunctor2D",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION auto
+  reconstruct_state_at_edge(MHDState &      qEdge,
+                            uint32_t        i0,
+                            uint32_t        j0,
+                            MHDEdgeLocation edge_loc) const
+  {
+
+    // const real_t gamma = params.settings.gamma0;
+    const real_t smallR = params.settings.smallr;
+    const real_t smallp = params.settings.smallp;
+
+    int sign_x = 1;
+    int sign_y = 1;
+    int ia = 0;
+    int ja = 0;
+    int ib = 0;
+    int jb = 0;
+    int sign_a = 1;
+    int sign_b = 1;
+
+    if (edge_loc == MHDEdgeLocation::LB)
+    {
+      ia = 0;
+      ja = 0;
+      ib = 0;
+      jb = 0;
+      sign_x = -1;
+      sign_y = -1;
+      sign_a = -1;
+      sign_b = -1;
+    }
+    else if (edge_loc == MHDEdgeLocation::RT)
+    {
+      ia = 1;
+      ja = 0;
+      ib = 0;
+      jb = 1;
+      sign_x = 1;
+      sign_y = 1;
+      sign_a = 1;
+      sign_b = 1;
+    }
+    else if (edge_loc == MHDEdgeLocation::RB)
+    {
+      ia = 1;
+      ja = 0;
+      ib = 0;
+      jb = 0;
+      sign_x = 1;
+      sign_y = -1;
+      sign_a = -1;
+      sign_b = 1;
+    }
+    else if (edge_loc == MHDEdgeLocation::LT)
+    {
+      ia = 0;
+      ja = 0;
+      ib = 0;
+      jb = 1;
+      sign_x = -1;
+      sign_y = 1;
+      sign_a = 1;
+      sign_b = -1;
+    }
+
+    const real_t A = Udata_in(i0 + ia, j0 + ja, IA) + sFaceMag(i0, j0, IX);
+    const real_t dAy = compute_limited_slope<DIR_Y>(Udata_in, i0 + ia, j0 + ja, IA);
+
+    const real_t B = Udata_in(i0 + ib, j0 + jb, IB) + sFaceMag(i0, j0, IY);
+    const real_t dBx = compute_limited_slope<DIR_X>(Udata_in, i0 + ib, j0 + jb, IB);
+
+    // get limited slopes
+    MHDState dqX, dqY;
+    get_state(Slopes_x, i0, j0, dqX);
+    get_state(Slopes_y, i0, j0, dqY);
+
+    // reconstructed state
+    qEdge[ID] += 0.5 * (sign_x * dqX[ID] + sign_y * dqY[ID]);
+    qEdge[IU] += 0.5 * (sign_x * dqX[IU] + sign_y * dqY[IU]);
+    qEdge[IV] += 0.5 * (sign_x * dqX[IV] + sign_y * dqY[IV]);
+    qEdge[IW] += 0.5 * (sign_x * dqX[IW] + sign_y * dqY[IW]);
+    qEdge[IP] += 0.5 * (sign_x * dqX[IP] + sign_y * dqY[IP]);
+    qEdge[IA] = A + 0.5 * (sign_a * dAy);
+    qEdge[IB] = B + 0.5 * (sign_b * dBx);
+    qEdge[IC] += 0.5 * (sign_x * dqX[IC] + sign_y * dqY[IC]);
+    qEdge[ID] = fmax(smallR, qEdge[ID]);
+    qEdge[IP] = fmax(smallp * qEdge[ID], qEdge[IP]);
+
+  } // reconstruct_state_at_edge
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ghostWidth = params.ghostWidth;
+
+    // clang-format off
+    if (j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format on
+    {
+      // this drawing is a simple helper to reminder how reconstruction is done
+      //
+      // symbols LB, RB, LT, RT indicate location of reconstructed edge from the cell center.
+      //
+      //
+      //  X locates the edge where hydrodynamics values must be reconstructed
+      //    _________________________
+      //   |           |            |
+      //   |           |            |
+      //   |     RB    |     LB     |
+      //   |  (i-1,j)  |   (i,j)    |
+      //   |          \| /          |
+      //   |___________X____________|
+      //   |          /| \          |
+      //   |        /  |  \         |
+      //   |     RT    |     LT     |
+      //   | (i-1,j-1) |   (i,j-1)  |
+      //   |           |            |
+      //   |___________|____________|
+      //
+
+
+      // qLB is current cell, qLT, qRT and qRB are direct neighbors surrounding the lower left
+      // edge MHDState qLB, qLT, qRB, qRT;
+      MHDState   qEdge_emfZ[4];
+      MHDState & qRT = qEdge_emfZ[IRT];
+      MHDState & qLT = qEdge_emfZ[ILT];
+      MHDState & qRB = qEdge_emfZ[IRB];
+      MHDState & qLB = qEdge_emfZ[ILB];
+
+      // get primitive variable in current cell and neighbors at t_{n+1/2}
+      // clang-format off
+      get_state(Qdata2, i    , j    , qLB);
+      get_state(Qdata2, i    , j - 1, qLT);
+      get_state(Qdata2, i - 1, j    , qRB);
+      get_state(Qdata2, i - 1, j - 1, qRT);
+      // clang-format on
+
+      // LB at (i,j)
+      {
+        const auto i0 = i;
+        const auto j0 = j;
+        reconstruct_state_at_edge(qLB, i0, j0, MHDEdgeLocation::LB);
+      }
+
+      // RT (i-1, j-1)
+      {
+        const auto i0 = i - 1;
+        const auto j0 = j - 1;
+        reconstruct_state_at_edge(qRT, i0, j0, MHDEdgeLocation::RT);
+      }
+
+      // RB (i-1,j)
+      {
+        const auto i0 = i - 1;
+        const auto j0 = j;
+        reconstruct_state_at_edge(qRB, i0, j0, MHDEdgeLocation::RB);
+      }
+
+      // LT (i,j-1)
+      {
+        const auto i0 = i;
+        const auto j0 = j - 1;
+        reconstruct_state_at_edge(qLT, i0, j0, MHDEdgeLocation::LT);
+      }
+
+      const real_t emfZ = compute_emf<EMFZ>(qEdge_emfZ, params);
+
+      // clang-format off
+      Kokkos::atomic_sub(&Udata_out(i    , j    , IA), emfZ * dtdy);
+      Kokkos::atomic_add(&Udata_out(i    , j    , IB), emfZ * dtdx);
+
+      Kokkos::atomic_add(&Udata_out(i    , j - 1, IA), emfZ * dtdy);
+      Kokkos::atomic_sub(&Udata_out(i - 1, j    , IB), emfZ * dtdx);
+      // clang-format on
+    }
+  } // operator ()
+
+  DataArray2d Udata_in, Udata_out;
+  DataArray2d Qdata2;
+  DataArray2d Slopes_x, Slopes_y;
+  DataArray2d sFaceMag;
+  real_t      dtdx, dtdy;
+
+}; // ReconstructEdgeComputeEmfAndUpdateFunctor2D
 
 /*************************************************/
 /*************************************************/
@@ -492,8 +1555,10 @@ class UpdateFunctor2D_MHD : public MHDBaseFunctor2D
         real_t      dtdy)
   {
     UpdateFunctor2D_MHD functor(params, Udata, FluxData_x, FluxData_y, dtdx, dtdy);
-    Kokkos::parallel_for("UpdateFunctor2D_MHD",
-      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }), functor);
+    Kokkos::parallel_for(
+      "UpdateFunctor2D_MHD",
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>>({ 0, 0 }, { params.isize, params.jsize }),
+      functor);
   }
 
   KOKKOS_INLINE_FUNCTION
@@ -519,9 +1584,9 @@ class UpdateFunctor2D_MHD : public MHDBaseFunctor2D
       udata[IU] += flux[IU] * dtdx;
       udata[IV] += flux[IV] * dtdx;
       udata[IW] += flux[IW] * dtdx;
-      // udata[IBX] +=  flux[IBX]*dtdx;
-      // udata[IBY] +=  flux[IBY]*dtdx;
-      udata[IBZ] += flux[IBZ] * dtdx;
+      // udata[IA] +=  flux[IA]*dtdx;
+      // udata[IB] +=  flux[IB]*dtdx;
+      udata[IC] += flux[IC] * dtdx;
 
       get_state(FluxData_x, i + 1, j, flux);
       udata[ID] -= flux[ID] * dtdx;
@@ -529,9 +1594,9 @@ class UpdateFunctor2D_MHD : public MHDBaseFunctor2D
       udata[IU] -= flux[IU] * dtdx;
       udata[IV] -= flux[IV] * dtdx;
       udata[IW] -= flux[IW] * dtdx;
-      // udata[IBX] -=  flux[IBX]*dtdx;
-      // udata[IBY] -=  flux[IBY]*dtdx;
-      udata[IBZ] -= flux[IBZ] * dtdx;
+      // udata[IA] -=  flux[IA]*dtdx;
+      // udata[IB] -=  flux[IB]*dtdx;
+      udata[IC] -= flux[IC] * dtdx;
 
       get_state(FluxData_y, i, j, flux);
       udata[ID] += flux[ID] * dtdy;
@@ -539,9 +1604,9 @@ class UpdateFunctor2D_MHD : public MHDBaseFunctor2D
       udata[IU] += flux[IV] * dtdy; //
       udata[IV] += flux[IU] * dtdy; //
       udata[IW] += flux[IW] * dtdy;
-      // udata[IBX] +=  flux[IBX]*dtdy;
-      // udata[IBY] +=  flux[IBY]*dtdy;
-      udata[IBZ] += flux[IBZ] * dtdy;
+      // udata[IA] +=  flux[IA]*dtdy;
+      // udata[IB] +=  flux[IB]*dtdy;
+      udata[IC] += flux[IC] * dtdy;
 
       get_state(FluxData_y, i, j + 1, flux);
       udata[ID] -= flux[ID] * dtdy;
@@ -549,9 +1614,9 @@ class UpdateFunctor2D_MHD : public MHDBaseFunctor2D
       udata[IU] -= flux[IV] * dtdy; //
       udata[IV] -= flux[IU] * dtdy; //
       udata[IW] -= flux[IW] * dtdy;
-      // udata[IBX] -=  flux[IBX]*dtdy;
-      // udata[IBY] -=  flux[IBY]*dtdy;
-      udata[IBZ] -= flux[IBZ] * dtdy;
+      // udata[IA] -=  flux[IA]*dtdy;
+      // udata[IB] -=  flux[IB]*dtdy;
+      udata[IC] -= flux[IC] * dtdy;
 
       // write back result in Udata
       set_state(Udata, i, j, udata);
@@ -602,8 +1667,10 @@ class UpdateEmfFunctor2D : public MHDBaseFunctor2D
     const int jsize = params.jsize;
     const int ghostWidth = params.ghostWidth;
 
-    if (j >= ghostWidth and j < jsize - ghostWidth /*+1*/ and i >= ghostWidth and
-        i < isize - ghostWidth /*+1*/)
+    // clang-format off
+    if (j >= ghostWidth and j < jsize - ghostWidth /*+1*/ and
+        i >= ghostWidth and i < isize - ghostWidth /*+1*/)
+    // clang-format on
     {
 
       // MHDState udata;
@@ -653,7 +1720,10 @@ class ComputeTraceAndFluxes_Functor2D_MHD : public MHDBaseFunctor2D
     const int jsize = params.jsize;
     const int ghostWidth = params.ghostWidth;
 
-    if (j >= ghostWidth and j <= jsize - ghostWidth and i >= ghostWidth and i <= isize - ghostWidth)
+    // clang-format off
+    if (j >= ghostWidth and j <= jsize - ghostWidth and
+        i >= ghostWidth and i <= isize - ghostWidth)
+    // clang-format on
     {
 
       // local primitive variables
diff --git a/src/muscl/MHDRunFunctors3D.h b/src/muscl/MHDRunFunctors3D.h
index 97d0edd..d3440b4 100644
--- a/src/muscl/MHDRunFunctors3D.h
+++ b/src/muscl/MHDRunFunctors3D.h
@@ -5,10 +5,6 @@
 #include "MHDBaseFunctor3D.h"
 #include "shared/RiemannSolvers_MHD.h"
 
-#ifndef SQR
-#  define SQR(x) ((x) * (x))
-#endif
-
 namespace ppkMHD
 {
 namespace muscl
@@ -50,8 +46,11 @@ class ComputeDtFunctor3D_MHD : public MHDBaseFunctor3D
     const real_t dy = params.dy;
     const real_t dz = params.dz;
 
-    if (k >= ghostWidth and k < ksize - ghostWidth and j >= ghostWidth and
-        j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth)
+    // clang-format off
+    if (k >= ghostWidth and k < ksize - ghostWidth and
+        j >= ghostWidth and j < jsize - ghostWidth and
+        i >= ghostWidth and i < isize - ghostWidth)
+    // clang-format on
     {
 
       MHDState qLoc; // primitive    variables in current cell
@@ -62,9 +61,9 @@ class ComputeDtFunctor3D_MHD : public MHDBaseFunctor3D
       qLoc[IU] = Qdata(i, j, k, IU);
       qLoc[IV] = Qdata(i, j, k, IV);
       qLoc[IW] = Qdata(i, j, k, IW);
-      qLoc[IBX] = Qdata(i, j, k, IBX);
-      qLoc[IBY] = Qdata(i, j, k, IBY);
-      qLoc[IBZ] = Qdata(i, j, k, IBZ);
+      qLoc[IA] = Qdata(i, j, k, IA);
+      qLoc[IB] = Qdata(i, j, k, IB);
+      qLoc[IC] = Qdata(i, j, k, IC);
 
       // compute fastest information speeds
       real_t fastInfoSpeed[3];
@@ -117,10 +116,14 @@ class ConvertToPrimitivesFunctor3D_MHD : public MHDBaseFunctor3D
     // magnetic field in neighbor cells
     real_t magFieldNeighbors[3];
 
-    if (k >= 0 and k < ksize - 1 and j >= 0 and j < jsize - 1 and i >= 0 and i < isize - 1)
+    // clang-format off
+    if (k >= 0 and k < ksize - 1 and
+        j >= 0 and j < jsize - 1 and
+        i >= 0 and i < isize - 1)
+    // clang-format on
     {
 
-      MHDState uLoc; // conservative    variables in current cell
+      MHDState uLoc; // conservative variables in current cell
       MHDState qLoc; // primitive    variables in current cell
       real_t   c;
 
@@ -130,14 +133,14 @@ class ConvertToPrimitivesFunctor3D_MHD : public MHDBaseFunctor3D
       uLoc[IU] = Udata(i, j, k, IU);
       uLoc[IV] = Udata(i, j, k, IV);
       uLoc[IW] = Udata(i, j, k, IW);
-      uLoc[IBX] = Udata(i, j, k, IBX);
-      uLoc[IBY] = Udata(i, j, k, IBY);
-      uLoc[IBZ] = Udata(i, j, k, IBZ);
+      uLoc[IA] = Udata(i, j, k, IA);
+      uLoc[IB] = Udata(i, j, k, IB);
+      uLoc[IC] = Udata(i, j, k, IC);
 
       // get mag field in neighbor cells
-      magFieldNeighbors[IX] = Udata(i + 1, j, k, IBX);
-      magFieldNeighbors[IY] = Udata(i, j + 1, k, IBY);
-      magFieldNeighbors[IZ] = Udata(i, j, k + 1, IBZ);
+      magFieldNeighbors[IX] = Udata(i + 1, j, k, IA);
+      magFieldNeighbors[IY] = Udata(i, j + 1, k, IB);
+      magFieldNeighbors[IZ] = Udata(i, j, k + 1, IC);
 
       // get primitive variables in current cell
       constoprim_mhd(uLoc, magFieldNeighbors, c, qLoc);
@@ -148,9 +151,9 @@ class ConvertToPrimitivesFunctor3D_MHD : public MHDBaseFunctor3D
       Qdata(i, j, k, IU) = qLoc[IU];
       Qdata(i, j, k, IV) = qLoc[IV];
       Qdata(i, j, k, IW) = qLoc[IW];
-      Qdata(i, j, k, IBX) = qLoc[IBX];
-      Qdata(i, j, k, IBY) = qLoc[IBY];
-      Qdata(i, j, k, IBZ) = qLoc[IBZ];
+      Qdata(i, j, k, IA) = qLoc[IA];
+      Qdata(i, j, k, IB) = qLoc[IB];
+      Qdata(i, j, k, IC) = qLoc[IC];
     }
   }
 
@@ -159,6 +162,116 @@ class ConvertToPrimitivesFunctor3D_MHD : public MHDBaseFunctor3D
 
 }; // ConvertToPrimitivesFunctor3D_MHD
 
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeSlopesFunctor3D_MHD : public MHDBaseFunctor3D
+{
+
+public:
+  /**
+   * Compute limited slopes of primitives variables at cell center.
+   *
+   * Magnetic field slopes are computed for transverse component at cell center, and for normal
+   * component at cell faces.
+   *
+   * \note Normal magnetic field component slopes are not limited.
+   *
+   * \param[in] Udata conservative variables
+   * \param[in] Qdata primitive variables
+   * \param[out] Slopes_x limited slopes along direction X
+   * \param[out] Slopes_y limited slopes along direction Y
+   * \param[out] Slopes_z limited slopes along direction Z
+   */
+  ComputeSlopesFunctor3D_MHD(HydroParams params,
+                             DataArray3d Udata,
+                             DataArray3d Qdata,
+                             DataArray3d Slopes_x,
+                             DataArray3d Slopes_y,
+                             DataArray3d Slopes_z)
+    : MHDBaseFunctor3D(params)
+    , Udata(Udata)
+    , Qdata(Qdata)
+    , Slopes_x(Slopes_x)
+    , Slopes_y(Slopes_y)
+    , Slopes_z(Slopes_z){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray3d Udata,
+        DataArray3d Qdata,
+        DataArray3d Slopes_x,
+        DataArray3d Slopes_y,
+        DataArray3d Slopes_z)
+  {
+    ComputeSlopesFunctor3D_MHD functor(params, Udata, Qdata, Slopes_x, Slopes_y, Slopes_z);
+    Kokkos::parallel_for("ComputeSlopesFunctor3D_MHD",
+                         Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
+                           { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }),
+                         functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j, const int & k) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ksize = params.ksize;
+    // const int ghostWidth = params.ghostWidth;
+
+    // clang-format off
+    if (k >= 1 and k < ksize - 1 and
+        j >= 1 and j < jsize - 1 and
+        i >= 1 and i < isize - 1)
+    // clang-format on
+    {
+      MHDState qc, qm, qp;
+      get_state(Qdata, i, j, k, qc);
+
+      MHDState dq;
+
+      //
+      // slopes along X
+      //
+      get_state(Qdata, i - 1, j, k, qm);
+      get_state(Qdata, i + 1, j, k, qp);
+      slope_unsplit_hydro_3d(qc, qp, qm, dq);
+      set_state(Slopes_x, i, j, k, dq);
+
+      // modify slopes for normal magnetic field component using face centered value
+      Slopes_x(i, j, k, IA) = Udata(i + 1, j, k, IA) - Udata(i, j, k, IA);
+
+      //
+      // slopes along Y
+      //
+      get_state(Qdata, i, j - 1, k, qm);
+      get_state(Qdata, i, j + 1, k, qp);
+      slope_unsplit_hydro_3d(qc, qp, qm, dq);
+      set_state(Slopes_y, i, j, k, dq);
+
+      // modify slopes for normal magnetic field component using face centered value
+      Slopes_y(i, j, k, IB) = Udata(i, j + 1, k, IB) - Udata(i, j, k, IB);
+
+      //
+      // slopes along Z
+      //
+      get_state(Qdata, i, j, k - 1, qm);
+      get_state(Qdata, i, j, k + 1, qp);
+      slope_unsplit_hydro_3d(qc, qp, qm, dq);
+      set_state(Slopes_z, i, j, k, dq);
+
+      // modify slopes for normal magnetic field component using face centered value
+      Slopes_z(i, j, k, IC) = Udata(i, j, k + 1, IC) - Udata(i, j, k, IC);
+    }
+  } // end operator ()
+
+  DataArray3d Udata, Qdata;
+  DataArray3d Slopes_x, Slopes_y, Slopes_z;
+
+}; // ComputeSlopesFunctor3D_MHD
+
 /*************************************************/
 /*************************************************/
 /*************************************************/
@@ -166,10 +279,10 @@ class ComputeElecFieldFunctor3D : public MHDBaseFunctor3D
 {
 
 public:
-  ComputeElecFieldFunctor3D(HydroParams      params,
-                            DataArray3d      Udata,
-                            DataArray3d      Qdata,
-                            DataArrayVector3 ElecField)
+  ComputeElecFieldFunctor3D(HydroParams params,
+                            DataArray3d Udata,
+                            DataArray3d Qdata,
+                            DataArray3d ElecField)
     : MHDBaseFunctor3D(params)
     , Udata(Udata)
     , Qdata(Qdata)
@@ -177,7 +290,7 @@ class ComputeElecFieldFunctor3D : public MHDBaseFunctor3D
 
   // static method which does it all: create and execute functor
   static void
-  apply(HydroParams params, DataArray3d Udata, DataArray3d Qdata, DataArrayVector3 ElecField)
+  apply(HydroParams params, DataArray3d Udata, DataArray3d Qdata, DataArray3d ElecField)
   {
     ComputeElecFieldFunctor3D functor(params, Udata, Qdata, ElecField);
     Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
@@ -194,58 +307,134 @@ class ComputeElecFieldFunctor3D : public MHDBaseFunctor3D
     const int ksize = params.ksize;
     // const int ghostWidth = params.ghostWidth;
 
-    if (k > 0 and k < ksize - 1 and j > 0 and j < jsize - 1 and i > 0 and i < isize - 1)
+    // clang-format off
+    if (k > 0 and k < ksize - 1 and
+        j > 0 and j < jsize - 1 and
+        i > 0 and i < isize - 1)
     {
 
       real_t u, v, w, A, B, C;
 
       // compute Ex
       v = ONE_FOURTH_F * (Qdata(i, j - 1, k - 1, IV) + Qdata(i, j - 1, k, IV) +
-                          Qdata(i, j, k - 1, IV) + Qdata(i, j, k, IV));
+                          Qdata(i, j    , k - 1, IV) + Qdata(i, j    , k, IV));
 
       w = ONE_FOURTH_F * (Qdata(i, j - 1, k - 1, IW) + Qdata(i, j - 1, k, IW) +
-                          Qdata(i, j, k - 1, IW) + Qdata(i, j, k, IW));
+                          Qdata(i, j    , k - 1, IW) + Qdata(i, j    , k, IW));
 
-      B = HALF_F * (Udata(i, j, k - 1, IB) + Udata(i, j, k, IB));
-
-      C = HALF_F * (Udata(i, j - 1, k, IC) + Udata(i, j, k, IC));
+      B = HALF_F * (Udata(i, j    , k - 1, IB) + Udata(i, j, k, IB));
+      C = HALF_F * (Udata(i, j - 1, k    , IC) + Udata(i, j, k, IC));
 
       ElecField(i, j, k, IX) = v * C - w * B;
 
       // compute Ey
       u = ONE_FOURTH_F * (Qdata(i - 1, j, k - 1, IU) + Qdata(i - 1, j, k, IU) +
-                          Qdata(i, j, k - 1, IU) + Qdata(i, j, k, IU));
+                          Qdata(i    , j, k - 1, IU) + Qdata(i    , j, k, IU));
 
       w = ONE_FOURTH_F * (Qdata(i - 1, j, k - 1, IW) + Qdata(i - 1, j, k, IW) +
-                          Qdata(i, j, k - 1, IW) + Qdata(i, j, k, IW));
-
-      A = HALF_F * (Udata(i, j, k - 1, IA) + Udata(i, j, k, IA));
+                          Qdata(i    , j, k - 1, IW) + Qdata(i    , j, k, IW));
 
-      C = HALF_F * (Udata(i - 1, j, k, IC) + Udata(i, j, k, IC));
+      A = HALF_F * (Udata(i    , j, k - 1, IA) + Udata(i, j, k, IA));
+      C = HALF_F * (Udata(i - 1, j, k    , IC) + Udata(i, j, k, IC));
 
       ElecField(i, j, k, IY) = w * A - u * C;
 
       // compute Ez
       u = ONE_FOURTH_F * (Qdata(i - 1, j - 1, k, IU) + Qdata(i - 1, j, k, IU) +
-                          Qdata(i, j - 1, k, IU) + Qdata(i, j, k, IU));
+                          Qdata(i    , j - 1, k, IU) + Qdata(i    , j, k, IU));
 
       v = ONE_FOURTH_F * (Qdata(i - 1, j - 1, k, IV) + Qdata(i - 1, j, k, IV) +
-                          Qdata(i, j - 1, k, IV) + Qdata(i, j, k, IV));
+                          Qdata(i    , j - 1, k, IV) + Qdata(i    , j, k, IV));
 
-      A = HALF_F * (Udata(i, j - 1, k, IA) + Udata(i, j, k, IA));
-
-      B = HALF_F * (Udata(i - 1, j, k, IB) + Udata(i, j, k, IB));
+      A = HALF_F * (Udata(i    , j - 1, k, IA) + Udata(i, j, k, IA));
+      B = HALF_F * (Udata(i - 1, j    , k, IB) + Udata(i, j, k, IB));
 
       ElecField(i, j, k, IZ) = u * B - v * A;
     }
+    // clang-format on
+
   } // operator ()
 
-  DataArray3d      Udata;
-  DataArray3d      Qdata;
-  DataArrayVector3 ElecField;
+  DataArray3d Udata;
+  DataArray3d Qdata;
+  DataArray3d ElecField;
 
 }; // ComputeElecFieldFunctor3D
 
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeSourceFaceMagFunctor3D : public MHDBaseFunctor3D
+{
+
+public:
+  ComputeSourceFaceMagFunctor3D(HydroParams params,
+                                DataArray3d ElecField,
+                                DataArray3d sFaceMag,
+                                real_t      dtdx,
+                                real_t      dtdy,
+                                real_t      dtdz)
+    : MHDBaseFunctor3D(params)
+    , ElecField(ElecField)
+    , sFaceMag(sFaceMag)
+    , dtdx(dtdx)
+    , dtdy(dtdy)
+    , dtdz(dtdz){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray3d ElecField,
+        DataArray3d sFaceMag,
+        real_t      dtdx,
+        real_t      dtdy,
+        real_t      dtdz)
+  {
+    ComputeSourceFaceMagFunctor3D functor(params, ElecField, sFaceMag, dtdx, dtdy, dtdz);
+    Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
+                           { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }),
+                         functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j, const int & k) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ksize = params.ksize;
+
+    // clang-format off
+    if (k > 0 and k < ksize-1  and
+        j > 0 and j < jsize-1  and
+        i > 0 and i < isize-1)
+    {
+
+      // sAL0 = +(GLR - GLL) * dtdy * HALF_F - (FLR - FLL) * dtdz * HALF_F
+      sFaceMag(i, j, k, IX) =
+        (ElecField(i, j + 1, k    , IZ) - ElecField(i, j, k, IZ)) * HALF_F * dtdy -
+        (ElecField(i, j    , k + 1, IY) - ElecField(i, j, k, IY)) * HALF_F * dtdz;
+
+      // sBL0 = -(GRL - GLL) * dtdx * HALF_F + (ELR - ELL) * dtdz * HALF_F
+      sFaceMag(i, j, k, IY) =
+        -(ElecField(i + 1, j, k    , IZ) - ElecField(i, j, k, IZ)) * HALF_F * dtdx +
+         (ElecField(i    , j, k + 1, IX) - ElecField(i, j, k, IX)) * HALF_F * dtdz;
+
+      // sCL0 = +(FRL - FLL) * dtdx * HALF_F - (ERL - ELL) * dtdy * HALF_F
+      sFaceMag(i, j, k, IZ) =
+        (ElecField(i + 1, j    , k, IY) - ElecField(i, j, k, IY)) * HALF_F * dtdx -
+        (ElecField(i    , j + 1, k, IX) - ElecField(i, j, k, IX)) * HALF_F * dtdy;
+    }
+    // clang-format on
+
+  } // operator ()
+
+  DataArray3d ElecField;
+  DataArray3d sFaceMag;
+  real_t      dtdx, dtdy, dtdz;
+
+}; // ComputeSourceFaceMagFunctor3D
+
 /*************************************************/
 /*************************************************/
 /*************************************************/
@@ -287,7 +476,11 @@ class ComputeMagSlopesFunctor3D : public MHDBaseFunctor3D
     const int ksize = params.ksize;
     // const int ghostWidth = params.ghostWidth;
 
-    if (k > 0 and k < ksize - 1 and j > 0 and j < jsize - 1 and i > 0 and i < isize - 1)
+    // clang-format off
+    if (k > 0 and k < ksize - 1 and
+        j > 0 and j < jsize - 1 and
+        i > 0 and i < isize - 1)
+    // clang-format on
     {
 
       real_t bfSlopes[15];
@@ -298,23 +491,26 @@ class ComputeMagSlopesFunctor3D : public MHDBaseFunctor3D
       real_t(&dbfZ)[3] = dbfSlopes[IZ];
 
       // get magnetic slopes dbf
-      bfSlopes[0] = Udata(i, j, k, IA);
-      bfSlopes[1] = Udata(i, j + 1, k, IA);
-      bfSlopes[2] = Udata(i, j - 1, k, IA);
-      bfSlopes[3] = Udata(i, j, k + 1, IA);
-      bfSlopes[4] = Udata(i, j, k - 1, IA);
-
-      bfSlopes[5] = Udata(i, j, k, IB);
-      bfSlopes[6] = Udata(i + 1, j, k, IB);
-      bfSlopes[7] = Udata(i - 1, j, k, IB);
-      bfSlopes[8] = Udata(i, j, k + 1, IB);
-      bfSlopes[9] = Udata(i, j, k - 1, IB);
-
-      bfSlopes[10] = Udata(i, j, k, IC);
-      bfSlopes[11] = Udata(i + 1, j, k, IC);
-      bfSlopes[12] = Udata(i - 1, j, k, IC);
-      bfSlopes[13] = Udata(i, j + 1, k, IC);
-      bfSlopes[14] = Udata(i, j - 1, k, IC);
+
+      // clang-format off
+      bfSlopes[0] =  Udata(i    , j    , k    , IA);
+      bfSlopes[1] =  Udata(i    , j + 1, k    , IA);
+      bfSlopes[2] =  Udata(i    , j - 1, k    , IA);
+      bfSlopes[3] =  Udata(i    , j    , k + 1, IA);
+      bfSlopes[4] =  Udata(i    , j    , k - 1, IA);
+
+      bfSlopes[5] =  Udata(i    , j    , k    , IB);
+      bfSlopes[6] =  Udata(i + 1, j    , k    , IB);
+      bfSlopes[7] =  Udata(i - 1, j    , k    , IB);
+      bfSlopes[8] =  Udata(i    , j    , k + 1, IB);
+      bfSlopes[9] =  Udata(i    , j    , k - 1, IB);
+
+      bfSlopes[10] = Udata(i    , j    , k    , IC);
+      bfSlopes[11] = Udata(i + 1, j    , k    , IC);
+      bfSlopes[12] = Udata(i - 1, j    , k    , IC);
+      bfSlopes[13] = Udata(i    , j + 1, k    , IC);
+      bfSlopes[14] = Udata(i    , j - 1, k    , IC);
+      // clang-format on
 
       // compute magnetic slopes
       slope_unsplit_mhd_3d(bfSlopes, dbfSlopes);
@@ -413,7 +609,7 @@ class ComputeTraceFunctor3D_MHD : public MHDBaseFunctor3D
         DataArrayVector3 DeltaA,
         DataArrayVector3 DeltaB,
         DataArrayVector3 DeltaC,
-        DataArrayVector3 ElecField,
+        DataArray3d      ElecField,
         DataArray3d      Qm_x,
         DataArray3d      Qm_y,
         DataArray3d      Qm_z,
@@ -478,8 +674,11 @@ class ComputeTraceFunctor3D_MHD : public MHDBaseFunctor3D
     const int ksize = params.ksize;
     const int ghostWidth = params.ghostWidth;
 
-    if (k >= ghostWidth - 2 and k < ksize - ghostWidth + 1 and j >= ghostWidth - 2 and
-        j < jsize - ghostWidth + 1 and i >= ghostWidth - 2 and i < isize - ghostWidth + 1)
+    // clang-format off
+    if (k >= ghostWidth - 2 and k < ksize - ghostWidth + 1 and
+        j >= ghostWidth - 2 and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth - 2 and i < isize - ghostWidth + 1)
+    // clang-format on
     {
 
       MHDState q;
@@ -501,25 +700,27 @@ class ComputeTraceFunctor3D_MHD : public MHDBaseFunctor3D
 
       real_t xPos = params.xmin + params.dx / 2 + (i - ghostWidth) * params.dx;
 
+      // clang-format off
+
       // get primitive variables state vector
-      get_state(Qdata, i, j, k, q);
-      get_state(Qdata, i + 1, j, k, qPlusX);
-      get_state(Qdata, i - 1, j, k, qMinusX);
-      get_state(Qdata, i, j + 1, k, qPlusY);
-      get_state(Qdata, i, j - 1, k, qMinusY);
-      get_state(Qdata, i, j, k + 1, qPlusZ);
-      get_state(Qdata, i, j, k - 1, qMinusZ);
+      get_state(Qdata, i    , j    , k    , q);
+      get_state(Qdata, i + 1, j    , k    , qPlusX);
+      get_state(Qdata, i - 1, j    , k    , qMinusX);
+      get_state(Qdata, i    , j + 1, k    , qPlusY);
+      get_state(Qdata, i    , j - 1, k    , qMinusY);
+      get_state(Qdata, i    , j    , k + 1, qPlusZ);
+      get_state(Qdata, i    , j    , k - 1, qMinusZ);
 
       // get hydro slopes dq
       slope_unsplit_hydro_3d(q, qPlusX, qMinusX, qPlusY, qMinusY, qPlusZ, qMinusZ, dq);
 
       // get face-centered magnetic components
-      bfNb[0] = Udata(i, j, k, IA);
-      bfNb[1] = Udata(i + 1, j, k, IA);
-      bfNb[2] = Udata(i, j, k, IB);
-      bfNb[3] = Udata(i, j + 1, k, IB);
-      bfNb[4] = Udata(i, j, k, IC);
-      bfNb[5] = Udata(i, j, k + 1, IC);
+      bfNb[0] = Udata(i    , j    , k    , IA);
+      bfNb[1] = Udata(i + 1, j    , k    , IA);
+      bfNb[2] = Udata(i    , j    , k    , IB);
+      bfNb[3] = Udata(i    , j + 1, k    , IB);
+      bfNb[4] = Udata(i    , j    , k    , IC);
+      bfNb[5] = Udata(i    , j    , k + 1, IC);
 
       // get dbf (transverse magnetic slopes)
       dbf[0] = DeltaA(i, j, k, IY);
@@ -529,28 +730,30 @@ class ComputeTraceFunctor3D_MHD : public MHDBaseFunctor3D
       dbf[4] = DeltaC(i, j, k, IX);
       dbf[5] = DeltaC(i, j, k, IY);
 
-      dbf[6] = DeltaA(i + 1, j, k, IY);
-      dbf[7] = DeltaA(i + 1, j, k, IZ);
-      dbf[8] = DeltaB(i, j + 1, k, IX);
-      dbf[9] = DeltaB(i, j + 1, k, IZ);
-      dbf[10] = DeltaC(i, j, k + 1, IX);
-      dbf[11] = DeltaC(i, j, k + 1, IY);
+      dbf[6]  = DeltaA(i + 1, j    , k    , IY);
+      dbf[7]  = DeltaA(i + 1, j    , k    , IZ);
+      dbf[8]  = DeltaB(i    , j + 1, k    , IX);
+      dbf[9]  = DeltaB(i    , j + 1, k    , IZ);
+      dbf[10] = DeltaC(i    , j    , k + 1, IX);
+      dbf[11] = DeltaC(i    , j    , k + 1, IY);
 
       // get electric field components
-      Ex[0][0] = ElecField(i, j, k, IX);
-      Ex[0][1] = ElecField(i, j, k + 1, IX);
-      Ex[1][0] = ElecField(i, j + 1, k, IX);
-      Ex[1][1] = ElecField(i, j + 1, k + 1, IX);
+      Ex[0][0] = ElecField(i    , j    , k    , IX); // ELL
+      Ex[0][1] = ElecField(i    , j    , k + 1, IX); // ELR
+      Ex[1][0] = ElecField(i    , j + 1, k    , IX); // ERL
+      Ex[1][1] = ElecField(i    , j + 1, k + 1, IX); // ERR
+
+      Ey[0][0] = ElecField(i    , j    , k    , IY); // FLL
+      Ey[0][1] = ElecField(i    , j    , k + 1, IY); // FLR
+      Ey[1][0] = ElecField(i + 1, j    , k    , IY); // FRL
+      Ey[1][1] = ElecField(i + 1, j    , k + 1, IY); // FRR
 
-      Ey[0][0] = ElecField(i, j, k, IY);
-      Ey[0][1] = ElecField(i, j, k + 1, IY);
-      Ey[1][0] = ElecField(i + 1, j, k, IY);
-      Ey[1][1] = ElecField(i + 1, j, k + 1, IY);
+      Ez[0][0] = ElecField(i    , j    , k    , IZ); // GLL
+      Ez[0][1] = ElecField(i    , j + 1, k    , IZ); // GLR
+      Ez[1][0] = ElecField(i + 1, j    , k    , IZ); // GRL
+      Ez[1][1] = ElecField(i + 1, j + 1, k    , IZ); // GRR
 
-      Ez[0][0] = ElecField(i, j, k, IZ);
-      Ez[0][1] = ElecField(i, j + 1, k, IZ);
-      Ez[1][0] = ElecField(i + 1, j, k, IZ);
-      Ez[1][1] = ElecField(i + 1, j + 1, k, IZ);
+      // clang-format on
 
       // compute qm, qp and qEdge
       trace_unsplit_mhd_3d_simpler(
@@ -641,7 +844,8 @@ class ComputeTraceFunctor3D_MHD : public MHDBaseFunctor3D
   } // operator ()
 
   DataArray3d      Udata, Qdata;
-  DataArrayVector3 DeltaA, DeltaB, DeltaC, ElecField;
+  DataArrayVector3 DeltaA, DeltaB, DeltaC;
+  DataArray3d      ElecField;
   DataArray3d      Qm_x, Qm_y, Qm_z;
   DataArray3d      Qp_x, Qp_y, Qp_z;
   DataArray3d      QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB;
@@ -654,33 +858,31 @@ class ComputeTraceFunctor3D_MHD : public MHDBaseFunctor3D
 /*************************************************/
 /*************************************************/
 /*************************************************/
-class ComputeFluxesAndStoreFunctor3D_MHD : public MHDBaseFunctor3D
+/**
+ * Compute cell-centered primitive data at t_{n+1/2} (half time step in
+ * Muscl-Hancock).
+ */
+class ComputeUpdatedPrimVarFunctor3D_MHD : public MHDBaseFunctor3D
 {
 
 public:
-  ComputeFluxesAndStoreFunctor3D_MHD(HydroParams params,
-                                     DataArray3d Qm_x,
-                                     DataArray3d Qm_y,
-                                     DataArray3d Qm_z,
-                                     DataArray3d Qp_x,
-                                     DataArray3d Qp_y,
-                                     DataArray3d Qp_z,
-                                     DataArray3d Fluxes_x,
-                                     DataArray3d Fluxes_y,
-                                     DataArray3d Fluxes_z,
+  ComputeUpdatedPrimVarFunctor3D_MHD(HydroParams params,
+                                     DataArray3d Udata,
+                                     DataArray3d Qdata,
+                                     DataArray3d Slopes_x,
+                                     DataArray3d Slopes_y,
+                                     DataArray3d Slopes_z,
+                                     DataArray3d Qdata2,
                                      real_t      dtdx,
                                      real_t      dtdy,
                                      real_t      dtdz)
     : MHDBaseFunctor3D(params)
-    , Qm_x(Qm_x)
-    , Qm_y(Qm_y)
-    , Qm_z(Qm_z)
-    , Qp_x(Qp_x)
-    , Qp_y(Qp_y)
-    , Qp_z(Qp_z)
-    , Fluxes_x(Fluxes_x)
-    , Fluxes_y(Fluxes_y)
-    , Fluxes_z(Fluxes_z)
+    , Udata(Udata)
+    , Qdata(Qdata)
+    , Slopes_x(Slopes_x)
+    , Slopes_y(Slopes_y)
+    , Slopes_z(Slopes_z)
+    , Qdata2(Qdata2)
     , dtdx(dtdx)
     , dtdy(dtdy)
     , dtdz(dtdz){};
@@ -688,22 +890,20 @@ class ComputeFluxesAndStoreFunctor3D_MHD : public MHDBaseFunctor3D
   // static method which does it all: create and execute functor
   static void
   apply(HydroParams params,
-        DataArray3d Qm_x,
-        DataArray3d Qm_y,
-        DataArray3d Qm_z,
-        DataArray3d Qp_x,
-        DataArray3d Qp_y,
-        DataArray3d Qp_z,
-        DataArray3d Flux_x,
-        DataArray3d Flux_y,
-        DataArray3d Flux_z,
+        DataArray3d Udata,
+        DataArray3d Qdata,
+        DataArray3d Slopes_x,
+        DataArray3d Slopes_y,
+        DataArray3d Slopes_z,
+        DataArray3d Qdata2,
         real_t      dtdx,
         real_t      dtdy,
         real_t      dtdz)
   {
-    ComputeFluxesAndStoreFunctor3D_MHD functor(
-      params, Qm_x, Qm_y, Qm_z, Qp_x, Qp_y, Qp_z, Flux_x, Flux_y, Flux_z, dtdx, dtdy, dtdz);
-    Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
+    ComputeUpdatedPrimVarFunctor3D_MHD functor(
+      params, Udata, Qdata, Slopes_x, Slopes_y, Slopes_z, Qdata2, dtdx, dtdy, dtdz);
+    Kokkos::parallel_for("ComputeUpdatedPrimVarFunctor3D_MHD",
+                         Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
                            { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }),
                          functor);
   }
@@ -712,140 +912,1255 @@ class ComputeFluxesAndStoreFunctor3D_MHD : public MHDBaseFunctor3D
   void
   operator()(const int & i, const int & j, const int & k) const
   {
-    const int isize = params.isize;
-    const int jsize = params.jsize;
-    const int ksize = params.ksize;
-    const int ghostWidth = params.ghostWidth;
+    const int    isize = params.isize;
+    const int    jsize = params.jsize;
+    const int    ksize = params.ksize;
+    const int    ghostWidth = params.ghostWidth;
+    const real_t gamma = params.settings.gamma0;
 
-    if (k >= ghostWidth and k < ksize - ghostWidth + 1 and j >= ghostWidth and
-        j < jsize - ghostWidth + 1 and i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format off
+    if (k >= ghostWidth - 2 and k < ksize - ghostWidth + 1 and
+        j >= ghostWidth - 2 and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth - 2 and i < isize - ghostWidth + 1)
+    // clang-format on
     {
+      // get primitive variable in current cell
+      MHDState q;
+      get_state(Qdata, i, j, k, q);
 
-      MHDState qleft, qright;
-      MHDState flux;
-
-      //
-      // Solve Riemann problem at X-interfaces and compute X-fluxes
-      //
-      get_state(Qm_x, i - 1, j, k, qleft);
-      get_state(Qp_x, i, j, k, qright);
-
-      // compute hydro flux along X
-      riemann_mhd(qleft, qright, flux, params);
-
-      // store fluxes
-      set_state(Fluxes_x, i, j, k, flux);
-
-      //
-      // Solve Riemann problem at Y-interfaces and compute Y-fluxes
-      //
-      get_state(Qm_y, i, j - 1, k, qleft);
-      swapValues(&(qleft[IU]), &(qleft[IV]));
-      swapValues(&(qleft[IBX]), &(qleft[IBY]));
-
-      get_state(Qp_y, i, j, k, qright);
-      swapValues(&(qright[IU]), &(qright[IV]));
-      swapValues(&(qright[IBX]), &(qright[IBY]));
-
-      // compute hydro flux along Y
-      riemann_mhd(qleft, qright, flux, params);
-
-      // store fluxes
-      set_state(Fluxes_y, i, j, k, flux);
-
-      //
-      // Solve Riemann problem at Z-interfaces and compute Z-fluxes
-      //
-      get_state(Qm_z, i, j, k - 1, qleft);
-      swapValues(&(qleft[IU]), &(qleft[IW]));
-      swapValues(&(qleft[IBX]), &(qleft[IBZ]));
+      MHDState dq[3];
 
-      get_state(Qp_z, i, j, k, qright);
-      swapValues(&(qright[IU]), &(qright[IW]));
-      swapValues(&(qright[IBX]), &(qright[IBZ]));
+      // retrieve hydro slopes along X
+      get_state(Slopes_x, i, j, k, dq[IX]);
+
+      // retrieve hydro slopes along Y
+      get_state(Slopes_y, i, j, k, dq[IY]);
+
+      // retrieve hydro slopes along Z
+      get_state(Slopes_z, i, j, k, dq[IZ]);
+
+      // Cell centered values
+      auto const & r = q[ID];
+      auto const & p = q[IP];
+      auto const & u = q[IU];
+      auto const & v = q[IV];
+      auto const & w = q[IW];
+      auto const & A = q[IA];
+      auto const & B = q[IB];
+      auto const & C = q[IC];
+
+      // Cell centered TVD slopes in X direction
+      auto const & drx = dq[IX][ID];
+      auto const & dpx = dq[IX][IP];
+      auto const & dux = dq[IX][IU];
+      auto const & dvx = dq[IX][IV];
+      auto const & dwx = dq[IX][IW];
+      auto const & dCx = dq[IX][IC];
+      auto const & dBx = dq[IX][IB];
+
+      // Cell centered TVD slopes in Y direction
+      auto const & dry = dq[IY][ID];
+      auto const & dpy = dq[IY][IP];
+      auto const & duy = dq[IY][IU];
+      auto const & dvy = dq[IY][IV];
+      auto const & dwy = dq[IY][IW];
+      auto const & dCy = dq[IY][IC];
+      auto const & dAy = dq[IY][IA];
+
+      // Cell centered TVD slopes in Z direction
+      auto const & drz = dq[IZ][ID];
+      auto const & dpz = dq[IZ][IP];
+      auto const & duz = dq[IZ][IU];
+      auto const & dvz = dq[IZ][IV];
+      auto const & dwz = dq[IZ][IW];
+      auto const & dAz = dq[IZ][IA];
+      auto const & dBz = dq[IZ][IB];
+
+      auto const   db = compute_normal_mag_field_slopes(Udata, i, j, k);
+      auto const & dAx = db[IX];
+      auto const & dBy = db[IY];
+      auto const & dCz = db[IZ];
+
+      real_t sr0, su0, sv0, sw0, sp0, sA0, sB0, sC0;
+      {
 
-      // compute hydro flux along Z
-      riemann_mhd(qleft, qright, flux, params);
+        sr0 =
+          (-u * drx - dux * r) * dtdx + (-v * dry - dvy * r) * dtdy + (-w * drz - dwz * r) * dtdz;
+        su0 = (-u * dux - (dpx + B * dBx + C * dCx) / r) * dtdx + (-v * duy + B * dAy / r) * dtdy +
+              (-w * duz + C * dAz / r) * dtdz;
+        sv0 = (-u * dvx + A * dBx / r) * dtdx + (-v * dvy - (dpy + A * dAy + C * dCy) / r) * dtdy +
+              (-w * dvz + C * dBz / r) * dtdz;
+        sw0 = (-u * dwx + A * dCx / r) * dtdx + (-v * dwy + B * dCy / r) * dtdy +
+              (-w * dwz - (dpz + A * dAz + B * dBz) / r) * dtdz;
+        sp0 = (-u * dpx - dux * gamma * p) * dtdx + (-v * dpy - dvy * gamma * p) * dtdy +
+              (-w * dpz - dwz * gamma * p) * dtdz;
+        sA0 = (u * dBy + B * duy - v * dAy - A * dvy) * dtdy +
+              (u * dCz + C * duz - w * dAz - A * dwz) * dtdz;
+        sB0 = (v * dAx + A * dvx - u * dBx - B * dux) * dtdx +
+              (v * dCz + C * dvz - w * dBz - B * dwz) * dtdz;
+        sC0 = (w * dAx + A * dwx - u * dCx - C * dux) * dtdx +
+              (w * dBy + B * dwy - v * dCy - C * dvy) * dtdy;
+      }
 
-      // store fluxes
-      set_state(Fluxes_z, i, j, k, flux);
+      // Update in time the primitive variables (half time step)
+      Qdata2(i, j, k, ID) = r + 0.5 * sr0;
+      Qdata2(i, j, k, IU) = u + 0.5 * su0;
+      Qdata2(i, j, k, IV) = v + 0.5 * sv0;
+      Qdata2(i, j, k, IW) = w + 0.5 * sw0;
+      Qdata2(i, j, k, IP) = p + 0.5 * sp0;
+      Qdata2(i, j, k, IA) = A + 0.5 * sA0;
+      Qdata2(i, j, k, IB) = B + 0.5 * sB0;
+      Qdata2(i, j, k, IC) = C + 0.5 * sC0;
     }
-  }
+  } // operator ()
 
-  DataArray3d Qm_x, Qm_y, Qm_z;
-  DataArray3d Qp_x, Qp_y, Qp_z;
-  DataArray3d Fluxes_x, Fluxes_y, Fluxes_z;
+  DataArray3d Udata, Qdata; // input
+  DataArray3d Slopes_x, Slopes_y, Slopes_z; // input
+  DataArray3d Qdata2;       // output
   real_t      dtdx, dtdy, dtdz;
 
-}; // ComputeFluxesAndStoreFunctor3D_MHD
+}; // ComputeUpdatedPrimvarFunctor3D_MHD
 
 /*************************************************/
 /*************************************************/
 /*************************************************/
-class ComputeEmfAndStoreFunctor3D : public MHDBaseFunctor3D
+/**
+ * Reconstruct hydro state at cell face center, solve Riemann problems and update.
+ *
+ * This is done in a direction by direction manner.
+ */
+template <Direction dir>
+class ComputeFluxAndUpdateAlongDirFunctor3D_MHD : public MHDBaseFunctor3D
 {
 
 public:
-  ComputeEmfAndStoreFunctor3D(HydroParams      params,
-                              DataArray3d      QEdge_RT,
-                              DataArray3d      QEdge_RB,
-                              DataArray3d      QEdge_LT,
-                              DataArray3d      QEdge_LB,
-                              DataArray3d      QEdge_RT2,
-                              DataArray3d      QEdge_RB2,
-                              DataArray3d      QEdge_LT2,
-                              DataArray3d      QEdge_LB2,
-                              DataArray3d      QEdge_RT3,
-                              DataArray3d      QEdge_RB3,
-                              DataArray3d      QEdge_LT3,
-                              DataArray3d      QEdge_LB3,
-                              DataArrayVector3 Emf,
-                              real_t           dtdx,
-                              real_t           dtdy,
-                              real_t           dtdz)
+  //!
+  //! \param[in] Udata_in is conservative variables at t_n
+  //! \param[in] Udata_out is conservative variables at t_{n+1}
+  //! \param[in] Qdata2 is primitive variables array at t_{n+1/2}
+  //!
+  ComputeFluxAndUpdateAlongDirFunctor3D_MHD(HydroParams params,
+                                            DataArray3d Udata_in,
+                                            DataArray3d Udata_out,
+                                            DataArray3d Qdata2,
+                                            DataArray3d Slopes_x,
+                                            DataArray3d Slopes_y,
+                                            DataArray3d Slopes_z,
+                                            DataArray3d sFaceMag,
+                                            real_t      dtdx,
+                                            real_t      dtdy,
+                                            real_t      dtdz)
     : MHDBaseFunctor3D(params)
-    , QEdge_RT(QEdge_RT)
-    , QEdge_RB(QEdge_RB)
-    , QEdge_LT(QEdge_LT)
-    , QEdge_LB(QEdge_LB)
-    , QEdge_RT2(QEdge_RT2)
-    , QEdge_RB2(QEdge_RB2)
-    , QEdge_LT2(QEdge_LT2)
-    , QEdge_LB2(QEdge_LB2)
-    , QEdge_RT3(QEdge_RT3)
-    , QEdge_RB3(QEdge_RB3)
-    , QEdge_LT3(QEdge_LT3)
-    , QEdge_LB3(QEdge_LB3)
-    , Emf(Emf)
+    , Udata_in(Udata_in)
+    , Udata_out(Udata_out)
+    , Qdata2(Qdata2)
+    , Slopes_x(Slopes_x)
+    , Slopes_y(Slopes_y)
+    , Slopes_z(Slopes_z)
+    , sFaceMag(sFaceMag)
     , dtdx(dtdx)
     , dtdy(dtdy)
     , dtdz(dtdz){};
 
   // static method which does it all: create and execute functor
   static void
-  apply(HydroParams      params,
-        DataArray3d      QEdge_RT,
-        DataArray3d      QEdge_RB,
-        DataArray3d      QEdge_LT,
-        DataArray3d      QEdge_LB,
-        DataArray3d      QEdge_RT2,
-        DataArray3d      QEdge_RB2,
-        DataArray3d      QEdge_LT2,
-        DataArray3d      QEdge_LB2,
-        DataArray3d      QEdge_RT3,
-        DataArray3d      QEdge_RB3,
-        DataArray3d      QEdge_LT3,
-        DataArray3d      QEdge_LB3,
-        DataArrayVector3 Emf,
-        real_t           dtdx,
-        real_t           dtdy,
-        real_t           dtdz)
+  apply(HydroParams params,
+        DataArray3d Udata_in,
+        DataArray3d Udata_out,
+        DataArray3d Qdata2,
+        DataArray3d Slopes_x,
+        DataArray3d Slopes_y,
+        DataArray3d Slopes_z,
+        DataArray3d sFaceMag,
+        real_t      dtdx,
+        real_t      dtdy,
+        real_t      dtdz)
   {
-    ComputeEmfAndStoreFunctor3D functor(params,
-                                        QEdge_RT,
-                                        QEdge_RB,
-                                        QEdge_LT,
-                                        QEdge_LB,
+    ComputeFluxAndUpdateAlongDirFunctor3D_MHD<dir> functor(params,
+                                                           Udata_in,
+                                                           Udata_out,
+                                                           Qdata2,
+                                                           Slopes_x,
+                                                           Slopes_y,
+                                                           Slopes_z,
+                                                           sFaceMag,
+                                                           dtdx,
+                                                           dtdy,
+                                                           dtdz);
+    Kokkos::parallel_for("ComputeFluxAndUpdateAlongDirFunctor3D_MHD",
+                         Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
+                           { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }),
+                         functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j, const int & k) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ksize = params.ksize;
+    const int ghostWidth = params.ghostWidth;
+
+    // const real_t gamma = params.settings.gamma0;
+    const real_t smallR = params.settings.smallr;
+    const real_t smallp = params.settings.smallp;
+
+    constexpr auto delta_i = dir == DIR_X ? 1 : 0;
+    constexpr auto delta_j = dir == DIR_Y ? 1 : 0;
+    constexpr auto delta_k = dir == DIR_Z ? 1 : 0;
+
+    // clang-format off
+    if (k >= ghostWidth and k < ksize - ghostWidth + 1 and
+        j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format on
+    {
+
+      // left and right reconstructed state (input for Riemann solver)
+      MHDState qR, qL;
+
+      // slopes in left and right cells
+      MHDState dq, dqN;
+
+      //
+      // Right state at left interface (current cell)
+      //
+
+      // load hydro slopes along dir
+      if constexpr (dir == DIR_X)
+      {
+        get_state(Slopes_x, i, j, k, dq);
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        get_state(Slopes_y, i, j, k, dq);
+      }
+      else if constexpr (dir == DIR_Z)
+      {
+        get_state(Slopes_z, i, j, k, dq);
+      }
+
+      // get primitive variable in current cell at t_{n+1/2}
+      MHDState q2;
+      get_state(Qdata2, i, j, k, q2);
+
+      qR[ID] = q2[ID] - 0.5 * dq[ID];
+      qR[IU] = q2[IU] - 0.5 * dq[IU];
+      qR[IV] = q2[IV] - 0.5 * dq[IV];
+      qR[IW] = q2[IW] - 0.5 * dq[IW];
+      qR[IP] = q2[IP] - 0.5 * dq[IP];
+      qR[ID] = fmax(smallR, qR[ID]);
+      qR[IP] = fmax(smallp * qR[ID], qR[IP]);
+      // clang-format off
+      if constexpr (dir == DIR_X)
+      {
+        const real_t AL = Udata_in(i, j, k, IA) + sFaceMag(i, j, k, IX);
+        qR[IA] = AL;
+        qR[IB] = q2[IB] - 0.5 * dq[IB];
+        qR[IC] = q2[IC] - 0.5 * dq[IC];
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        const real_t BL = Udata_in(i, j, k, IB) + sFaceMag(i, j, k, IY);
+        qR[IA] = q2[IA] - 0.5 * dq[IA];
+        qR[IB] = BL;
+        qR[IC] = q2[IC] - 0.5 * dq[IC];
+      }
+      else if constexpr (dir == DIR_Z)
+      {
+        const real_t CL = Udata_in(i, j, k, IC) + sFaceMag(i, j, k, IZ);
+        qR[IA] = q2[IA] - 0.5 * dq[IA];
+        qR[IB] = q2[IB] - 0.5 * dq[IB];
+        qR[IC] = CL;
+      }
+      // clang-format on
+
+      //
+      // Left state at right interface (neighbor cell)
+      //
+
+      // load hydro slopes along dir
+      if constexpr (dir == DIR_X)
+      {
+        get_state(Slopes_x, i - delta_i, j - delta_j, k - delta_k, dqN);
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        get_state(Slopes_y, i - delta_i, j - delta_j, k - delta_k, dqN);
+      }
+      else if constexpr (dir == DIR_Z)
+      {
+        get_state(Slopes_z, i - delta_i, j - delta_j, k - delta_k, dqN);
+      }
+
+      // get primitive variable in neighbor cell at t_{n+1/2}
+      MHDState q2N;
+      get_state(Qdata2, i - delta_i, j - delta_j, k - delta_k, q2N);
+
+      qL[ID] = q2N[ID] + 0.5 * dqN[ID];
+      qL[IU] = q2N[IU] + 0.5 * dqN[IU];
+      qL[IV] = q2N[IV] + 0.5 * dqN[IV];
+      qL[IW] = q2N[IW] + 0.5 * dqN[IW];
+      qL[IP] = q2N[IP] + 0.5 * dqN[IP];
+      qL[ID] = fmax(smallR, qL[ID]);
+      qL[IP] = fmax(smallp * qL[ID], qL[IP]);
+      if constexpr (dir == DIR_X)
+      {
+        qL[IA] = qR[IA];
+        qL[IB] = q2N[IB] + 0.5 * dqN[IB];
+        qL[IC] = q2N[IC] + 0.5 * dqN[IC];
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        qL[IA] = q2N[IA] + 0.5 * dqN[IA];
+        qL[IB] = qR[IB];
+        qL[IC] = q2N[IC] + 0.5 * dqN[IC];
+      }
+      else if constexpr (dir == DIR_Z)
+      {
+        qL[IA] = q2N[IA] + 0.5 * dqN[IA];
+        qL[IB] = q2N[IB] + 0.5 * dqN[IB];
+        qL[IC] = qR[IC];
+      }
+
+      // now we are ready for computing hydro flux
+      MHDState flux;
+
+      if constexpr (dir == DIR_Y)
+      {
+        swapValues(&(qL[IU]), &(qL[IV]));
+        swapValues(&(qL[IA]), &(qL[IB]));
+        swapValues(&(qR[IU]), &(qR[IV]));
+        swapValues(&(qR[IA]), &(qR[IB]));
+      }
+      else if constexpr (dir == DIR_Z)
+      {
+        swapValues(&(qL[IU]), &(qL[IW]));
+        swapValues(&(qL[IA]), &(qL[IC]));
+        swapValues(&(qR[IU]), &(qR[IW]));
+        swapValues(&(qR[IA]), &(qR[IC]));
+      }
+      riemann_mhd(qL, qR, flux, params);
+      if constexpr (dir == DIR_Y)
+      {
+        swapValues(&(flux[IU]), &(flux[IV]));
+        swapValues(&(flux[IA]), &(flux[IB]));
+      }
+      else if constexpr (dir == DIR_Z)
+      {
+        swapValues(&(flux[IU]), &(flux[IW]));
+        swapValues(&(flux[IA]), &(flux[IC]));
+      }
+
+      if constexpr (dir == DIR_X)
+      {
+        if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_add(&Udata_out(i, j, k, ID), flux[ID] * dtdx);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IP), flux[IP] * dtdx);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IU), flux[IU] * dtdx);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IV), flux[IV] * dtdx);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IW), flux[IW] * dtdx);
+        }
+
+        if (k < ksize - ghostWidth and j < jsize - ghostWidth and i > ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, k, ID), flux[ID] * dtdx);
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, k, IP), flux[IP] * dtdx);
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, k, IU), flux[IU] * dtdx);
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, k, IV), flux[IV] * dtdx);
+          Kokkos::atomic_sub(&Udata_out(i - 1, j, k, IW), flux[IW] * dtdx);
+        }
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_add(&Udata_out(i, j, k, ID), flux[ID] * dtdy);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IP), flux[IP] * dtdy);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IU), flux[IU] * dtdy);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IV), flux[IV] * dtdy);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IW), flux[IW] * dtdy);
+        }
+        if (k < ksize - ghostWidth and j > ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, k, ID), flux[ID] * dtdy);
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, k, IP), flux[IP] * dtdy);
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, k, IU), flux[IU] * dtdy);
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, k, IV), flux[IV] * dtdy);
+          Kokkos::atomic_sub(&Udata_out(i, j - 1, k, IW), flux[IW] * dtdy);
+        }
+      }
+      else if constexpr (dir == DIR_Z)
+      {
+        if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_add(&Udata_out(i, j, k, ID), flux[ID] * dtdz);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IP), flux[IP] * dtdz);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IU), flux[IU] * dtdz);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IV), flux[IV] * dtdz);
+          Kokkos::atomic_add(&Udata_out(i, j, k, IW), flux[IW] * dtdz);
+        }
+        if (k > ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i, j, k - 1, ID), flux[ID] * dtdz);
+          Kokkos::atomic_sub(&Udata_out(i, j, k - 1, IP), flux[IP] * dtdz);
+          Kokkos::atomic_sub(&Udata_out(i, j, k - 1, IU), flux[IU] * dtdz);
+          Kokkos::atomic_sub(&Udata_out(i, j, k - 1, IV), flux[IV] * dtdz);
+          Kokkos::atomic_sub(&Udata_out(i, j, k - 1, IW), flux[IW] * dtdz);
+        }
+      }
+    }
+  } // operator ()
+
+  DataArray3d Udata_in, Udata_out;
+  DataArray3d Qdata2;
+  DataArray3d Slopes_x, Slopes_y, Slopes_z;
+  DataArray3d sFaceMag;
+  real_t      dtdx, dtdy, dtdz;
+
+}; // ComputeFluxAndUpdateAlongDirFunctor3D_MHD
+
+/*************************************************/
+/*************************************************/
+/*************************************************/
+/**
+ * Reconstruct hydrodynamics variables and magnetic field at edge center, compute Emf and update.
+ */
+template <Direction dir>
+class ReconstructEdgeComputeEmfAndUpdateFunctor3D : public MHDBaseFunctor3D
+{
+
+public:
+  //!
+  //! \param[in] Udata_in is conservative variables at t_n
+  //! \param[in] Udata_out is conservative variables at t_{n+1}
+  //! \param[in] Qdata2 is primitive variables array at t_{n+1/2}
+  //!
+  ReconstructEdgeComputeEmfAndUpdateFunctor3D(HydroParams params,
+                                              DataArray3d Udata_in,
+                                              DataArray3d Udata_out,
+                                              DataArray3d Qdata2,
+                                              DataArray3d Slopes_x,
+                                              DataArray3d Slopes_y,
+                                              DataArray3d Slopes_z,
+                                              DataArray3d sFaceMag,
+                                              real_t      dtdx,
+                                              real_t      dtdy,
+                                              real_t      dtdz)
+    : MHDBaseFunctor3D(params)
+    , Udata_in(Udata_in)
+    , Udata_out(Udata_out)
+    , Qdata2(Qdata2)
+    , Slopes_x(Slopes_x)
+    , Slopes_y(Slopes_y)
+    , Slopes_z(Slopes_z)
+    , sFaceMag(sFaceMag)
+    , dtdx(dtdx)
+    , dtdy(dtdy)
+    , dtdz(dtdz){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray3d Udata_in,
+        DataArray3d Udata_out,
+        DataArray3d Qdata2,
+        DataArray3d Slopes_x,
+        DataArray3d Slopes_y,
+        DataArray3d Slopes_z,
+        DataArray3d sFaceMag,
+        real_t      dtdx,
+        real_t      dtdy,
+        real_t      dtdz)
+  {
+    ReconstructEdgeComputeEmfAndUpdateFunctor3D functor(params,
+                                                        Udata_in,
+                                                        Udata_out,
+                                                        Qdata2,
+                                                        Slopes_x,
+                                                        Slopes_y,
+                                                        Slopes_z,
+                                                        sFaceMag,
+                                                        dtdx,
+                                                        dtdy,
+                                                        dtdz);
+    Kokkos::parallel_for("ReconstructEdgeComputeEmfAndUpdateFunctor3D",
+                         Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
+                           { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }),
+                         functor);
+  }
+
+  /**
+   * Given a cell-center MHD state and limited slopes, reconstruct a MHD state at edge.
+   *
+   * \param[out] qEdge is the reconstructed MHD state at edge
+   * \param[in] i0 x-coordinate (cell center) of the cell where the reconstruction takes place
+   * \param[in] j0 y-coordinate (cell center) of the cell where the reconstruction takes place
+   * \param[in] k0 z-coordinate (cell center) of the cell where the reconstruction takes place
+   * \param[in] edge_loc is the type of edge reconstruction (from cell center)
+   * \param[in] dir0 one of the edge transverse direction
+   * \param[in] dir1 the other edge transverse direction
+   */
+  template <int dir0, int dir1>
+  KOKKOS_INLINE_FUNCTION auto
+  reconstruct_state_at_edge(MHDState & qEdge, int ic, int jc, int kc, MHDEdgeLocation edge_loc)
+    const
+  {
+
+    // const real_t gamma = params.settings.gamma0;
+    const real_t smallR = params.settings.smallr;
+    const real_t smallp = params.settings.smallp;
+
+    int                   sign_dq0 = 1;
+    int                   sign_dq1 = 1;
+    Kokkos::Array<int, 3> ijk0{ 0, 0, 0 };
+    Kokkos::Array<int, 3> ijk1{ 0, 0, 0 };
+    int                   sign_b0 = 1;
+    int                   sign_b1 = 1;
+
+    if (edge_loc == MHDEdgeLocation::LB)
+    {
+      sign_dq0 = -1;
+      sign_dq1 = -1;
+      sign_b0 = -1;
+      sign_b1 = -1;
+    }
+    else if (edge_loc == MHDEdgeLocation::RT)
+    {
+      ijk0[dir0] = 1;
+      ijk1[dir1] = 1;
+      sign_dq0 = 1;
+      sign_dq1 = 1;
+      sign_b0 = 1;
+      sign_b1 = 1;
+    }
+    else if (edge_loc == MHDEdgeLocation::RB)
+    {
+      ijk0[dir0] = 1;
+      sign_dq0 = 1;
+      sign_dq1 = -1;
+      sign_b0 = -1;
+      sign_b1 = 1;
+    }
+    else if (edge_loc == MHDEdgeLocation::LT)
+    {
+      ijk1[dir1] = 1;
+      sign_dq0 = -1;
+      sign_dq1 = 1;
+      sign_b0 = 1;
+      sign_b1 = -1;
+    }
+
+    const real_t B0 =
+      Udata_in(ic + ijk0[IX], jc + ijk0[IY], kc + ijk0[IZ], IA + dir0) + sFaceMag(ic, jc, kc, dir0);
+    const real_t dB0d1 = compute_limited_slope<static_cast<Direction>(dir1)>(
+      Udata_in, ic + ijk0[IX], jc + ijk0[IY], kc + ijk0[IZ], IA + dir0);
+
+    const real_t B1 =
+      Udata_in(ic + ijk1[IX], jc + ijk1[IY], kc + ijk1[IZ], IA + dir1) + sFaceMag(ic, jc, kc, dir1);
+    const real_t dB1d0 = compute_limited_slope<static_cast<Direction>(dir0)>(
+      Udata_in, ic + ijk1[IX], jc + ijk1[IY], kc + ijk1[IZ], IA + dir1);
+
+    // get limited slopes
+    MHDState dq0, dq1;
+    if constexpr (dir0 == IX and dir1 == IY)
+    {
+      get_state(Slopes_x, ic, jc, kc, dq0);
+      get_state(Slopes_y, ic, jc, kc, dq1);
+    }
+    else if constexpr (dir0 == IX and dir1 == IZ)
+    {
+      get_state(Slopes_x, ic, jc, kc, dq0);
+      get_state(Slopes_z, ic, jc, kc, dq1);
+    }
+    else if constexpr (dir0 == IY and dir1 == IZ)
+    {
+      get_state(Slopes_y, ic, jc, kc, dq0);
+      get_state(Slopes_z, ic, jc, kc, dq1);
+    }
+
+    // reconstructed state
+    qEdge[ID] += 0.5 * (sign_dq0 * dq0[ID] + sign_dq1 * dq1[ID]);
+    qEdge[IU] += 0.5 * (sign_dq0 * dq0[IU] + sign_dq1 * dq1[IU]);
+    qEdge[IV] += 0.5 * (sign_dq0 * dq0[IV] + sign_dq1 * dq1[IV]);
+    qEdge[IW] += 0.5 * (sign_dq0 * dq0[IW] + sign_dq1 * dq1[IW]);
+    qEdge[IP] += 0.5 * (sign_dq0 * dq0[IP] + sign_dq1 * dq1[IP]);
+    if (dir0 == IX)
+    {
+      qEdge[IA] = B0 + 0.5 * (sign_b0 * dB0d1);
+    }
+    else
+    {
+      qEdge[IA] += 0.5 * (sign_dq0 * dq0[IA] + sign_dq1 * dq1[IA]);
+    }
+
+    if (dir0 == IY)
+    {
+      qEdge[IB] = B0 + 0.5 * (sign_b0 * dB0d1);
+    }
+    else
+    {
+      qEdge[IB] += 0.5 * (sign_dq0 * dq0[IB] + sign_dq1 * dq1[IB]);
+    }
+
+    if (dir1 == IY)
+    {
+      qEdge[IB] = B1 + 0.5 * (sign_b1 * dB1d0);
+    }
+    else
+    {
+      qEdge[IB] += 0.5 * (sign_dq0 * dq0[IB] + sign_dq1 * dq1[IB]);
+    }
+
+    if (dir1 == IZ)
+    {
+      qEdge[IC] = B1 + 0.5 * (sign_b1 * dB1d0);
+    }
+    else
+    {
+      qEdge[IC] += 0.5 * (sign_dq0 * dq0[IC] + sign_dq1 * dq1[IC]);
+    }
+
+    qEdge[ID] = fmax(smallR, qEdge[ID]);
+    qEdge[IP] = fmax(smallp * qEdge[ID], qEdge[IP]);
+
+  } // reconstruct_state_at_edge
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j, const int & k) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ksize = params.ksize;
+    const int ghostWidth = params.ghostWidth;
+
+    // clang-format off
+    if (k >= ghostWidth and k < ksize - ghostWidth + 1 and
+        j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format on
+    {
+      // this drawing is a simple helper to reminder how reconstruction is done
+      //
+      // symbols LB, RB, LT, RT indicate location of reconstructed edge from the cell center.
+      //
+      //
+      //  X locates the edge where hydrodynamics values must be reconstructed
+      //    _________________________
+      //   |           |            |
+      //   |           |            |
+      //   |     RB    |     LB     |
+      //   |  (i-1,j)  |   (i,j)    |
+      //   |          \| /          |
+      //   |___________X____________|
+      //   |          /| \          |
+      //   |        /  |  \         |
+      //   |     RT    |     LT     |
+      //   | (i-1,j-1) |   (i,j-1)  |
+      //   |           |            |
+      //   |___________|____________|
+      //
+
+
+      // qLB is current cell, qLT, qRT and qRB are direct neighbors surrounding the lower left edge
+      // MHDState qLB, qLT, qRB, qRT;
+      MHDState   qEdge_emf[4];
+      MHDState & qRT = qEdge_emf[IRT];
+      MHDState & qLT = qEdge_emf[ILT];
+      MHDState & qRB = qEdge_emf[IRB];
+      MHDState & qLB = qEdge_emf[ILB];
+
+      // get primitive variable in current cell and neighbors at t_{n+1/2}
+      // clang-format off
+      if constexpr (dir == DIR_Z)
+      {
+        get_state(Qdata2, i    , j    , k    , qLB);
+        get_state(Qdata2, i    , j - 1, k    , qLT);
+        get_state(Qdata2, i - 1, j    , k    , qRB);
+        get_state(Qdata2, i - 1, j - 1, k    , qRT);
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        get_state(Qdata2, i    , j    , k    , qLB);
+        get_state(Qdata2, i    , j    , k - 1, qLT); // later LT and RB will swapped
+        get_state(Qdata2, i - 1, j    , k    , qRB); // later LT and RB will swapped
+        get_state(Qdata2, i - 1, j    , k - 1, qRT);
+      }
+      else if constexpr (dir == DIR_X)
+      {
+        get_state(Qdata2, i    , j    , k    , qLB);
+        get_state(Qdata2, i    , j    , k - 1, qLT);
+        get_state(Qdata2, i    , j - 1, k    , qRB);
+        get_state(Qdata2, i    , j - 1, k - 1, qRT);
+      }
+      // clang-format on
+
+      if constexpr (dir == DIR_Z)
+      {
+        // LB at (i,j,k)
+        {
+          const auto i0 = i;
+          const auto j0 = j;
+          const auto k0 = k;
+          reconstruct_state_at_edge<IX, IY>(qLB, i0, j0, k0, MHDEdgeLocation::LB);
+        }
+
+        // RT (i-1, j-1, k)
+        {
+          const auto i0 = i - 1;
+          const auto j0 = j - 1;
+          const auto k0 = k;
+          reconstruct_state_at_edge<IX, IY>(qRT, i0, j0, k0, MHDEdgeLocation::RT);
+        }
+
+        // RB (i-1, j, k)
+        {
+          const auto i0 = i - 1;
+          const auto j0 = j;
+          const auto k0 = k;
+          reconstruct_state_at_edge<IX, IY>(qRB, i0, j0, k0, MHDEdgeLocation::RB);
+        }
+
+        // LT (i, j-1, k)
+        {
+          const auto i0 = i;
+          const auto j0 = j - 1;
+          const auto k0 = k;
+          reconstruct_state_at_edge<IX, IY>(qLT, i0, j0, k0, MHDEdgeLocation::LT);
+        }
+
+        const real_t emfZ = compute_emf<EMFZ>(qEdge_emf, params);
+
+        // clang-format off
+        if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i    , j    , k     , IA), emfZ * dtdy);
+          Kokkos::atomic_add(&Udata_out(i    , j    , k     , IB), emfZ * dtdx);
+        }
+
+        if (k < ksize - ghostWidth and j > ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_add(&Udata_out(i    , j - 1, k     , IA), emfZ * dtdy);
+        }
+        if (k < ksize - ghostWidth and j < jsize - ghostWidth and i > ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i - 1, j    , k     , IB), emfZ * dtdx);
+        }
+        // clang-format on
+      }
+      else if constexpr (dir == DIR_Y)
+      {
+        // LB at (i,j,k)
+        {
+          const auto i0 = i;
+          const auto j0 = j;
+          const auto k0 = k;
+          reconstruct_state_at_edge<IX, IZ>(qLB, i0, j0, k0, MHDEdgeLocation::LB);
+        }
+
+        // RT (i-1, j, k-1)
+        {
+          const auto i0 = i - 1;
+          const auto j0 = j;
+          const auto k0 = k - 1;
+          reconstruct_state_at_edge<IX, IZ>(qRT, i0, j0, k0, MHDEdgeLocation::RT);
+        }
+
+        // RB (i-1, j, k)
+        {
+          const auto i0 = i - 1;
+          const auto j0 = j;
+          const auto k0 = k;
+          reconstruct_state_at_edge<IX, IZ>(qRB, i0, j0, k0, MHDEdgeLocation::RB);
+        }
+
+        // LT (i, j, k-1)
+        {
+          const auto i0 = i;
+          const auto j0 = j;
+          const auto k0 = k - 1;
+          reconstruct_state_at_edge<IX, IZ>(qLT, i0, j0, k0, MHDEdgeLocation::LT);
+        }
+
+        // exchange RB and LT
+        {
+          swapValues(&(qRB[ID]), &(qLT[ID]));
+          swapValues(&(qRB[IU]), &(qLT[IU]));
+          swapValues(&(qRB[IV]), &(qLT[IV]));
+          swapValues(&(qRB[IW]), &(qLT[IW]));
+          swapValues(&(qRB[IP]), &(qLT[IP]));
+          swapValues(&(qRB[IA]), &(qLT[IA]));
+          swapValues(&(qRB[IB]), &(qLT[IB]));
+          swapValues(&(qRB[IC]), &(qLT[IC]));
+        }
+        const real_t emfY = compute_emf<EMFY>(qEdge_emf, params);
+
+        // clang-format off
+        if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_add(&Udata_out(i    , j    , k     , IA), emfY * dtdz);
+          Kokkos::atomic_sub(&Udata_out(i    , j    , k     , IC), emfY * dtdx);
+        }
+        if (k > ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i    , j    , k - 1 , IA), emfY * dtdz);
+        }
+        if (k < ksize - ghostWidth and j < jsize - ghostWidth and i > ghostWidth)
+        {
+          Kokkos::atomic_add(&Udata_out(i - 1, j    , k     , IC), emfY * dtdx);
+        }
+        // clang-format on
+      }
+      else if constexpr (dir == DIR_X)
+      {
+        // LB at (i,j,k)
+        {
+          const auto i0 = i;
+          const auto j0 = j;
+          const auto k0 = k;
+          reconstruct_state_at_edge<IY, IZ>(qLB, i0, j0, k0, MHDEdgeLocation::LB);
+        }
+
+        // RT (i, j-1, k-1)
+        {
+          const auto i0 = i;
+          const auto j0 = j - 1;
+          const auto k0 = k - 1;
+          reconstruct_state_at_edge<IY, IZ>(qRT, i0, j0, k0, MHDEdgeLocation::RT);
+        }
+
+        // RB (i, j-1, k)
+        {
+          const auto i0 = i;
+          const auto j0 = j - 1;
+          const auto k0 = k;
+          reconstruct_state_at_edge<IY, IZ>(qRB, i0, j0, k0, MHDEdgeLocation::RB);
+        }
+
+        // LT (i, j, k-1)
+        {
+          const auto i0 = i;
+          const auto j0 = j;
+          const auto k0 = k - 1;
+          reconstruct_state_at_edge<IY, IZ>(qLT, i0, j0, k0, MHDEdgeLocation::LT);
+        }
+
+        const real_t emfX = compute_emf<EMFX>(qEdge_emf, params);
+
+        // clang-format off
+        if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i    , j    , k     , IB), emfX * dtdz);
+          Kokkos::atomic_add(&Udata_out(i    , j    , k     , IC), emfX * dtdy);
+        }
+        if (k > ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_add(&Udata_out(i    , j    , k - 1 , IB), emfX * dtdz);
+        }
+        if (k < ksize - ghostWidth and j > ghostWidth and i < isize - ghostWidth)
+        {
+          Kokkos::atomic_sub(&Udata_out(i    , j - 1, k     , IC), emfX * dtdy);
+        }
+        // clang-format on
+      }
+    }
+  } // operator ()
+
+  DataArray3d Udata_in, Udata_out;
+  DataArray3d Qdata2;
+  DataArray3d Slopes_x, Slopes_y, Slopes_z;
+  DataArray3d sFaceMag;
+  real_t      dtdx, dtdy, dtdz;
+
+}; // ComputeEmfAndUpdateFunctor3D_MHD
+
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeFluxesAndStoreFunctor3D_MHD : public MHDBaseFunctor3D
+{
+
+public:
+  ComputeFluxesAndStoreFunctor3D_MHD(HydroParams params,
+                                     DataArray3d Qm_x,
+                                     DataArray3d Qm_y,
+                                     DataArray3d Qm_z,
+                                     DataArray3d Qp_x,
+                                     DataArray3d Qp_y,
+                                     DataArray3d Qp_z,
+                                     DataArray3d Fluxes_x,
+                                     DataArray3d Fluxes_y,
+                                     DataArray3d Fluxes_z,
+                                     real_t      dtdx,
+                                     real_t      dtdy,
+                                     real_t      dtdz)
+    : MHDBaseFunctor3D(params)
+    , Qm_x(Qm_x)
+    , Qm_y(Qm_y)
+    , Qm_z(Qm_z)
+    , Qp_x(Qp_x)
+    , Qp_y(Qp_y)
+    , Qp_z(Qp_z)
+    , Fluxes_x(Fluxes_x)
+    , Fluxes_y(Fluxes_y)
+    , Fluxes_z(Fluxes_z)
+    , dtdx(dtdx)
+    , dtdy(dtdy)
+    , dtdz(dtdz){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray3d Qm_x,
+        DataArray3d Qm_y,
+        DataArray3d Qm_z,
+        DataArray3d Qp_x,
+        DataArray3d Qp_y,
+        DataArray3d Qp_z,
+        DataArray3d Flux_x,
+        DataArray3d Flux_y,
+        DataArray3d Flux_z,
+        real_t      dtdx,
+        real_t      dtdy,
+        real_t      dtdz)
+  {
+    ComputeFluxesAndStoreFunctor3D_MHD functor(
+      params, Qm_x, Qm_y, Qm_z, Qp_x, Qp_y, Qp_z, Flux_x, Flux_y, Flux_z, dtdx, dtdy, dtdz);
+    Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
+                           { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }),
+                         functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j, const int & k) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ksize = params.ksize;
+    const int ghostWidth = params.ghostWidth;
+
+    // clang-format off
+    if (k >= ghostWidth and k < ksize - ghostWidth + 1 and
+        j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format on
+    {
+
+      MHDState qleft, qright;
+      MHDState flux;
+
+      //
+      // Solve Riemann problem at X-interfaces and compute X-fluxes
+      //
+      get_state(Qm_x, i - 1, j, k, qleft);
+      get_state(Qp_x, i, j, k, qright);
+
+      // compute hydro flux along X
+      riemann_mhd(qleft, qright, flux, params);
+
+      // store fluxes
+      set_state(Fluxes_x, i, j, k, flux);
+
+      //
+      // Solve Riemann problem at Y-interfaces and compute Y-fluxes
+      //
+      get_state(Qm_y, i, j - 1, k, qleft);
+      swapValues(&(qleft[IU]), &(qleft[IV]));
+      swapValues(&(qleft[IA]), &(qleft[IB]));
+
+      get_state(Qp_y, i, j, k, qright);
+      swapValues(&(qright[IU]), &(qright[IV]));
+      swapValues(&(qright[IA]), &(qright[IB]));
+
+      // compute hydro flux along Y
+      riemann_mhd(qleft, qright, flux, params);
+
+      // store fluxes
+      set_state(Fluxes_y, i, j, k, flux);
+
+      //
+      // Solve Riemann problem at Z-interfaces and compute Z-fluxes
+      //
+      get_state(Qm_z, i, j, k - 1, qleft);
+      swapValues(&(qleft[IU]), &(qleft[IW]));
+      swapValues(&(qleft[IA]), &(qleft[IC]));
+
+      get_state(Qp_z, i, j, k, qright);
+      swapValues(&(qright[IU]), &(qright[IW]));
+      swapValues(&(qright[IA]), &(qright[IC]));
+
+      // compute hydro flux along Z
+      riemann_mhd(qleft, qright, flux, params);
+
+      // store fluxes
+      set_state(Fluxes_z, i, j, k, flux);
+    }
+  }
+
+  DataArray3d Qm_x, Qm_y, Qm_z;
+  DataArray3d Qp_x, Qp_y, Qp_z;
+  DataArray3d Fluxes_x, Fluxes_y, Fluxes_z;
+  real_t      dtdx, dtdy, dtdz;
+
+}; // ComputeFluxesAndStoreFunctor3D_MHD
+
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeFluxesAndUpdateFunctor3D_MHD : public MHDBaseFunctor3D
+{
+
+public:
+  ComputeFluxesAndUpdateFunctor3D_MHD(HydroParams params,
+                                      DataArray3d Qm_x,
+                                      DataArray3d Qm_y,
+                                      DataArray3d Qm_z,
+                                      DataArray3d Qp_x,
+                                      DataArray3d Qp_y,
+                                      DataArray3d Qp_z,
+                                      DataArray3d Udata,
+                                      real_t      dtdx,
+                                      real_t      dtdy,
+                                      real_t      dtdz)
+    : MHDBaseFunctor3D(params)
+    , Qm_x(Qm_x)
+    , Qm_y(Qm_y)
+    , Qm_z(Qm_z)
+    , Qp_x(Qp_x)
+    , Qp_y(Qp_y)
+    , Qp_z(Qp_z)
+    , Udata(Udata)
+    , dtdx(dtdx)
+    , dtdy(dtdy)
+    , dtdz(dtdz){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray3d Qm_x,
+        DataArray3d Qm_y,
+        DataArray3d Qm_z,
+        DataArray3d Qp_x,
+        DataArray3d Qp_y,
+        DataArray3d Qp_z,
+        DataArray3d Udata,
+        real_t      dtdx,
+        real_t      dtdy,
+        real_t      dtdz)
+  {
+    ComputeFluxesAndUpdateFunctor3D_MHD functor(
+      params, Qm_x, Qm_y, Qm_z, Qp_x, Qp_y, Qp_z, Udata, dtdx, dtdy, dtdz);
+    Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
+                           { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }),
+                         functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j, const int & k) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ksize = params.ksize;
+    const int ghostWidth = params.ghostWidth;
+
+    // clang-format off
+    if (k >= ghostWidth and k < ksize - ghostWidth + 1 and
+        j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format on
+    {
+
+      MHDState qleft, qright;
+      MHDState flux;
+
+      //
+      // Solve Riemann problem at X-interfaces and compute X-fluxes
+      //
+
+      // clang-format off
+      get_state(Qm_x, i - 1, j, k, qleft);
+      get_state(Qp_x, i    , j, k, qright);
+      // clang-format on
+
+      // compute hydro flux along X
+      riemann_mhd(qleft, qright, flux, params);
+
+      //
+      // Update with fluxes along X
+      //
+      if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_add(&Udata(i, j, k, ID), flux[ID] * dtdx);
+        Kokkos::atomic_add(&Udata(i, j, k, IP), flux[IP] * dtdx);
+        Kokkos::atomic_add(&Udata(i, j, k, IU), flux[IU] * dtdx);
+        Kokkos::atomic_add(&Udata(i, j, k, IV), flux[IV] * dtdx);
+        Kokkos::atomic_add(&Udata(i, j, k, IW), flux[IW] * dtdx);
+      }
+
+      if (k < ksize - ghostWidth and j < jsize - ghostWidth and i > ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i - 1, j, k, ID), flux[ID] * dtdx);
+        Kokkos::atomic_sub(&Udata(i - 1, j, k, IP), flux[IP] * dtdx);
+        Kokkos::atomic_sub(&Udata(i - 1, j, k, IU), flux[IU] * dtdx);
+        Kokkos::atomic_sub(&Udata(i - 1, j, k, IV), flux[IV] * dtdx);
+        Kokkos::atomic_sub(&Udata(i - 1, j, k, IW), flux[IW] * dtdx);
+      }
+
+      //
+      // Solve Riemann problem at Y-interfaces and compute Y-fluxes
+      //
+      get_state(Qm_y, i, j - 1, k, qleft);
+      swapValues(&(qleft[IU]), &(qleft[IV]));
+      swapValues(&(qleft[IA]), &(qleft[IB]));
+
+      get_state(Qp_y, i, j, k, qright);
+      swapValues(&(qright[IU]), &(qright[IV]));
+      swapValues(&(qright[IA]), &(qright[IB]));
+
+      // compute hydro flux along Y
+      riemann_mhd(qleft, qright, flux, params);
+
+      swapValues(&(flux[IU]), &(flux[IV]));
+      swapValues(&(flux[IA]), &(flux[IB]));
+
+      //
+      // update with fluxes Y
+      //
+      if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_add(&Udata(i, j, k, ID), flux[ID] * dtdy);
+        Kokkos::atomic_add(&Udata(i, j, k, IP), flux[IP] * dtdy);
+        Kokkos::atomic_add(&Udata(i, j, k, IU), flux[IU] * dtdy);
+        Kokkos::atomic_add(&Udata(i, j, k, IV), flux[IV] * dtdy);
+        Kokkos::atomic_add(&Udata(i, j, k, IW), flux[IW] * dtdy);
+      }
+      if (k < ksize - ghostWidth and j > ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i, j - 1, k, ID), flux[ID] * dtdy);
+        Kokkos::atomic_sub(&Udata(i, j - 1, k, IP), flux[IP] * dtdy);
+        Kokkos::atomic_sub(&Udata(i, j - 1, k, IU), flux[IU] * dtdy);
+        Kokkos::atomic_sub(&Udata(i, j - 1, k, IV), flux[IV] * dtdy);
+        Kokkos::atomic_sub(&Udata(i, j - 1, k, IW), flux[IW] * dtdy);
+      }
+
+      //
+      // Solve Riemann problem at Z-interfaces and compute Z-fluxes
+      //
+      get_state(Qm_z, i, j, k - 1, qleft);
+      swapValues(&(qleft[IU]), &(qleft[IW]));
+      swapValues(&(qleft[IA]), &(qleft[IC]));
+
+      get_state(Qp_z, i, j, k, qright);
+      swapValues(&(qright[IU]), &(qright[IW]));
+      swapValues(&(qright[IA]), &(qright[IC]));
+
+      // compute hydro flux along Z
+      riemann_mhd(qleft, qright, flux, params);
+
+      swapValues(&(flux[IU]), &(flux[IW]));
+      swapValues(&(flux[IA]), &(flux[IC]));
+
+      //
+      // update with fluxes Z
+      //
+      if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_add(&Udata(i, j, k, ID), flux[ID] * dtdz);
+        Kokkos::atomic_add(&Udata(i, j, k, IP), flux[IP] * dtdz);
+        Kokkos::atomic_add(&Udata(i, j, k, IU), flux[IU] * dtdz);
+        Kokkos::atomic_add(&Udata(i, j, k, IV), flux[IV] * dtdz);
+        Kokkos::atomic_add(&Udata(i, j, k, IW), flux[IW] * dtdz);
+      }
+      if (k > ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i, j, k - 1, ID), flux[ID] * dtdz);
+        Kokkos::atomic_sub(&Udata(i, j, k - 1, IP), flux[IP] * dtdz);
+        Kokkos::atomic_sub(&Udata(i, j, k - 1, IU), flux[IU] * dtdz);
+        Kokkos::atomic_sub(&Udata(i, j, k - 1, IV), flux[IV] * dtdz);
+        Kokkos::atomic_sub(&Udata(i, j, k - 1, IW), flux[IW] * dtdz);
+      }
+    }
+  }
+
+  DataArray3d Qm_x, Qm_y, Qm_z;
+  DataArray3d Qp_x, Qp_y, Qp_z;
+  DataArray3d Udata;
+  real_t      dtdx, dtdy, dtdz;
+
+}; // ComputeFluxesAndUpdateFunctor3D_MHD
+
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeEmfAndStoreFunctor3D : public MHDBaseFunctor3D
+{
+
+public:
+  ComputeEmfAndStoreFunctor3D(HydroParams      params,
+                              DataArray3d      QEdge_RT,
+                              DataArray3d      QEdge_RB,
+                              DataArray3d      QEdge_LT,
+                              DataArray3d      QEdge_LB,
+                              DataArray3d      QEdge_RT2,
+                              DataArray3d      QEdge_RB2,
+                              DataArray3d      QEdge_LT2,
+                              DataArray3d      QEdge_LB2,
+                              DataArray3d      QEdge_RT3,
+                              DataArray3d      QEdge_RB3,
+                              DataArray3d      QEdge_LT3,
+                              DataArray3d      QEdge_LB3,
+                              DataArrayVector3 Emf,
+                              real_t           dtdx,
+                              real_t           dtdy,
+                              real_t           dtdz)
+    : MHDBaseFunctor3D(params)
+    , QEdge_RT(QEdge_RT)
+    , QEdge_RB(QEdge_RB)
+    , QEdge_LT(QEdge_LT)
+    , QEdge_LB(QEdge_LB)
+    , QEdge_RT2(QEdge_RT2)
+    , QEdge_RB2(QEdge_RB2)
+    , QEdge_LT2(QEdge_LT2)
+    , QEdge_LB2(QEdge_LB2)
+    , QEdge_RT3(QEdge_RT3)
+    , QEdge_RB3(QEdge_RB3)
+    , QEdge_LT3(QEdge_LT3)
+    , QEdge_LB3(QEdge_LB3)
+    , Emf(Emf)
+    , dtdx(dtdx)
+    , dtdy(dtdy)
+    , dtdz(dtdz){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams      params,
+        DataArray3d      QEdge_RT,
+        DataArray3d      QEdge_RB,
+        DataArray3d      QEdge_LT,
+        DataArray3d      QEdge_LB,
+        DataArray3d      QEdge_RT2,
+        DataArray3d      QEdge_RB2,
+        DataArray3d      QEdge_LT2,
+        DataArray3d      QEdge_LB2,
+        DataArray3d      QEdge_RT3,
+        DataArray3d      QEdge_RB3,
+        DataArray3d      QEdge_LT3,
+        DataArray3d      QEdge_LB3,
+        DataArrayVector3 Emf,
+        real_t           dtdx,
+        real_t           dtdy,
+        real_t           dtdz)
+  {
+    ComputeEmfAndStoreFunctor3D functor(params,
+                                        QEdge_RT,
+                                        QEdge_RB,
+                                        QEdge_LT,
+                                        QEdge_LB,
                                         QEdge_RT2,
                                         QEdge_RB2,
                                         QEdge_LT2,
@@ -872,8 +2187,10 @@ class ComputeEmfAndStoreFunctor3D : public MHDBaseFunctor3D
     const int ksize = params.ksize;
     const int ghostWidth = params.ghostWidth;
 
-    if (k >= ghostWidth and k < ksize - ghostWidth + 1 and j >= ghostWidth and
-        j < jsize - ghostWidth + 1 and i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format off
+    if (k >= ghostWidth and k < ksize - ghostWidth + 1 and
+        j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
     {
 
       MHDState qEdge_emf[4];
@@ -886,29 +2203,30 @@ class ComputeEmfAndStoreFunctor3D : public MHDBaseFunctor3D
 
       // actually compute emfZ
       get_state(QEdge_RT3, i - 1, j - 1, k, qEdge_emf[IRT]);
-      get_state(QEdge_RB3, i - 1, j, k, qEdge_emf[IRB]);
-      get_state(QEdge_LT3, i, j - 1, k, qEdge_emf[ILT]);
-      get_state(QEdge_LB3, i, j, k, qEdge_emf[ILB]);
+      get_state(QEdge_RB3, i - 1, j    , k, qEdge_emf[IRB]);
+      get_state(QEdge_LT3, i    , j - 1, k, qEdge_emf[ILT]);
+      get_state(QEdge_LB3, i    , j    , k, qEdge_emf[ILB]);
 
       Emf(i, j, k, I_EMFZ) = compute_emf<EMFZ>(qEdge_emf, params);
 
       // actually compute emfY (take care that RB and LT are
       // swapped !!!)
       get_state(QEdge_RT2, i - 1, j, k - 1, qEdge_emf[IRT]);
-      get_state(QEdge_LT2, i, j, k - 1, qEdge_emf[IRB]);
-      get_state(QEdge_RB2, i - 1, j, k, qEdge_emf[ILT]);
-      get_state(QEdge_LB2, i, j, k, qEdge_emf[ILB]);
+      get_state(QEdge_LT2, i    , j, k - 1, qEdge_emf[IRB]);
+      get_state(QEdge_RB2, i - 1, j, k    , qEdge_emf[ILT]);
+      get_state(QEdge_LB2, i    , j, k    , qEdge_emf[ILB]);
 
       Emf(i, j, k, I_EMFY) = compute_emf<EMFY>(qEdge_emf, params);
 
       // actually compute emfX
       get_state(QEdge_RT, i, j - 1, k - 1, qEdge_emf[IRT]);
-      get_state(QEdge_RB, i, j - 1, k, qEdge_emf[IRB]);
-      get_state(QEdge_LT, i, j, k - 1, qEdge_emf[ILT]);
-      get_state(QEdge_LB, i, j, k, qEdge_emf[ILB]);
+      get_state(QEdge_RB, i, j - 1, k    , qEdge_emf[IRB]);
+      get_state(QEdge_LT, i, j    , k - 1, qEdge_emf[ILT]);
+      get_state(QEdge_LB, i, j    , k    , qEdge_emf[ILB]);
 
       Emf(i, j, k, I_EMFX) = compute_emf<EMFX>(qEdge_emf, params);
     }
+    // clang-format on
   }
 
   DataArray3d      QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB;
@@ -919,6 +2237,192 @@ class ComputeEmfAndStoreFunctor3D : public MHDBaseFunctor3D
 
 }; // ComputeEmfAndStoreFunctor3D
 
+/*************************************************/
+/*************************************************/
+/*************************************************/
+class ComputeEmfAndUpdateFunctor3D : public MHDBaseFunctor3D
+{
+
+public:
+  ComputeEmfAndUpdateFunctor3D(HydroParams params,
+                               DataArray3d QEdge_RT,
+                               DataArray3d QEdge_RB,
+                               DataArray3d QEdge_LT,
+                               DataArray3d QEdge_LB,
+                               DataArray3d QEdge_RT2,
+                               DataArray3d QEdge_RB2,
+                               DataArray3d QEdge_LT2,
+                               DataArray3d QEdge_LB2,
+                               DataArray3d QEdge_RT3,
+                               DataArray3d QEdge_RB3,
+                               DataArray3d QEdge_LT3,
+                               DataArray3d QEdge_LB3,
+                               DataArray3d Udata,
+                               real_t      dtdx,
+                               real_t      dtdy,
+                               real_t      dtdz)
+    : MHDBaseFunctor3D(params)
+    , QEdge_RT(QEdge_RT)
+    , QEdge_RB(QEdge_RB)
+    , QEdge_LT(QEdge_LT)
+    , QEdge_LB(QEdge_LB)
+    , QEdge_RT2(QEdge_RT2)
+    , QEdge_RB2(QEdge_RB2)
+    , QEdge_LT2(QEdge_LT2)
+    , QEdge_LB2(QEdge_LB2)
+    , QEdge_RT3(QEdge_RT3)
+    , QEdge_RB3(QEdge_RB3)
+    , QEdge_LT3(QEdge_LT3)
+    , QEdge_LB3(QEdge_LB3)
+    , Udata(Udata)
+    , dtdx(dtdx)
+    , dtdy(dtdy)
+    , dtdz(dtdz){};
+
+  // static method which does it all: create and execute functor
+  static void
+  apply(HydroParams params,
+        DataArray3d QEdge_RT,
+        DataArray3d QEdge_RB,
+        DataArray3d QEdge_LT,
+        DataArray3d QEdge_LB,
+        DataArray3d QEdge_RT2,
+        DataArray3d QEdge_RB2,
+        DataArray3d QEdge_LT2,
+        DataArray3d QEdge_LB2,
+        DataArray3d QEdge_RT3,
+        DataArray3d QEdge_RB3,
+        DataArray3d QEdge_LT3,
+        DataArray3d QEdge_LB3,
+        DataArray3d Udata,
+        real_t      dtdx,
+        real_t      dtdy,
+        real_t      dtdz)
+  {
+    ComputeEmfAndUpdateFunctor3D functor(params,
+                                         QEdge_RT,
+                                         QEdge_RB,
+                                         QEdge_LT,
+                                         QEdge_LB,
+                                         QEdge_RT2,
+                                         QEdge_RB2,
+                                         QEdge_LT2,
+                                         QEdge_LB2,
+                                         QEdge_RT3,
+                                         QEdge_RB3,
+                                         QEdge_LT3,
+                                         QEdge_LB3,
+                                         Udata,
+                                         dtdx,
+                                         dtdy,
+                                         dtdz);
+    Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
+                           { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }),
+                         functor);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void
+  operator()(const int & i, const int & j, const int & k) const
+  {
+    const int isize = params.isize;
+    const int jsize = params.jsize;
+    const int ksize = params.ksize;
+    const int ghostWidth = params.ghostWidth;
+
+    // clang-format off
+    if (k >= ghostWidth and k < ksize - ghostWidth + 1 and
+        j >= ghostWidth and j < jsize - ghostWidth + 1 and
+        i >= ghostWidth and i < isize - ghostWidth + 1)
+    {
+
+      MHDState qEdge_emf[4];
+
+      // preparation for calling compute_emf (equivalent to cmp_mag_flx
+      // in DUMSES)
+      // in the following, the 2 first indexes in qEdge_emf array play
+      // the same offset role as in the calling argument of cmp_mag_flx
+      // in DUMSES (if you see what I mean ?!)
+
+      // actually compute emfZ
+      get_state(QEdge_RT3, i - 1, j - 1, k, qEdge_emf[IRT]);
+      get_state(QEdge_RB3, i - 1, j    , k, qEdge_emf[IRB]);
+      get_state(QEdge_LT3, i    , j - 1, k, qEdge_emf[ILT]);
+      get_state(QEdge_LB3, i    , j    , k, qEdge_emf[ILB]);
+
+      const real_t emfZ = compute_emf<EMFZ>(qEdge_emf, params);
+
+      if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i    , j    , k     , IA), emfZ * dtdy);
+        Kokkos::atomic_add(&Udata(i    , j    , k     , IB), emfZ * dtdx);
+      }
+
+      if (k < ksize - ghostWidth and j > ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_add(&Udata(i    , j - 1, k     , IA), emfZ * dtdy);
+      }
+      if (k < ksize - ghostWidth and j < jsize - ghostWidth and i > ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i - 1, j    , k     , IB), emfZ * dtdx);
+      }
+
+      // actually compute emfY (take care that RB and LT are
+      // swapped !!!)
+      get_state(QEdge_RT2, i - 1, j, k - 1, qEdge_emf[IRT]);
+      get_state(QEdge_LT2, i    , j, k - 1, qEdge_emf[IRB]);
+      get_state(QEdge_RB2, i - 1, j, k    , qEdge_emf[ILT]);
+      get_state(QEdge_LB2, i    , j, k    , qEdge_emf[ILB]);
+
+      const real_t emfY = compute_emf<EMFY>(qEdge_emf, params);
+
+      if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_add(&Udata(i    , j    , k     , IA), emfY * dtdz);
+        Kokkos::atomic_sub(&Udata(i    , j    , k     , IC), emfY * dtdx);
+      }
+      if (k > ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i    , j    , k - 1 , IA), emfY * dtdz);
+      }
+      if (k < ksize - ghostWidth and j < jsize - ghostWidth and i > ghostWidth)
+      {
+        Kokkos::atomic_add(&Udata(i - 1, j    , k     , IC), emfY * dtdx);
+      }
+
+      // actually compute emfX
+      get_state(QEdge_RT, i, j - 1, k - 1, qEdge_emf[IRT]);
+      get_state(QEdge_RB, i, j - 1, k    , qEdge_emf[IRB]);
+      get_state(QEdge_LT, i, j    , k - 1, qEdge_emf[ILT]);
+      get_state(QEdge_LB, i, j    , k    , qEdge_emf[ILB]);
+
+      const real_t emfX = compute_emf<EMFX>(qEdge_emf, params);
+
+      if (k < ksize - ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i    , j    , k     , IB), emfX * dtdz);
+        Kokkos::atomic_add(&Udata(i    , j    , k     , IC), emfX * dtdy);
+      }
+      if (k > ghostWidth and j < jsize - ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_add(&Udata(i    , j    , k - 1 , IB), emfX * dtdz);
+      }
+      if (k < ksize - ghostWidth and j > ghostWidth and i < isize - ghostWidth)
+      {
+        Kokkos::atomic_sub(&Udata(i    , j - 1, k     , IC), emfX * dtdy);
+      }
+    }
+    // clang-format on
+  }
+
+  DataArray3d QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB;
+  DataArray3d QEdge_RT2, QEdge_RB2, QEdge_LT2, QEdge_LB2;
+  DataArray3d QEdge_RT3, QEdge_RB3, QEdge_LT3, QEdge_LB3;
+  DataArray3d Udata;
+  real_t      dtdx, dtdy, dtdz;
+
+}; // ComputeEmfAndUpdateFunctor3D
+
 
 /*************************************************/
 /*************************************************/
@@ -971,8 +2475,11 @@ class UpdateFunctor3D_MHD : public MHDBaseFunctor3D
     const int ksize = params.ksize;
     const int ghostWidth = params.ghostWidth;
 
-    if (k >= ghostWidth and k < ksize - ghostWidth and j >= ghostWidth and
-        j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth)
+    // clang-format off
+    if (k >= ghostWidth and k < ksize - ghostWidth and
+        j >= ghostWidth and j < jsize - ghostWidth and
+        i >= ghostWidth and i < isize - ghostWidth)
+    // clang-format on
     {
 
       MHDState udata;
@@ -1010,11 +2517,11 @@ class UpdateFunctor3D_MHD : public MHDBaseFunctor3D
       udata[IW] -= flux[IW] * dtdy;
 
       get_state(FluxData_z, i, j, k, flux);
-      udata[ID] += flux[ID] * dtdy;
-      udata[IP] += flux[IP] * dtdy;
-      udata[IU] += flux[IW] * dtdy; //
-      udata[IV] += flux[IV] * dtdy;
-      udata[IW] += flux[IU] * dtdy; //
+      udata[ID] += flux[ID] * dtdz;
+      udata[IP] += flux[IP] * dtdz;
+      udata[IU] += flux[IW] * dtdz; //
+      udata[IV] += flux[IV] * dtdz;
+      udata[IW] += flux[IU] * dtdz; //
 
       get_state(FluxData_z, i, j, k + 1, flux);
       udata[ID] -= flux[ID] * dtdz;
@@ -1080,34 +2587,37 @@ class UpdateEmfFunctor3D : public MHDBaseFunctor3D
     const int ksize = params.ksize;
     const int ghostWidth = params.ghostWidth;
 
-    if (k >= ghostWidth and k < ksize - ghostWidth + 1 and j >= ghostWidth and
-        j < jsize - ghostWidth + 1 and i >= ghostWidth and i < isize - ghostWidth + 1)
+    // clang-format off
+    if (k >= ghostWidth and k < ksize - ghostWidth /*+ 1*/ and
+        j >= ghostWidth and j < jsize - ghostWidth /*+ 1*/ and
+        i >= ghostWidth and i < isize - ghostWidth /*+ 1*/)
+    // clang-format on
     {
 
       MHDState udata;
       get_state(Udata, i, j, k, udata);
 
-      if (k < ksize - ghostWidth)
+      // if (k < ksize - ghostWidth)
       {
-        udata[IBX] += (Emf(i, j + 1, k, I_EMFZ) - Emf(i, j, k, I_EMFZ)) * dtdy;
+        udata[IA] += (Emf(i, j + 1, k, I_EMFZ) - Emf(i, j, k, I_EMFZ)) * dtdy;
 
-        udata[IBY] -= (Emf(i + 1, j, k, I_EMFZ) - Emf(i, j, k, I_EMFZ)) * dtdx;
+        udata[IB] -= (Emf(i + 1, j, k, I_EMFZ) - Emf(i, j, k, I_EMFZ)) * dtdx;
       }
 
       // update BX
-      udata[IBX] -= (Emf(i, j, k + 1, I_EMFY) - Emf(i, j, k, I_EMFY)) * dtdz;
+      udata[IA] -= (Emf(i, j, k + 1, I_EMFY) - Emf(i, j, k, I_EMFY)) * dtdz;
 
       // update BY
-      udata[IBY] += (Emf(i, j, k + 1, I_EMFX) - Emf(i, j, k, I_EMFX)) * dtdz;
+      udata[IB] += (Emf(i, j, k + 1, I_EMFX) - Emf(i, j, k, I_EMFX)) * dtdz;
 
       // update BZ
-      udata[IBZ] += (Emf(i + 1, j, k, I_EMFY) - Emf(i, j, k, I_EMFY)) * dtdx;
+      udata[IC] += (Emf(i + 1, j, k, I_EMFY) - Emf(i, j, k, I_EMFY)) * dtdx;
 
-      udata[IBZ] -= (Emf(i, j + 1, k, I_EMFX) - Emf(i, j, k, I_EMFX)) * dtdy;
+      udata[IC] -= (Emf(i, j + 1, k, I_EMFX) - Emf(i, j, k, I_EMFX)) * dtdy;
 
-      Udata(i, j, k, IA) = udata[IBX];
-      Udata(i, j, k, IB) = udata[IBY];
-      Udata(i, j, k, IC) = udata[IBZ];
+      Udata(i, j, k, IA) = udata[IA];
+      Udata(i, j, k, IB) = udata[IB];
+      Udata(i, j, k, IC) = udata[IC];
     }
   } // operator()
 
diff --git a/src/muscl/SolverMHDMuscl.cpp b/src/muscl/SolverMHDMuscl.cpp
index 15fa12d..e07069d 100644
--- a/src/muscl/SolverMHDMuscl.cpp
+++ b/src/muscl/SolverMHDMuscl.cpp
@@ -68,6 +68,16 @@ SolverMHDMuscl<3>::make_boundaries(DataArray Udata)
 // ///////////////////////////////////////////////////////////////////
 // Compute electric field
 // ///////////////////////////////////////////////////////////////////
+template <>
+void
+SolverMHDMuscl<2>::computeElectricField(DataArray Udata)
+{
+
+  // call device functor
+  // ComputeElecFieldFunctor2D::apply(params, Udata, Q, ElecField);
+
+} // SolverMHDMuscl<2>::computeElectricField
+
 template <>
 void
 SolverMHDMuscl<3>::computeElectricField(DataArray Udata)
@@ -209,6 +219,44 @@ SolverMHDMuscl<3>::computeFluxesAndStore(real_t dt)
 
 } // SolverMHDMuscl<3>::computeFluxesAndStore
 
+// =======================================================
+// =======================================================
+// //////////////////////////////////////////////////////////////////
+// Compute flux via Riemann solver and update
+// //////////////////////////////////////////////////////////////////
+template <>
+void
+SolverMHDMuscl<2>::computeFluxesAndUpdate(real_t dt, DataArray Udata)
+{
+
+  real_t dtdx = dt / params.dx;
+  real_t dtdy = dt / params.dy;
+
+  // call device functor
+  ComputeFluxesAndUpdateFunctor2D_MHD::apply(params, Qm_x, Qm_y, Qp_x, Qp_y, Udata, dtdx, dtdy);
+
+} // SolverMHDMuscl<2>::computeFluxesAndUpdate
+
+// =======================================================
+// =======================================================
+// //////////////////////////////////////////////////////////////////
+// Compute flux via Riemann solver and update
+// //////////////////////////////////////////////////////////////////
+template <>
+void
+SolverMHDMuscl<3>::computeFluxesAndUpdate(real_t dt, DataArray Udata)
+{
+
+  real_t dtdx = dt / params.dx;
+  real_t dtdy = dt / params.dy;
+  real_t dtdz = dt / params.dz;
+
+  // call device functor
+  ComputeFluxesAndUpdateFunctor3D_MHD::apply(
+    params, Qm_x, Qm_y, Qm_z, Qp_x, Qp_y, Qp_z, Udata, dtdx, dtdy, dtdz);
+
+} // SolverMHDMuscl<3>::computeFluxesAndUpdate
+
 // =======================================================
 // =======================================================
 // //////////////////////////////////////////////////////////////////
@@ -263,6 +311,60 @@ SolverMHDMuscl<3>::computeEmfAndStore(real_t dt)
 
 } // SolverMHDMuscl<3>::computeEmfAndStore
 
+// =======================================================
+// =======================================================
+// //////////////////////////////////////////////////////////////////
+// Compute EMF via 2D Riemann solver and update magnetic field
+// //////////////////////////////////////////////////////////////////
+template <>
+void
+SolverMHDMuscl<2>::computeEmfAndUpdate(real_t dt, DataArray Udata)
+{
+
+  real_t dtdx = dt / params.dx;
+  real_t dtdy = dt / params.dy;
+
+  // call device functor
+  ComputeEmfAndUpdateFunctor2D::apply(
+    params, QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB, Udata, dtdx, dtdy);
+
+} // SolverMHSMuscl<2>::computeEmfAndUpdate
+
+// =======================================================
+// =======================================================
+// //////////////////////////////////////////////////////////////////
+// Compute EMF via 2D Riemann solver and update magnetic field
+// //////////////////////////////////////////////////////////////////
+template <>
+void
+SolverMHDMuscl<3>::computeEmfAndUpdate(real_t dt, DataArray Udata)
+{
+
+  real_t dtdx = dt / params.dx;
+  real_t dtdy = dt / params.dy;
+  real_t dtdz = dt / params.dz;
+
+  // call device functor
+  ComputeEmfAndUpdateFunctor3D::apply(params,
+                                      QEdge_RT,
+                                      QEdge_RB,
+                                      QEdge_LT,
+                                      QEdge_LB,
+                                      QEdge_RT2,
+                                      QEdge_RB2,
+                                      QEdge_LT2,
+                                      QEdge_LB2,
+                                      QEdge_RT3,
+                                      QEdge_RB3,
+                                      QEdge_LT3,
+                                      QEdge_LB3,
+                                      Udata,
+                                      dtdx,
+                                      dtdy,
+                                      dtdz);
+
+} // SolverMHSMuscl<2>::computeEmfAndUpdate
+
 // =======================================================
 // =======================================================
 // ///////////////////////////////////////////
@@ -312,6 +414,43 @@ SolverMHDMuscl<2>::godunov_unsplit_impl(DataArray data_in, DataArray data_out, r
     // actual update with emf
     UpdateEmfFunctor2D::apply(params, data_out, Emf1, dtdx, dtdy);
   }
+  else if (params.implementationVersion == 1)
+  {
+    // trace computation: fill arrays qm_x, qm_y, qp_x, qp_y
+    computeTrace(data_in, dt);
+
+    // Compute flux via Riemann solver and update (time integration)
+    computeFluxesAndUpdate(dt, data_out);
+
+    // Compute Emf and update magnetic field
+    computeEmfAndUpdate(dt, data_out);
+  }
+  else if (params.implementationVersion == 2)
+  {
+    // compute limited slopes (for reconstruction)
+    ComputeSlopesFunctor2D_MHD::apply(params, data_in, Q, Slopes_x, Slopes_y);
+
+    // update (cell-centered) primitive variables, perform 1/2 time step
+    ComputeUpdatedPrimVarFunctor2D_MHD::apply(
+      params, data_in, Q, Slopes_x, Slopes_y, Q2, dtdx, dtdy);
+
+    // compute electric field (v wedge B)
+    ComputeElecFieldFunctor2D::apply(params, data_in, Q, ElecField);
+
+    // compute source term to update face centered magnetic field component at t_{n+1/2}
+    ComputeSourceFaceMagFunctor2D::apply(params, ElecField, sFaceMag, dtdx, dtdy);
+
+    // update at t_{n+1} with hydro flux (across all faces)
+    ComputeFluxAndUpdateAlongDirFunctor2D_MHD<DIR_X>::apply(
+      params, data_in, data_out, Q2, Slopes_x, Slopes_y, sFaceMag, dtdx, dtdy);
+
+    ComputeFluxAndUpdateAlongDirFunctor2D_MHD<DIR_Y>::apply(
+      params, data_in, data_out, Q2, Slopes_x, Slopes_y, sFaceMag, dtdx, dtdy);
+
+    ReconstructEdgeComputeEmfAndUpdateFunctor2D::apply(
+      params, data_in, data_out, Q2, Slopes_x, Slopes_y, sFaceMag, dtdx, dtdy);
+  }
+
   timers[TIMER_NUM_SCHEME]->stop();
 
 } // SolverMHDMuscl2D::godunov_unsplit_impl
@@ -326,13 +465,9 @@ void
 SolverMHDMuscl<3>::godunov_unsplit_impl(DataArray data_in, DataArray data_out, real_t dt)
 {
 
-  real_t dtdx;
-  real_t dtdy;
-  real_t dtdz;
-
-  dtdx = dt / params.dx;
-  dtdy = dt / params.dy;
-  dtdz = dt / params.dz;
+  const real_t dtdx = dt / params.dx;
+  const real_t dtdy = dt / params.dy;
+  const real_t dtdz = dt / params.dz;
 
   // fill ghost cell in data_in
   timers[TIMER_BOUNDARIES]->start();
@@ -373,6 +508,58 @@ SolverMHDMuscl<3>::godunov_unsplit_impl(DataArray data_in, DataArray data_out, r
     // actual update with emf
     UpdateEmfFunctor3D::apply(params, data_out, Emf, dtdx, dtdy, dtdz);
   }
+  else if (params.implementationVersion == 1)
+  {
+    // compute electric field
+    computeElectricField(data_in);
+
+    // compute magnetic slopes
+    computeMagSlopes(data_in);
+
+    // trace computation: fill arrays qm_x, qm_y, qm_z, qp_x, qp_y, qp_z
+    computeTrace(data_in, dt);
+
+    // Compute flux via Riemann solver and update (time integration)
+    computeFluxesAndUpdate(dt, data_out);
+
+    // Compute Emf and update magnetic field
+    computeEmfAndUpdate(dt, data_out);
+  }
+  else if (params.implementationVersion == 2)
+  {
+    // compute limited slopes (for reconstruction)
+    ComputeSlopesFunctor3D_MHD::apply(params, data_in, Q, Slopes_x, Slopes_y, Slopes_z);
+
+    // update (cell-centered) primitive variables, perform 1/2 time step
+    ComputeUpdatedPrimVarFunctor3D_MHD::apply(
+      params, data_in, Q, Slopes_x, Slopes_y, Slopes_z, Q2, dtdx, dtdy, dtdz);
+
+    // compute electric field (v wedge B)
+    ComputeElecFieldFunctor3D::apply(params, data_in, Q, ElecField);
+
+    // compute source term to update face centered magnetic field component at t_{n+1/2}
+    ComputeSourceFaceMagFunctor3D::apply(params, ElecField, sFaceMag, dtdx, dtdy, dtdz);
+
+    // update hydro variables at t_{n+1}
+    ComputeFluxAndUpdateAlongDirFunctor3D_MHD<DIR_X>::apply(
+      params, data_in, data_out, Q2, Slopes_x, Slopes_y, Slopes_z, sFaceMag, dtdx, dtdy, dtdz);
+
+    ComputeFluxAndUpdateAlongDirFunctor3D_MHD<DIR_Y>::apply(
+      params, data_in, data_out, Q2, Slopes_x, Slopes_y, Slopes_z, sFaceMag, dtdx, dtdy, dtdz);
+
+    ComputeFluxAndUpdateAlongDirFunctor3D_MHD<DIR_Z>::apply(
+      params, data_in, data_out, Q2, Slopes_x, Slopes_y, Slopes_z, sFaceMag, dtdx, dtdy, dtdz);
+
+    // update magnetic field at t_{n+1}
+    ReconstructEdgeComputeEmfAndUpdateFunctor3D<DIR_X>::apply(
+      params, data_in, data_out, Q2, Slopes_x, Slopes_y, Slopes_z, sFaceMag, dtdx, dtdy, dtdz);
+
+    ReconstructEdgeComputeEmfAndUpdateFunctor3D<DIR_Y>::apply(
+      params, data_in, data_out, Q2, Slopes_x, Slopes_y, Slopes_z, sFaceMag, dtdx, dtdy, dtdz);
+
+    ReconstructEdgeComputeEmfAndUpdateFunctor3D<DIR_Z>::apply(
+      params, data_in, data_out, Q2, Slopes_x, Slopes_y, Slopes_z, sFaceMag, dtdx, dtdy, dtdz);
+  }
   timers[TIMER_NUM_SCHEME]->stop();
 
 } // SolverMHDMuscl<3>::godunov_unsplit_impl
diff --git a/src/muscl/SolverMHDMuscl.h b/src/muscl/SolverMHDMuscl.h
index 516374b..94b75e5 100644
--- a/src/muscl/SolverMHDMuscl.h
+++ b/src/muscl/SolverMHDMuscl.h
@@ -73,18 +73,26 @@ class SolverMHDMuscl : public ppkMHD::SolverBase
     return solver;
   }
 
-  DataArray     U;     /*!< hydrodynamics conservative variables arrays */
-  DataArrayHost Uhost; /*!< U mirror on host memory space */
-  DataArray     U2;    /*!< hydrodynamics conservative variables arrays */
-  DataArray     Q;     /*!< hydrodynamics primitive    variables array  */
+  DataArray     U;     //!< hydrodynamics conservative variables array
+  DataArrayHost Uhost; //!< U mirror on host memory space
+  DataArray     U2;    //!< hydrodynamics conservative variables array
+  DataArray     Q;     //!< hydrodynamics primitive    variables array
+  DataArray Q2; //!< hydrodynamics primitive    variables array at t_{n+1/2} (half time step update)
 
-  DataArray Qm_x; /*!< hydrodynamics Riemann states array implementation 2 */
-  DataArray Qm_y; /*!< hydrodynamics Riemann states array */
-  DataArray Qm_z; /*!< hydrodynamics Riemann states array */
+  /// source term using to compute face centered magnetic field component at t_{n+1/2}
+  DataArray sFaceMag;
 
-  DataArray Qp_x; /*!< hydrodynamics Riemann states array */
-  DataArray Qp_y; /*!< hydrodynamics Riemann states array */
-  DataArray Qp_z; /*!< hydrodynamics Riemann states array */
+  DataArray Slopes_x; //!< implementation 2 only
+  DataArray Slopes_y; //!< implementation 2 only
+  DataArray Slopes_z; //!< implementation 2 only
+
+  DataArray Qm_x; //!< hydrodynamics Riemann states array implementation 2
+  DataArray Qm_y; //!< hydrodynamics Riemann states array
+  DataArray Qm_z; //!< hydrodynamics Riemann states array
+
+  DataArray Qp_x; //!< hydrodynamics Riemann states array
+  DataArray Qp_y; //!< hydrodynamics Riemann states array
+  DataArray Qp_z; //!< hydrodynamics Riemann states array
 
   DataArray QEdge_RT;
   DataArray QEdge_RB;
@@ -105,11 +113,11 @@ class SolverMHDMuscl : public ppkMHD::SolverBase
   DataArray Fluxes_y;
   DataArray Fluxes_z;
 
-  /* electromotive forces */
+  // electromotive forces
   DataArrayScalar  Emf1; // 2d
   DataArrayVector3 Emf;  // 3d
 
-  DataArrayVector3 ElecField;
+  DataArray ElecField; // 2 or 3 components
 
   DataArrayVector3 DeltaA;
   DataArrayVector3 DeltaB;
@@ -167,6 +175,7 @@ class SolverMHDMuscl : public ppkMHD::SolverBase
 
   void
   computeElectricField(DataArray Udata);
+
   void
   computeMagSlopes(DataArray Udata);
 
@@ -175,9 +184,16 @@ class SolverMHDMuscl : public ppkMHD::SolverBase
 
   void
   computeFluxesAndStore(real_t dt);
+
+  void
+  computeFluxesAndUpdate(real_t dt, DataArray Udata);
+
   void
   computeEmfAndStore(real_t dt);
 
+  void
+  computeEmfAndUpdate(real_t dt, DataArray Udata);
+
   // output
   void
   save_solution_impl();
@@ -201,6 +217,11 @@ SolverMHDMuscl<dim>::SolverMHDMuscl(HydroParams & params, ConfigMap & configMap)
   , U()
   , U2()
   , Q()
+  , Q2()
+  , sFaceMag()
+  , Slopes_x()
+  , Slopes_y()
+  , Slopes_z()
   , Qm_x()
   , Qm_y()
   , Qm_z()
@@ -259,7 +280,7 @@ SolverMHDMuscl<dim>::SolverMHDMuscl(HydroParams & params, ConfigMap & configMap)
 
     total_mem_size += isize * jsize * nbvar * sizeof(real_t) * 3; // 1+1+1 for U+U2+Q
 
-    if (params.implementationVersion == 0)
+    if (params.implementationVersion == 0 or params.implementationVersion == 1)
     {
 
       Qm_x = DataArray("Qm_x", isize, jsize, nbvar);
@@ -272,13 +293,29 @@ SolverMHDMuscl<dim>::SolverMHDMuscl(HydroParams & params, ConfigMap & configMap)
       QEdge_LT = DataArray("QEdge_LT", isize, jsize, nbvar);
       QEdge_LB = DataArray("QEdge_LB", isize, jsize, nbvar);
 
+      total_mem_size += isize * jsize * nbvar * sizeof(real_t) * 8;
+    }
+    else if (params.implementationVersion == 2)
+    {
+      Q2 = DataArray("Q2", isize, jsize, nbvar);
+      sFaceMag = DataArray("sFaceMag", isize, jsize, 2);
+      Slopes_x = DataArray("Slope_x", isize, jsize, nbvar);
+      Slopes_y = DataArray("Slope_y", isize, jsize, nbvar);
+      ElecField = DataArray("ElecField", isize, jsize, 1);
+
+      total_mem_size += isize * jsize * nbvar * sizeof(real_t) * (1 + 2);
+      total_mem_size += isize * jsize * 1 * sizeof(real_t) * 1;
+      total_mem_size += isize * jsize * 2 * sizeof(real_t) * 1;
+    }
+
+    if (params.implementationVersion == 0)
+    {
       Fluxes_x = DataArray("Fluxes_x", isize, jsize, nbvar);
       Fluxes_y = DataArray("Fluxes_y", isize, jsize, nbvar);
 
       Emf1 = DataArrayScalar("Emf", isize, jsize);
-
-      total_mem_size +=
-        isize * jsize * nbvar * sizeof(real_t) * 10 + isize * jsize * 1 * sizeof(real_t);
+      total_mem_size += isize * jsize * nbvar * sizeof(real_t) * 2;
+      total_mem_size += isize * jsize * 1 * sizeof(real_t);
     }
   }
   else
@@ -291,7 +328,7 @@ SolverMHDMuscl<dim>::SolverMHDMuscl(HydroParams & params, ConfigMap & configMap)
 
     total_mem_size += isize * jsize * ksize * nbvar * sizeof(real_t) * 3; // 1+1+1=3 for U+U2+Q
 
-    if (params.implementationVersion == 0)
+    if (params.implementationVersion == 0 or params.implementationVersion == 1)
     {
 
       Qm_x = DataArray("Qm_x", isize, jsize, ksize, nbvar);
@@ -317,22 +354,39 @@ SolverMHDMuscl<dim>::SolverMHDMuscl(HydroParams & params, ConfigMap & configMap)
       QEdge_LT3 = DataArray("QEdge_LT3", isize, jsize, ksize, nbvar);
       QEdge_LB3 = DataArray("QEdge_LB3", isize, jsize, ksize, nbvar);
 
-      Fluxes_x = DataArray("Fluxes_x", isize, jsize, ksize, nbvar);
-      Fluxes_y = DataArray("Fluxes_y", isize, jsize, ksize, nbvar);
-      Fluxes_z = DataArray("Fluxes_z", isize, jsize, ksize, nbvar);
-
-      Emf = DataArrayVector3("Emf", isize, jsize, ksize);
-
-      ElecField = DataArrayVector3("ElecField", isize, jsize, ksize);
+      ElecField = DataArray("ElecField", isize, jsize, ksize, 3);
 
       DeltaA = DataArrayVector3("DeltaA", isize, jsize, ksize);
       DeltaB = DataArrayVector3("DeltaB", isize, jsize, ksize);
       DeltaC = DataArrayVector3("DeltaC", isize, jsize, ksize);
 
-      total_mem_size += isize * jsize * ksize * nbvar * sizeof(real_t) * 21 +
-                        isize * jsize * ksize * 3 * sizeof(real_t) * 5;
+      total_mem_size += isize * jsize * ksize * nbvar * sizeof(real_t) * 18 +
+                        isize * jsize * ksize * 3 * sizeof(real_t) * 4;
+    }
+    else if (params.implementationVersion == 2)
+    {
+      Q2 = DataArray("Q2", isize, jsize, ksize, nbvar);
+      sFaceMag = DataArray("sFaceMag", isize, jsize, ksize, 3);
+      Slopes_x = DataArray("Slope_x", isize, jsize, ksize, nbvar);
+      Slopes_y = DataArray("Slope_y", isize, jsize, ksize, nbvar);
+      Slopes_z = DataArray("Slope_z", isize, jsize, ksize, nbvar);
+      ElecField = DataArray("ElecField", isize, jsize, ksize, 3);
+
+      total_mem_size += isize * jsize * ksize * nbvar * sizeof(real_t) * (1 + 3) +
+                        isize * jsize * ksize * 3 * sizeof(real_t) * 2;
     }
 
+    if (params.implementationVersion == 0)
+    {
+      Fluxes_x = DataArray("Fluxes_x", isize, jsize, ksize, nbvar);
+      Fluxes_y = DataArray("Fluxes_y", isize, jsize, ksize, nbvar);
+      Fluxes_z = DataArray("Fluxes_z", isize, jsize, ksize, nbvar);
+
+      Emf = DataArrayVector3("Emf", isize, jsize, ksize);
+
+      total_mem_size += isize * jsize * ksize * nbvar * sizeof(real_t) * 3 +
+                        isize * jsize * ksize * 3 * sizeof(real_t) * 1;
+    }
   } // dim == 2 / 3
 
   // perform init condition
@@ -354,20 +408,16 @@ SolverMHDMuscl<dim>::SolverMHDMuscl(HydroParams & params, ConfigMap & configMap)
 
   if (myRank == 0)
   {
-    std::cout << "##########################"
-              << "\n";
+    std::cout << "##########################" << "\n";
     std::cout << "Solver is " << m_solver_name << "\n";
     std::cout << "Problem (init condition) is " << m_problem_name << "\n";
-    std::cout << "##########################"
-              << "\n";
+    std::cout << "##########################" << "\n";
 
     // print parameters on screen
     params.print();
-    std::cout << "##########################"
-              << "\n";
+    std::cout << "##########################" << "\n";
     std::cout << "Memory requested : " << (total_mem_size / 1e6) << " MBytes\n";
-    std::cout << "##########################"
-              << "\n";
+    std::cout << "##########################" << "\n";
   }
 
 } // SolverMHDMuscl::SolverMHDMuscl
@@ -733,7 +783,7 @@ SolverMHDMuscl<dim>::next_iteration_impl()
       save_solution();
 
     } // end output
-  }   // end enable output
+  } // end enable output
 
   // compute new dt
   timers[TIMER_DT]->start();
@@ -817,10 +867,15 @@ void
 SolverMHDMuscl<dim>::computeElectricField(DataArray Udata)
 {
 
-  // NA, 3D only
+  // 2d / 3d implementation are specialized
 
 } // SolverMHDMuscl<dim>::computeElectricField
 
+// 2d
+template <>
+void
+SolverMHDMuscl<2>::computeElectricField(DataArray Udata);
+
 // 3d
 template <>
 void
diff --git a/src/shared/HydroParams.cpp b/src/shared/HydroParams.cpp
index be158e6..e261dff 100644
--- a/src/shared/HydroParams.cpp
+++ b/src/shared/HydroParams.cpp
@@ -201,9 +201,9 @@ HydroParams::setup(ConfigMap & configMap)
   }
 
   implementationVersion = configMap.getFloat("OTHER", "implementationVersion", 0);
-  if (implementationVersion != 0 and implementationVersion != 1)
+  if (implementationVersion != 0 and implementationVersion != 1 and implementationVersion != 2)
   {
-    std::cout << "Implementation version is invalid (must be 0 or 1)\n";
+    std::cout << "Implementation version is invalid (must be 0, 1 or 2)\n";
     std::cout << "Use the default : 0\n";
     implementationVersion = 0;
   }
diff --git a/src/shared/mhd_utils.h b/src/shared/mhd_utils.h
index 08244ef..01e2432 100644
--- a/src/shared/mhd_utils.h
+++ b/src/shared/mhd_utils.h
@@ -17,6 +17,19 @@
 namespace ppkMHD
 {
 
+/**
+ * enum used ti identified one the four state arround a cell edge.
+ *
+ * This is useful when computing emf (electromotive forces)
+ */
+enum class MHDEdgeLocation : uint8_t
+{
+  LB,
+  RB,
+  LT,
+  RT
+};
+
 /**
  * max value out of 4
  */