diff --git a/.gitignore b/.gitignore
index b1c8ed26e9..ebbbfaebeb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@ _local
 /src/__init__.py
 /tests/__init__.py
 .gt_cache/
+.gt4py_cache/
 .gt_cache_pytest*/
 
 # DaCe
diff --git a/src/gt4py/cartesian/gtc/common.py b/src/gt4py/cartesian/gtc/common.py
index ef38a9a658..60236a3e97 100644
--- a/src/gt4py/cartesian/gtc/common.py
+++ b/src/gt4py/cartesian/gtc/common.py
@@ -118,7 +118,7 @@ def isbool(self):
         return self == self.BOOL
 
     def isinteger(self):
-        return self in (self.INT8, self.INT32, self.INT64)
+        return self in (self.INT8, self.INT16, self.INT32, self.INT64)
 
     def isfloat(self):
         return self in (self.FLOAT32, self.FLOAT64)
diff --git a/src/gt4py/cartesian/gtc/dace/daceir.py b/src/gt4py/cartesian/gtc/dace/daceir.py
index 492a9598c5..43a33fdd6d 100644
--- a/src/gt4py/cartesian/gtc/dace/daceir.py
+++ b/src/gt4py/cartesian/gtc/dace/daceir.py
@@ -734,7 +734,13 @@ class ScalarAccess(common.ScalarAccess, Expr):
 
 
 class VariableKOffset(common.VariableKOffset[Expr]):
-    pass
+    @datamodels.validator("k")
+    def no_casts_in_offset_expression(self, _: datamodels.Attribute, expression: Expr) -> None:
+        for part in expression.walk_values():
+            if isinstance(part, Cast):
+                raise ValueError(
+                    "DaCe backends are currently missing support for casts in variable k offsets. See issue https://github.com/GridTools/gt4py/issues/1881."
+                )
 
 
 class IndexAccess(common.FieldAccess, Expr):
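The validator above rejects `Cast` nodes anywhere inside a variable k offset expression. For intuition on where such casts come from, here is a small standalone NumPy sketch (illustrative only, not part of the patch): mixing integer widths in an offset expression triggers an implicit promotion, which the frontend represents as a `Cast` node.

```python
import numpy as np

# Mixed-width integer arithmetic promotes the narrower operand.
i32 = np.int32(1)
i64 = np.int64(2)

# The result is int64: the int32 operand is implicitly cast before the
# subtraction. The same promotion places a Cast node inside a variable
# k offset such as `in_field[0, 0, i32 - i64]`, which the validator
# above now rejects for DaCe backends.
assert (i32 - i64).dtype == np.int64
```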
diff --git a/src/gt4py/cartesian/gtc/dace/expansion/tasklet_codegen.py b/src/gt4py/cartesian/gtc/dace/expansion/tasklet_codegen.py
index 8033c64710..2948b9d76d 100644
--- a/src/gt4py/cartesian/gtc/dace/expansion/tasklet_codegen.py
+++ b/src/gt4py/cartesian/gtc/dace/expansion/tasklet_codegen.py
@@ -44,7 +44,9 @@ def _visit_offset(
         else:
            int_sizes.append(None)
        sym_offsets = [
-            dace.symbolic.pystr_to_symbolic(self.visit(off, **kwargs))
+            dace.symbolic.pystr_to_symbolic(
+                self.visit(off, access_info=access_info, decl=decl, **kwargs)
+            )
            for off in (node.to_dict()["i"], node.to_dict()["j"], node.k)
        ]
        for axis in access_info.variable_offset_axes:
diff --git a/src/gt4py/next/program_processors/runners/dace/gtir_dataflow.py b/src/gt4py/next/program_processors/runners/dace/gtir_dataflow.py
index e6f33208e3..43e7c6354d 100644
--- a/src/gt4py/next/program_processors/runners/dace/gtir_dataflow.py
+++ b/src/gt4py/next/program_processors/runners/dace/gtir_dataflow.py
@@ -232,15 +232,36 @@ def connect(
         dest: dace.nodes.AccessNode,
         subset: dace_subsets.Range,
     ) -> None:
-        # retrieve the node which writes the result
-        last_node = self.state.in_edges(self.result.dc_node)[0].src
-        if isinstance(last_node, dace.nodes.Tasklet):
-            # the last transient node can be deleted
-            # Note that it could also be applied when `last_node` is a NestedSDFG,
-            # but an exception would be when the inner write to global data is a
-            # WCR memlet, because that prevents fusion of the outer map. This case
-            # happens for the reduce with skip values, which uses a map with WCR.
-            last_node_connector = self.state.in_edges(self.result.dc_node)[0].src_conn
+        write_edge = self.state.in_edges(self.result.dc_node)[0]
+        write_size = write_edge.data.dst_subset.num_elements()
+        # check the kind of node which writes the result
+        if isinstance(write_edge.src, dace.nodes.Tasklet):
+            # The temporary data written by a tasklet can be safely deleted.
+            assert write_size.is_constant()
+            remove_last_node = True
+        elif isinstance(write_edge.src, dace.nodes.NestedSDFG):
+            if write_size.is_constant():
+                # Temporary data with compile-time size is allocated on the stack
+                # and is therefore safe to keep. We decide to keep it as a workaround
+                # for a dace issue with memlet propagation in combination with
+                # nested SDFGs containing conditional blocks. The output memlet
+                # of such blocks will be marked as dynamic because dace is not able
+                # to detect the exact size of a conditional branch dataflow, even
+                # for if-else expressions with exactly the same output data.
+                remove_last_node = False
+            else:
+                # If the output data has runtime size, it must be removed in order
+                # to avoid dynamic memory allocation inside a parallel map scope.
+                # Such allocation would degrade performance and can eventually
+                # cause illegal memory accesses when the GPU runs out of local
+                # memory.
+                remove_last_node = True
+        else:
+            remove_last_node = False
+
+        if remove_last_node:
+            last_node = write_edge.src
+            last_node_connector = write_edge.src_conn
             self.state.remove_node(self.result.dc_node)
         else:
             last_node = self.result.dc_node
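The branch on `write_size.is_constant()` above distinguishes compile-time from runtime-sized writes. Sizes obtained from dace memlet subsets are sympy expressions, so a minimal sympy sketch of the distinction (the symbol `N` is purely illustrative) looks like this:

```python
import sympy

# A write covering a fixed 8x4 block: the element count is a
# compile-time constant, so the temporary can be kept (it is
# allocated on the stack inside the map scope).
tile_size = sympy.Integer(8) * sympy.Integer(4)
assert tile_size.is_constant()

# A write whose extent depends on a runtime symbol: the element count
# is not constant, so the temporary is removed to avoid dynamic
# allocation inside the parallel map scope.
N = sympy.Symbol("N", positive=True)
runtime_size = N * sympy.Integer(4)
assert not runtime_size.is_constant()
```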
diff --git a/tests/cartesian_tests/definitions.py b/tests/cartesian_tests/definitions.py
index 7499ad4a95..4d52b9b773 100644
--- a/tests/cartesian_tests/definitions.py
+++ b/tests/cartesian_tests/definitions.py
@@ -51,6 +51,12 @@ def _get_backends_with_storage_info(storage_info_kind: str):
 _PERFORMANCE_BACKEND_NAMES = [name for name in _ALL_BACKEND_NAMES if name not in ("numpy", "cuda")]
 PERFORMANCE_BACKENDS = [_backend_name_as_param(name) for name in _PERFORMANCE_BACKEND_NAMES]
+DACE_BACKENDS = [
+    _backend_name_as_param(name)
+    for name in filter(lambda name: name.startswith("dace:"), _ALL_BACKEND_NAMES)
+]
+NON_DACE_BACKENDS = [backend for backend in ALL_BACKENDS if backend not in DACE_BACKENDS]
+
 
 
 @pytest.fixture()
 def id_version():
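As a quick illustration of the partition these two lists create (the backend names below are examples; the real values come from `_ALL_BACKEND_NAMES`):

```python
# Hypothetical subset of backend names, for illustration only.
names = ["numpy", "gt:cpu_ifirst", "gt:gpu", "dace:cpu", "dace:gpu"]

dace_names = [n for n in names if n.startswith("dace:")]
non_dace_names = [n for n in names if not n.startswith("dace:")]

assert dace_names == ["dace:cpu", "dace:gpu"]
assert non_dace_names == ["numpy", "gt:cpu_ifirst", "gt:gpu"]
```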
diff --git a/tests/cartesian_tests/integration_tests/multi_feature_tests/test_code_generation.py b/tests/cartesian_tests/integration_tests/multi_feature_tests/test_code_generation.py
index 8ace0de740..8e5f3466d0 100644
--- a/tests/cartesian_tests/integration_tests/multi_feature_tests/test_code_generation.py
+++ b/tests/cartesian_tests/integration_tests/multi_feature_tests/test_code_generation.py
@@ -27,7 +27,13 @@
 )
 from gt4py.storage.cartesian import utils as storage_utils
-from cartesian_tests.definitions import ALL_BACKENDS, CPU_BACKENDS, get_array_library
+from cartesian_tests.definitions import (
+    ALL_BACKENDS,
+    CPU_BACKENDS,
+    DACE_BACKENDS,
+    NON_DACE_BACKENDS,
+    get_array_library,
+)
 from cartesian_tests.integration_tests.multi_feature_tests.stencil_definitions import (
     EXTERNALS_REGISTRY as externals_registry,
     REGISTRY as stencil_definitions,
 )
@@ -762,3 +768,89 @@ def test(
     out_arr = gt_storage.ones(backend=backend, shape=domain, dtype=np.float64)
     test(in_arr, out_arr)
     assert (out_arr[:, :, :] == 388.0).all()
+
+
+@pytest.mark.parametrize("backend", NON_DACE_BACKENDS)
+def test_cast_in_index(backend):
+    @gtscript.stencil(backend)
+    def cast_in_index(
+        in_field: Field[np.float64], i32: np.int32, i64: np.int64, out_field: Field[np.float64]
+    ):
+        """Simple copy stencil with forced cast in index calculation."""
+        with computation(PARALLEL), interval(...):
+            out_field = in_field[0, 0, i32 - i64]
+
+
+@pytest.mark.parametrize("backend", DACE_BACKENDS)
+@pytest.mark.xfail(raises=ValueError)
+def test_dace_no_cast_in_index(backend):
+    @gtscript.stencil(backend)
+    def cast_in_index(
+        in_field: Field[np.float64], i32: np.int32, i64: np.int64, out_field: Field[np.float64]
+    ):
+        """Simple copy stencil with forced cast in index calculation."""
+        with computation(PARALLEL), interval(...):
+            out_field = in_field[0, 0, i32 - i64]
+
+
+@pytest.mark.parametrize("backend", ALL_BACKENDS)
+def test_read_after_write_stencil(backend):
+    """Stencil with multiple read after write access patterns."""
+
+    @gtscript.stencil(backend=backend)
+    def lagrangian_contributions(
+        q: Field[np.float64],
+        pe1: Field[np.float64],
+        pe2: Field[np.float64],
+        q4_1: Field[np.float64],
+        q4_2: Field[np.float64],
+        q4_3: Field[np.float64],
+        q4_4: Field[np.float64],
+        dp1: Field[np.float64],
+        lev: gtscript.Field[gtscript.IJ, np.int64],
+    ):
+        """
+        Args:
+            q (out):
+            pe1 (in):
+            pe2 (in):
+            q4_1 (in):
+            q4_2 (in):
+            q4_3 (in):
+            q4_4 (in):
+            dp1 (in):
+            lev (inout):
+        """
+        with computation(FORWARD), interval(...):
+            pl = (pe2 - pe1[0, 0, lev]) / dp1[0, 0, lev]
+            if pe2[0, 0, 1] <= pe1[0, 0, lev + 1]:
+                pr = (pe2[0, 0, 1] - pe1[0, 0, lev]) / dp1[0, 0, lev]
+                q = (
+                    q4_2[0, 0, lev]
+                    + 0.5 * (q4_4[0, 0, lev] + q4_3[0, 0, lev] - q4_2[0, 0, lev]) * (pr + pl)
+                    - q4_4[0, 0, lev] * 1.0 / 3.0 * (pr * (pr + pl) + pl * pl)
+                )
+            else:
+                qsum = (pe1[0, 0, lev + 1] - pe2) * (
+                    q4_2[0, 0, lev]
+                    + 0.5 * (q4_4[0, 0, lev] + q4_3[0, 0, lev] - q4_2[0, 0, lev]) * (1.0 + pl)
+                    - q4_4[0, 0, lev] * 1.0 / 3.0 * (1.0 + pl * (1.0 + pl))
+                )
+                lev = lev + 1
+                while pe1[0, 0, lev + 1] < pe2[0, 0, 1]:
+                    qsum += dp1[0, 0, lev] * q4_1[0, 0, lev]
+                    lev = lev + 1
+                dp = pe2[0, 0, 1] - pe1[0, 0, lev]
+                esl = dp / dp1[0, 0, lev]
+                qsum += dp * (
+                    q4_2[0, 0, lev]
+                    + 0.5
+                    * esl
+                    * (
+                        q4_3[0, 0, lev]
+                        - q4_2[0, 0, lev]
+                        + q4_4[0, 0, lev] * (1.0 - (2.0 / 3.0) * esl)
+                    )
+                )
+                q = qsum / (pe2[0, 0, 1] - pe2)
+                lev = lev - 1
diff --git a/tests/cartesian_tests/unit_tests/test_gtc/test_common.py b/tests/cartesian_tests/unit_tests/test_gtc/test_common.py
index 68006c113b..4e799d2090 100644
--- a/tests/cartesian_tests/unit_tests/test_gtc/test_common.py
+++ b/tests/cartesian_tests/unit_tests/test_gtc/test_common.py
@@ -41,6 +41,24 @@
 # - For testing non-leave nodes, introduce builders with defaults (for leave nodes as well)
 
 
+def test_data_type_methods():
+    for type in DataType:
+        if type == DataType.BOOL:
+            assert type.isbool()
+        else:
+            assert not type.isbool()
+
+        if type in (DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64):
+            assert type.isinteger()
+        else:
+            assert not type.isinteger()
+
+        if type in (DataType.FLOAT32, DataType.FLOAT64):
+            assert type.isfloat()
+        else:
+            assert not type.isfloat()
+
+
 class DummyExpr(Expr):
     """Fake expression for cases where a concrete expression is not needed."""
 
diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py
index 8ebb240339..030aa9b131 100644
--- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py
+++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py
@@ -1846,7 +1846,7 @@ def test_gtir_let_lambda_with_tuple1():
 
     sdfg = build_dace_sdfg(testee, CARTESIAN_OFFSETS)
 
-    z_fields = (np.empty_like(a), np.empty_like(a))
+    z_fields = (np.zeros_like(a), np.zeros_like(a))
     a_ref = np.concatenate((z_fields[0][:1], a[1 : N - 1], z_fields[0][N - 1 :]))
     b_ref = np.concatenate((z_fields[1][:1], b[1 : N - 1], z_fields[1][N - 1 :]))
 
@@ -2037,7 +2037,7 @@ def test_gtir_index():
         ],
     )
 
-    v = np.empty(N, dtype=np.int32)
+    v = np.zeros(N, dtype=np.int32)
 
     # we need to run domain inference in order to add the domain annex information to the index node.
     testee = infer_domain.infer_program(testee, offset_provider=CARTESIAN_OFFSETS)
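The two `np.empty*` to `np.zeros*` changes above make the test arrays deterministic: `np.empty` and `np.empty_like` return uninitialized memory whose contents are arbitrary (possibly even NaN, which compares unequal to itself), so comparisons involving elements the SDFG never writes can fail intermittently. A minimal illustration:

```python
import numpy as np

a = np.empty(4)  # uninitialized: contents are arbitrary, possibly NaN
b = np.zeros(4)  # deterministic: always all zeros

assert (b == 0).all()  # always holds
# (a == a).all() is NOT guaranteed: if `a` happens to contain a NaN,
# the elementwise comparison yields False at that position.
```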