moj-analytical-services · RobinL · Nov 26, 2024 · Nov 26, 2024
diff --git a/splink/internals/connected_components.py b/splink/internals/connected_components.py
@@ -438,7 +438,7 @@ def solve_connected_components(
             SELECT representative FROM __splink__representatives_stable_{iteration}
         )
         """
-        pipeline.enqueue_sql(sql, "__splink__representatives_unstable")
+        pipeline.enqueue_sql(sql, f"__splink__representatives_unstable_{iteration}")
         prev_representatives_thinned = db_api.sql_pipeline_to_splink_dataframe(pipeline)
 
         # 1a. Thin neighbours table - we can drop all rows that refer to

diff --git a/tests/helpers.py b/tests/helpers.py
@@ -73,8 +73,8 @@ def DatabaseAPI(self):
     def db_api_args(self):
         return {
             "spark_session": self.spark,
-            "num_partitions_on_repartition": 1,
-            "break_lineage_method": "checkpoint",
+            "num_partitions_on_repartition": 2,
+            "break_lineage_method": "parquet",
         }
 
     def convert_frame(self, df):