From 86fd4bb03e2aecf7f5d4ad098ef381ed158f5901 Mon Sep 17 00:00:00 2001 From: "Wilkins, Emily (Counterpointe Solutions)" <80470879+ewilkins-csi@users.noreply.github.com> Date: Tue, 19 Nov 2024 11:19:22 -0600 Subject: [PATCH] [#469] fix encryption ITs for changed MDA model Some of the encryption IT scenarios were checking that the right number of fields were retrieved by the `get_fields_list` method. I'm not sure that this makes sense as an encryption test. Either the encryption method (which uses `get_fields_list`) encrypts fields correctly, or it doesn't. I don't see a ton of value in testing that method specifically over the actual encryption tests which indirectly test `get_fields_list`. This changeset also splits off the vault scenarios into a separate feature, as those are the only real "IT" scenarios. We can't just tag the scenarios directly because the feature-level tag is used to determine whether we should stand up the Vault image, and we can't base this logic at the scenario level because that would tear down and deploy Vault between each scenario, which is wasteful since we aren't changing the state of Vault between scenarios. 
--- .../features/pyspark_data_encryption.feature | 36 +------------------ .../pyspark_data_encryption_vault.feature | 18 ++++++++++ .../steps/pyspark_data_encryption_steps.py | 34 ------------------ 3 files changed, 19 insertions(+), 69 deletions(-) create mode 100644 test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption_vault.feature diff --git a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption.feature b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption.feature index c8a9a4efc..de167bf6c 100644 --- a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption.feature +++ b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption.feature @@ -1,23 +1,8 @@ -@pyspark_data_encryption @integration +@pyspark_data_encryption Feature: Data encryption # Generated sample BDD specification/feature file - PLEASE ***DO*** MODIFY. # Originally generated from templates/behave.feature.vm. 
- Scenario: Get fields from native inbound collection with inbound record type (set([CustomData])) - Given a pipeline with native inbound collection and inbound record type - When field names are retrieved for the inbound record - Then a list of field names can be retrieved - - Scenario: Get fields from native non-collection inbound without an inbound record type (DataFrame) - Given a pipeline with native non-collection inbound and no inbound record type - When field names are retrieved for the inbound record - Then a list of field names can be retrieved - - Scenario: Get fields from native collection inbound without an inbound record type (set([DataFrame])) - Given a pipeline with native collection inbound and no inbound record type - When field names are retrieved for the set of inbound dataframes - Then each list of field names can be retrieved - Scenario: Encrypt fields for native inbound with inbound record type (set([CustomData])) Given a pipeline with native inbound collection and inbound record type When encryption is called on the inbound record @@ -37,22 +22,3 @@ Feature: Data encryption Given a pipeline with native inbound collection and inbound record type When AES encryption is requested Then the correct AES algorithm is applied to the data set - - @integration - Scenario: Specifying the Vault encryption algorithm yields a custom field encrypted with the corresponding algorithm - Given a pipeline with native inbound collection and inbound record type - When Vault encryption is requested - Then the correct Vault algorithm is applied to the data set - - @integration - Scenario: Specifying the Vault encryption algorithm yields a dataframe field encrypted with the corresponding algorithm - Given a pipeline with native non-collection inbound and no inbound record type - When Vault encryption is requested - Then the correct Vault algorithm is applied to the dataframe - - @integration - Scenario: Encrypt fields for native collection inbound without an inbound 
record type (set([DataFrame])) - Given a pipeline with native collection inbound and no inbound record type - When Vault encryption is requested - Then the correct dataframe fields are vault encrypted for each dataframe - diff --git a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption_vault.feature b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption_vault.feature new file mode 100644 index 000000000..1fe5761c8 --- /dev/null +++ b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption_vault.feature @@ -0,0 +1,18 @@ +@pyspark_data_encryption @integration +Feature: Data encryption + + Scenario: Specifying the Vault encryption algorithm yields a custom field encrypted with the corresponding algorithm + Given a pipeline with native inbound collection and inbound record type + When Vault encryption is requested + Then the correct Vault algorithm is applied to the data set + + Scenario: Specifying the Vault encryption algorithm yields a dataframe field encrypted with the corresponding algorithm + Given a pipeline with native non-collection inbound and no inbound record type + When Vault encryption is requested + Then the correct Vault algorithm is applied to the dataframe + + Scenario: Encrypt fields for native collection inbound without an inbound record type (set([DataFrame])) + Given a pipeline with native collection inbound and no inbound record type + When Vault encryption is requested + Then the correct dataframe fields are vault encrypted for each dataframe + diff --git a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/steps/pyspark_data_encryption_steps.py b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/steps/pyspark_data_encryption_steps.py index 0c15e281b..19165de64 100644 --- 
a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/steps/pyspark_data_encryption_steps.py +++ b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/steps/pyspark_data_encryption_steps.py @@ -16,12 +16,7 @@ """ from behave import given, when, then # pylint: disable=no-name-in-module -from os import path, walk import nose.tools as nt -import ast -import sys -import os -from pathlib import Path from aissemble_test_data_delivery_pyspark_model.step.native_inbound_with_custom_types import ( NativeInboundWithCustomTypes, ) @@ -31,12 +26,8 @@ from aissemble_test_data_delivery_pyspark_model.step.native_inbound_with_custom_collection_type import ( NativeInboundWithCustomCollectionType, ) -from aissemble_test_data_delivery_pyspark_model.step.native_inbound_with_custom_data_type_async import ( - NativeInboundWithCustomDataTypeAsync, -) from aissemble_test_data_delivery_pyspark_model.record.custom_data import CustomData from krausening.logging import LogManager -from pyspark.sql.types import StructType, StructField, StringType logger = LogManager.get_instance().get_logger("DataEncryptionTest") @@ -92,21 +83,6 @@ def step_impl(context): ) -@when("field names are retrieved for the inbound record") -def step_impl(context): - context.input_fields = context.pipeline.get_fields_list(context.inbound) - logger.info(context.input_fields) - nt.eq_(len(context.input_fields), 1, "Wrong number of input fields") - - -@when("field names are retrieved for the set of inbound dataframes") -def step_impl(context): - for df in context.inbound: - context.input_fields = context.pipeline.get_fields_list(df) - logger.info(context.input_fields) - nt.eq_(len(context.input_fields), 1, "Wrong number of input fields") - - @when("encryption is called on the inbound record") def step_impl(context): context.encrypted_dataset = context.pipeline.apply_encryption_to_dataset( @@ -125,16 +101,6 @@ def step_impl(context): 
logger.info(context.encrypted_dataset) -@then("a list of field names can be retrieved") -def step_impl(context): - nt.eq_( - context.input_fields[0], - "custom_field", - 'Input field was not "custom_field". Instead it was ' - + context.input_fields[0], - ) - - @then("the correct fields are encrypted") def step_impl(context): for record in context.encrypted_dataset: