From 86fd4bb03e2aecf7f5d4ad098ef381ed158f5901 Mon Sep 17 00:00:00 2001 From: "Wilkins, Emily (Counterpointe Solutions)" <80470879+ewilkins-csi@users.noreply.github.com> Date: Tue, 19 Nov 2024 11:19:22 -0600 Subject: [PATCH] [#469] fix encryption ITs for changed MDA model Some of the encryption IT scenarios were checking that the right number of fields were retrieved by the `get_fields_list` method. I'm not sure that this makes sense as an encryption test. Either the encryption method (which uses `get_fields_list`) encrypts fields correctly, or it doesn't. I don't see a ton of value in testing that method specifically over the actual encryption tests which indirectly test `get_fields_list`. This changeset also splits off the vault scenarios into a separate feature, as those are the only real "IT" scenarios. We can't just tag the scenarios directly because the feature-level tag is used to determine whether we should stand up the Vault image, and we can't base this logic at the scenario level because that would tear down and deploy Vault between each scenario, which is wasteful since we aren't changing the state of Vault between scenarios. 
--- .../features/pyspark_data_encryption.feature | 36 +------------------ .../pyspark_data_encryption_vault.feature | 18 ++++++++++ .../steps/pyspark_data_encryption_steps.py | 34 ------------------ 3 files changed, 19 insertions(+), 69 deletions(-) create mode 100644 test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption_vault.feature diff --git a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption.feature b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption.feature index c8a9a4efc..de167bf6c 100644 --- a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption.feature +++ b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption.feature @@ -1,23 +1,8 @@ -@pyspark_data_encryption @integration +@pyspark_data_encryption Feature: Data encryption # Generated sample BDD specification/feature file - PLEASE ***DO*** MODIFY. # Originally generated from templates/behave.feature.vm. 
- Scenario: Get fields from native inbound collection with inbound record type (set([CustomData])) - Given a pipeline with native inbound collection and inbound record type - When field names are retrieved for the inbound record - Then a list of field names can be retrieved - - Scenario: Get fields from native non-collection inbound without an inbound record type (DataFrame) - Given a pipeline with native non-collection inbound and no inbound record type - When field names are retrieved for the inbound record - Then a list of field names can be retrieved - - Scenario: Get fields from native collection inbound without an inbound record type (set([DataFrame])) - Given a pipeline with native collection inbound and no inbound record type - When field names are retrieved for the set of inbound dataframes - Then each list of field names can be retrieved - Scenario: Encrypt fields for native inbound with inbound record type (set([CustomData])) Given a pipeline with native inbound collection and inbound record type When encryption is called on the inbound record @@ -37,22 +22,3 @@ Feature: Data encryption Given a pipeline with native inbound collection and inbound record type When AES encryption is requested Then the correct AES algorithm is applied to the data set - - @integration - Scenario: Specifying the Vault encryption algorithm yields a custom field encrypted with the corresponding algorithm - Given a pipeline with native inbound collection and inbound record type - When Vault encryption is requested - Then the correct Vault algorithm is applied to the data set - - @integration - Scenario: Specifying the Vault encryption algorithm yields a dataframe field encrypted with the corresponding algorithm - Given a pipeline with native non-collection inbound and no inbound record type - When Vault encryption is requested - Then the correct Vault algorithm is applied to the dataframe - - @integration - Scenario: Encrypt fields for native collection inbound without an inbound 
record type (set([DataFrame])) - Given a pipeline with native collection inbound and no inbound record type - When Vault encryption is requested - Then the correct dataframe fields are vault encrypted for each dataframe - diff --git a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption_vault.feature b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption_vault.feature new file mode 100644 index 000000000..1fe5761c8 --- /dev/null +++ b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/pyspark_data_encryption_vault.feature @@ -0,0 +1,18 @@ +@pyspark_data_encryption @integration +Feature: Data encryption + + Scenario: Specifying the Vault encryption algorithm yields a custom field encrypted with the corresponding algorithm + Given a pipeline with native inbound collection and inbound record type + When Vault encryption is requested + Then the correct Vault algorithm is applied to the data set + + Scenario: Specifying the Vault encryption algorithm yields a dataframe field encrypted with the corresponding algorithm + Given a pipeline with native non-collection inbound and no inbound record type + When Vault encryption is requested + Then the correct Vault algorithm is applied to the dataframe + + Scenario: Encrypt fields for native collection inbound without an inbound record type (set([DataFrame])) + Given a pipeline with native collection inbound and no inbound record type + When Vault encryption is requested + Then the correct dataframe fields are vault encrypted for each dataframe + diff --git a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/steps/pyspark_data_encryption_steps.py b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/steps/pyspark_data_encryption_steps.py index 0c15e281b..19165de64 100644 --- 
a/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/steps/pyspark_data_encryption_steps.py +++ b/test/test-mda-models/aissemble-test-data-delivery-pyspark-model/tests/features/steps/pyspark_data_encryption_steps.py @@ -16,12 +16,7 @@ """ from behave import given, when, then # pylint: disable=no-name-in-module -from os import path, walk import nose.tools as nt -import ast -import sys -import os -from pathlib import Path from aissemble_test_data_delivery_pyspark_model.step.native_inbound_with_custom_types import ( NativeInboundWithCustomTypes, ) @@ -31,12 +26,8 @@ from aissemble_test_data_delivery_pyspark_model.step.native_inbound_with_custom_collection_type import ( NativeInboundWithCustomCollectionType, ) -from aissemble_test_data_delivery_pyspark_model.step.native_inbound_with_custom_data_type_async import ( - NativeInboundWithCustomDataTypeAsync, -) from aissemble_test_data_delivery_pyspark_model.record.custom_data import CustomData from krausening.logging import LogManager -from pyspark.sql.types import StructType, StructField, StringType logger = LogManager.get_instance().get_logger("DataEncryptionTest") @@ -92,21 +83,6 @@ def step_impl(context): ) -@when("field names are retrieved for the inbound record") -def step_impl(context): - context.input_fields = context.pipeline.get_fields_list(context.inbound) - logger.info(context.input_fields) - nt.eq_(len(context.input_fields), 1, "Wrong number of input fields") - - -@when("field names are retrieved for the set of inbound dataframes") -def step_impl(context): - for df in context.inbound: - context.input_fields = context.pipeline.get_fields_list(df) - logger.info(context.input_fields) - nt.eq_(len(context.input_fields), 1, "Wrong number of input fields") - - @when("encryption is called on the inbound record") def step_impl(context): context.encrypted_dataset = context.pipeline.apply_encryption_to_dataset( @@ -125,16 +101,6 @@ def step_impl(context): 
logger.info(context.encrypted_dataset) -@then("a list of field names can be retrieved") -def step_impl(context): - nt.eq_( - context.input_fields[0], - "custom_field", - 'Input field was not "custom_field". Instead it was ' - + context.input_fields[0], - ) - - @then("the correct fields are encrypted") def step_impl(context): for record in context.encrypted_dataset: