Skip to content

Commit

Permalink
address design review comments on method naming conventions
Browse files Browse the repository at this point in the history
  • Loading branch information
sujaypatil96 committed Jan 22, 2025
1 parent d9902df commit f91d3d3
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 21 deletions.
26 changes: 17 additions & 9 deletions nmdc_runtime/site/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from nmdc_runtime.site.ops import (
build_merged_db,
missing_gold_biosample_repair,
generate_biosample_set_for_nmdc_study_from_gold,
nmdc_schema_database_export_filename,
nmdc_schema_database_from_gold_study,
nmdc_schema_object_to_dict,
Expand Down Expand Up @@ -59,8 +59,8 @@
ncbi_submission_xml_from_nmdc_study,
ncbi_submission_xml_asset,
get_database_updater_inputs,
nmdc_study_id_filename,
missing_data_generation_repair,
post_submission_portal_biosample_ingest_record_stitching_filename,
generate_data_generation_set_post_biosample_ingest,
)
from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id

Expand Down Expand Up @@ -474,25 +474,33 @@ def nmdc_study_to_ncbi_submission_export():


@graph
def fill_missing_data_generation_data_object_records():
def generate_data_generation_set_for_biosamples_in_nmdc_study():
(study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)

database = missing_data_generation_repair(study_id, gold_nmdc_instrument_map_df)
database = generate_data_generation_set_post_biosample_ingest(
study_id, gold_nmdc_instrument_map_df
)

database_dict = nmdc_schema_object_to_dict(database)
filename = nmdc_study_id_filename(study_id)
filename = post_submission_portal_biosample_ingest_record_stitching_filename(
study_id
)
outputs = export_json_to_drs(database_dict, filename)
add_output_run_event(outputs)


@graph
def fill_missing_biosample_records_from_gold():
def generate_biosample_set_from_samples_in_gold():
(study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)

database = missing_gold_biosample_repair(study_id, gold_nmdc_instrument_map_df)
database = generate_biosample_set_for_nmdc_study_from_gold(
study_id, gold_nmdc_instrument_map_df
)
database_dict = nmdc_schema_object_to_dict(database)
filename = nmdc_study_id_filename(study_id)
filename = post_submission_portal_biosample_ingest_record_stitching_filename(
study_id
)
outputs = export_json_to_drs(database_dict, filename)
add_output_run_event(outputs)
12 changes: 8 additions & 4 deletions nmdc_runtime/site/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1249,7 +1249,9 @@ def ncbi_submission_xml_from_nmdc_study(


@op
def nmdc_study_id_filename(nmdc_study_id: str) -> str:
def post_submission_portal_biosample_ingest_record_stitching_filename(
nmdc_study_id: str,
) -> str:
filename = nmdc_study_id_to_filename(nmdc_study_id)
return f"missing_database_records_for_{filename}.json"

Expand Down Expand Up @@ -1278,7 +1280,7 @@ def get_database_updater_inputs(context: OpExecutionContext) -> Tuple[str, str]:
"gold_api_client",
}
)
def missing_data_generation_repair(
def generate_data_generation_set_post_biosample_ingest(
context: OpExecutionContext,
nmdc_study_id: str,
gold_nmdc_instrument_map_df: pd.DataFrame,
Expand All @@ -1298,7 +1300,9 @@ def missing_data_generation_repair(
nmdc_study_id,
gold_nmdc_instrument_map_df,
)
database = database_updater.create_missing_dg_records()
database = (
database_updater.generate_data_generation_set_records_from_gold_api_for_study()
)

return database

Expand All @@ -1310,7 +1314,7 @@ def missing_data_generation_repair(
"gold_api_client",
}
)
def missing_gold_biosample_repair(
def generate_biosample_set_for_nmdc_study_from_gold(
context: OpExecutionContext,
nmdc_study_id: str,
gold_nmdc_instrument_map_df: pd.DataFrame,
Expand Down
4 changes: 3 additions & 1 deletion nmdc_runtime/site/repair/database_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ def _fetch_gold_projects(self, gold_biosample_id: str):
"""
return self.gold_api_client.fetch_projects_by_biosample(gold_biosample_id)

def create_missing_dg_records(self) -> nmdc.Database:
def generate_data_generation_set_records_from_gold_api_for_study(
self,
) -> nmdc.Database:
"""This method creates missing data generation records for a given study in the NMDC database using
metadata from GOLD. The way the logic works is, it first fetches all the biosamples associated
with the study from the NMDC database. Then, it fetches all the biosample and project data
Expand Down
12 changes: 7 additions & 5 deletions nmdc_runtime/site/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from nmdc_runtime.api.models.trigger import Trigger
from nmdc_runtime.site.export.study_metadata import export_study_biosamples_metadata
from nmdc_runtime.site.graphs import (
fill_missing_biosample_records_from_gold,
generate_biosample_set_from_samples_in_gold,
translate_metadata_submission_to_nmdc_schema_database,
ingest_metadata_submission,
gold_study_to_database,
Expand All @@ -45,7 +45,7 @@
ingest_neon_surface_water_metadata,
ensure_alldocs,
nmdc_study_to_ncbi_submission_export,
fill_missing_data_generation_data_object_records,
generate_data_generation_set_for_biosamples_in_nmdc_study,
)
from nmdc_runtime.site.resources import (
get_mongo,
Expand Down Expand Up @@ -925,10 +925,11 @@ def biosample_export():


@repository
def database_record_repair():
def database_records_stitching():
normal_resources = run_config_frozen__normal_env["resources"]
return [
fill_missing_data_generation_data_object_records.to_job(
generate_data_generation_set_for_biosamples_in_nmdc_study.to_job(
description="This job can be used to create a data_generation_set JSON for biosamples that are already present in the NMDC database.",
resource_defs=resource_defs,
config={
"resources": merge(
Expand Down Expand Up @@ -969,7 +970,8 @@ def database_record_repair():
},
},
),
fill_missing_biosample_records_from_gold.to_job(
generate_biosample_set_from_samples_in_gold.to_job(
description="This job can be used to create a biosample_set JSON from samples in GOLD for a given study in NMDC.",
resource_defs=resource_defs,
config={
"resources": merge(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_data/test_database_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def db_updater(mock_clients):


@patch("nmdc_runtime.site.repair.database_updater.GoldStudyTranslator")
def test_create_missing_dg_records(
def test_generate_data_generation_set_records_from_gold_api_for_study(
MockGoldStudyTranslator, db_updater, mock_clients, test_minter
):
mock_runtime_api_user_client = mock_clients["runtime_api_user_client"]
Expand Down Expand Up @@ -95,7 +95,7 @@ def test_create_missing_dg_records(
mint_id_mock.json.return_value = test_minter("nmdc:NucleotideSequencing", 1)
mock_runtime_api_site_client.mint_id.return_value = mint_id_mock

database = db_updater.create_missing_dg_records()
database = db_updater.generate_data_generation_set_records_from_gold_api_for_study()

assert database is not None
assert len(database.data_generation_set) > 0
Expand Down

0 comments on commit f91d3d3

Please sign in to comment.