Skip to content

Commit

Permalink
Updated run_pipeline script to use the latest pipeline (p_53, replacing p_48)
Browse files: browse the repository at this point in the history
  • Loading branch information
mbernste committed Mar 9, 2021
1 parent cf55653 commit f260f10
Showing 1 changed file with 53 additions and 38 deletions.
91 changes: 53 additions & 38 deletions run_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@
import map_sra_to_ontology
from map_sra_to_ontology import ontology_graph
from map_sra_to_ontology import load_ontology
from map_sra_to_ontology import predict_sample_type
from map_sra_to_ontology import config
from map_sra_to_ontology import predict_sample_type
from map_sra_to_ontology import run_sample_type_predictor
import predict_sample_type
from predict_sample_type import run_sample_type_predictor
from predict_sample_type.learn_classifier import *
from map_sra_to_ontology import pipeline_components as pc

Expand All @@ -44,7 +43,7 @@ def main():
"EFO":"16",
"CVCL":"4"}
ont_id_to_og = {x:load_ontology.load(x)[0] for x in ont_name_to_ont_id.values()}
pipeline = p_48()
pipeline = p_53()

all_mappings = []
for tag_to_val in tag_to_vals:
Expand Down Expand Up @@ -114,36 +113,50 @@ def run_pipeline_on_key_vals(tag_to_val, ont_id_to_og, mapping_data):
# }
# return mappings

def p_48():
spec_lex = pc.SpecialistLexicon(config.specialist_lex_location())
inflec_var = pc.SPECIALISTLexInflectionalVariants(spec_lex)
spell_var = pc.SPECIALISTSpellingVariants(spec_lex)
key_val_filt = pc.KeyValueFilter_Stage()
init_tokens_stage = pc.InitKeyValueTokens_Stage()
ngram = pc.NGram_Stage()
lower_stage = pc.Lowercase_Stage()
man_at_syn = pc.ManuallyAnnotatedSynonyms_Stage()
infer_cell_line = pc.InferCellLineTerms_Stage()
prop_spec_syn = pc.PropertySpecificSynonym_Stage()
infer_dev_stage = pc.ImpliedDevelopmentalStageFromAge_Stage()
linked_super = pc.LinkedTermsOfSuperterms_Stage()
cell_culture = pc.ConsequentCulturedCell_Stage()
filt_match_priority = pc.FilterOntologyMatchesByPriority_Stage()
real_val = pc.ExtractRealValue_Stage()
match_cust_targs = pc.ExactMatchCustomTargets_Stage()
cust_conseq = pc.CustomConsequentTerms_Stage()
delimit_plus = pc.Delimit_Stage('+')
delimit_underscore = pc.Delimit_Stage('_')
delimit_dash = pc.Delimit_Stage('-')
delimit_slash = pc.Delimit_Stage('/')
block_cell_line_key = pc.BlockCellLineNonCellLineKey_Stage()
subphrase_linked = pc.RemoveSubIntervalOfMatchedBlockAncestralLink_Stage()
cellline_to_implied_disease = pc.CellLineToImpliedDisease_Stage()
acr_to_expan = pc.AcronymToExpansion_Stage()
exact_match = pc.ExactStringMatching_Stage(["1", "2", "4", "5", "7", "8", "9"], query_len_thresh=3)
fuzzy_match = pc.FuzzyStringMatching_Stage(0.1, query_len_thresh=3)
two_char_match = pc.TwoCharMappings_Stage()
time_unit = pc.ParseTimeWithUnit_Stage()

def p_53():
spec_lex = SpecialistLexicon(config.specialist_lex_location())
inflec_var = SPECIALISTLexInflectionalVariants(spec_lex)
spell_var = SPECIALISTSpellingVariants(spec_lex)
key_val_filt = KeyValueFilter_Stage()
init_tokens_stage = InitKeyValueTokens_Stage()
ngram = NGram_Stage()
lower_stage = Lowercase_Stage()
man_at_syn = ManuallyAnnotatedSynonyms_Stage()
infer_cell_line = InferCellLineTerms_Stage()
prop_spec_syn = PropertySpecificSynonym_Stage()
infer_dev_stage = ImpliedDevelopmentalStageFromAge_Stage()
linked_super = LinkedTermsOfSuperterms_Stage()
cell_culture = ConsequentCulturedCell_Stage()
filt_match_priority = FilterOntologyMatchesByPriority_Stage()
real_val = ExtractRealValue_Stage()
match_cust_targs = ExactMatchCustomTargets_Stage()
cust_conseq = CustomConsequentTerms_Stage()
delimit_plus = Delimit_Stage('+')
delimit_underscore = Delimit_Stage('_')
delimit_dash = Delimit_Stage('-')
delimit_slash = Delimit_Stage('/')
block_cell_line_key = BlockCellLineNonCellLineKey_Stage()
subphrase_linked = RemoveSubIntervalOfMatchedBlockAncestralLink_Stage()
cellline_to_implied_disease = CellLineToImpliedDisease_Stage()
acr_to_expan = AcronymToExpansion_Stage()
exact_match = ExactStringMatching_Stage(
[
"1",
"2",
"5",
"7",
"8",
"9",
"18" # Cellosaurus restricted to human cell lines
],
query_len_thresh=3
)
fuzzy_match = FuzzyStringMatching_Stage(0.1, query_len_thresh=3)
two_char_match = TwoCharMappings_Stage()
time_unit = ParseTimeWithUnit_Stage()
prioritize_exact = PrioritizeExactMatchOverFuzzyMatch()
artifact_term_combo = TermArtifactCombinations_Stage()

stages = [
key_val_filt,
Expand All @@ -158,8 +171,8 @@ def p_48():
spell_var,
man_at_syn,
acr_to_expan,
time_unit,
exact_match,
time_unit,
two_char_match,
prop_spec_syn,
fuzzy_match,
Expand All @@ -169,13 +182,15 @@ def p_48():
cellline_to_implied_disease,
subphrase_linked,
cust_conseq,
artifact_term_combo,
real_val,
filt_match_priority,
infer_cell_line,
infer_dev_stage,
cell_culture]
return pc.Pipeline(stages, defaultdict(lambda: 1.0))

cell_culture,
prioritize_exact
]
return Pipeline(stages, defaultdict(lambda: 1.0))

if __name__ == "__main__":
main()
Expand Down

0 comments on commit f260f10

Please sign in to comment.