From 485aa0605845085ad280badf41496f43d6e4100b Mon Sep 17 00:00:00 2001 From: Mark Woon Date: Mon, 10 Feb 2025 22:58:59 -0800 Subject: [PATCH] feat(reporter): finalize call-only tsv reporter --- build.gradle | 4 +- dockstore/pipeline/PharmCAT_Pipeline.wdl | 10 + docs/using/Running-PharmCAT-Pipeline.md | 8 +- docs/using/Running-PharmCAT.md | 12 +- preprocessor/pharmcat_pipeline | 9 + .../org/pharmgkb/pharmcat/BaseConfig.java | 9 +- .../org/pharmgkb/pharmcat/BatchPharmCAT.java | 3 +- src/main/java/org/pharmgkb/pharmcat/Env.java | 3 + .../java/org/pharmgkb/pharmcat/PharmCAT.java | 3 +- .../java/org/pharmgkb/pharmcat/Pipeline.java | 15 +- .../pharmcat/phenotype/Phenotyper.java | 58 ++- .../pharmcat/reporter/ReportContext.java | 32 +- .../reporter/format/CallsOnlyFormat.java | 359 +++++++++++++----- .../reporter/model/result/Diplotype.java | 4 + .../java/org/pharmgkb/pharmcat/CftrTest.java | 2 +- .../org/pharmgkb/pharmcat/Cyp2d6Test.java | 16 +- .../org/pharmgkb/pharmcat/PharmCATTest.java | 193 ++++++---- .../org/pharmgkb/pharmcat/PipelineTest.java | 46 +-- .../pharmgkb/pharmcat/PipelineWrapper.java | 50 ++- .../java/org/pharmgkb/pharmcat/TestUtils.java | 11 +- .../pharmcat/phenotype/PhenotyperTest.java | 14 +- .../pharmcat/reporter/ReporterTest.java | 6 +- .../reporter/format/CallsOnlyFormatTest.java | 150 ++++++++ 23 files changed, 752 insertions(+), 265 deletions(-) create mode 100644 src/test/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormatTest.java diff --git a/build.gradle b/build.gradle index e068c4f8..d83a2de3 100644 --- a/build.gradle +++ b/build.gradle @@ -204,7 +204,7 @@ task updateExample { args = [ '-vcf', file('docs/examples/pharmcat.example.vcf'), '-po', file('docs/examples/pharmcat.example.outsideCall.tsv'), - '-reporterJson', '-matcherHtml' + '-reporterCallsOnlyTsv', '-reporterJson', '-reporterHtml', '-matcherHtml' ] } @@ -214,7 +214,7 @@ task updateExample { classpath = sourceSets.main.runtimeClasspath args = [ '-vcf', 
file('docs/examples/pharmcat.example2.vcf'), - '-reporterJson', '-matcherHtml' + '-reporterCallsOnlyTsv', '-reporterJson', '-reporterHtml', '-matcherHtml' ] } } diff --git a/dockstore/pipeline/PharmCAT_Pipeline.wdl b/dockstore/pipeline/PharmCAT_Pipeline.wdl index f7569b77..3fc03199 100644 --- a/dockstore/pipeline/PharmCAT_Pipeline.wdl +++ b/dockstore/pipeline/PharmCAT_Pipeline.wdl @@ -28,7 +28,9 @@ workflow pharmcat_pipeline { run_reporter: "Run reporter independently." reporter_sources: "Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]" reporter_extended: "Write an extended report (includes all possible genes and drugs, even if no data is available)" + reporter_save_html: "Save reporter results as HTML (the default if no format is specified)." reporter_save_json: "Save reporter results as JSON." + reporter_save_calls_only_tsv: "Save call results only as TSV." base_filename: "Prefix for output files. Defaults to the same base name as the input." delete_intermediate_files: "Delete intermediate PharmCAT files. Defaults to saving all files." 
@@ -52,7 +54,9 @@ workflow pharmcat_pipeline { Boolean run_reporter = false String reporter_sources = "" Boolean reporter_extended = false + Boolean reporter_save_html = false Boolean reporter_save_json = false + Boolean reporter_save_calls_only_tsv = false String base_filename = "" Boolean delete_intermediate_files = false Int max_concurrent_processes = 1 @@ -74,7 +78,9 @@ workflow pharmcat_pipeline { run_reporter = run_reporter, reporter_sources = reporter_sources, reporter_extended = reporter_extended, + reporter_save_html = reporter_save_html, reporter_save_json = reporter_save_json, + reporter_save_calls_only_tsv = reporter_save_calls_only_tsv, base_filename = base_filename, delete_intermediate_files = delete_intermediate_files, max_concurrent_processes = max_concurrent_processes, @@ -108,7 +114,9 @@ task pharmcat_pipeline_task { Boolean run_reporter = false String reporter_sources = "" Boolean reporter_extended = false + Boolean reporter_save_html = false Boolean reporter_save_json = false + Boolean reporter_save_calls_only_tsv = false String base_filename = "" Boolean delete_intermediate_files = false Int max_concurrent_processes = 1 @@ -133,7 +141,9 @@ task pharmcat_pipeline_task { ~{if run_reporter then '-reporter' else ''} \ ~{if reporter_sources != "" then '-rs ' + reporter_sources else ''} \ ~{if reporter_extended then '-re' else ''} \ + ~{if reporter_save_html then '-reporterHtml' else ''} \ ~{if reporter_save_json then '-reporterJson' else ''} \ + ~{if reporter_save_calls_only_tsv then '-reporterCallsOnlyTsv' else ''} \ ~{if base_filename != "" then '-bf ' + base_filename else ''} \ ~{if delete_intermediate_files then '-del' else ''} \ -cp ~{max_concurrent_processes} -cm ~{max_memory} diff --git a/docs/using/Running-PharmCAT-Pipeline.md b/docs/using/Running-PharmCAT-Pipeline.md index 52c9ad52..b3ff4381 100644 --- a/docs/using/Running-PharmCAT-Pipeline.md +++ b/docs/using/Running-PharmCAT-Pipeline.md @@ -38,7 +38,8 @@ usage: pharmcat_pipeline [-s | -S ] 
[-R ] [-matcher] [-ma] [-matcherHtml] [-research ] [-phenotyper] - [-reporter] [-rs ] [-re] [-reporterJson] + [-reporter] [-rs ] [-re] + [-reporterHtml] [-reporterJson] [-reporterCallsOnlyTsv] [-o ] [-bf ] [-del] [-cp ] [-v] [-V] @@ -91,8 +92,13 @@ Reporter arguments: Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA] -re, --reporter-extended Write an extended report (includes all possible genes and drugs, even if no data is available) + -reporterHtml, --reporter-save-html + Save reporter results as HTML. This is the default if no format is specified. + If any format is specified, only the specified formats will be saved. -reporterJson, --reporter-save-json Save reporter results as JSON. + -reporterCallsOnlyTsv, --reporter-save-calls-only-tsv + Save call results only as TSV. Output arguments: -o , --output-dir diff --git a/docs/using/Running-PharmCAT.md b/docs/using/Running-PharmCAT.md index 0f30eceb..cbd6e2e8 100644 --- a/docs/using/Running-PharmCAT.md +++ b/docs/using/Running-PharmCAT.md @@ -163,9 +163,17 @@ Each module has its own arguments to customize its behavior. -re
or --reporter-extended
: write an extended report (includes all possible genes and drugs, even if no data is available) --reporterJson +-reporterHtml
or --reporter-save-html +: save reporter results as HTML. This is the default if no format is specified. +If any format is specified, only the specified formats will be saved. + +-reporterJson
or --reporter-save-json : save reporter results as JSON +-reporterCallsOnlyTsv
or --reporter-save-calls-only-tsv +: save call results only as TSV + + ### Running Individual Modules @@ -199,7 +207,7 @@ Saving phenotyper JSON results to /tmp/results/outside_calls.phenotype.json #### Just the `Reporter` -This will take the phenotyper data and output the relevant drug annotations in a comprehensive HTML report. +This will take the `Phenotyper` data and output the relevant drug annotations in a comprehensive HTML report. Examples: diff --git a/preprocessor/pharmcat_pipeline b/preprocessor/pharmcat_pipeline index 82d89d86..082afb39 100755 --- a/preprocessor/pharmcat_pipeline +++ b/preprocessor/pharmcat_pipeline @@ -33,8 +33,12 @@ def add_reporter_java_args(java_args: List[str], cli_args): java_args.append('-rs') if cli_args.reporter_extended: java_args.append('-re') + if cli_args.reporter_save_html: + java_args.append('-reporterHtml') if cli_args.reporter_save_json: java_args.append('-reporterJson') + if cli_args.reporter_save_calls_only_tsv: + java_args.append('-reporterCallsOnlyTsv') def add_output_java_args(java_args: List[str], cli_args, output_dir: Optional[Path]): @@ -110,8 +114,13 @@ if __name__ == '__main__': help='Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]') reporter_group.add_argument('-re', '--reporter-extended', action='store_true', help='Write an extended report (includes all possible genes and drugs, even if no data is available).') + reporter_group.add_argument('-reporterHtml', '--reporter-save-html', action='store_true', + help='Save reporter results as HTML. This is the default if no format is specified. 
' + 'If any format is specified, only the specified formats will be saved.') reporter_group.add_argument('-reporterJson', '--reporter-save-json', action='store_true', help='Save reporter results as JSON.') + reporter_group.add_argument('-reporterCallsOnlyTsv', '--reporter-save-calls-only-tsv', action='store_true', + help='Save calls results only as TSV.') # output args output_group = parser.add_argument_group('Output arguments') output_group.add_argument('-o', '--output-dir', type=str, metavar='', diff --git a/src/main/java/org/pharmgkb/pharmcat/BaseConfig.java b/src/main/java/org/pharmgkb/pharmcat/BaseConfig.java index f5e7c91f..ce3cfb61 100644 --- a/src/main/java/org/pharmgkb/pharmcat/BaseConfig.java +++ b/src/main/java/org/pharmgkb/pharmcat/BaseConfig.java @@ -127,12 +127,17 @@ public class BaseConfig { reporterTitle = cliHelper.getValue("rt"); reporterCompact = !cliHelper.hasOption("re"); reporterJson = cliHelper.hasOption("reporterJson"); - reporterCallsOnlyTsv = cliHelper.hasOption("reporterCallsOnly"); + reporterCallsOnlyTsv = cliHelper.hasOption("reporterCallsOnlyTsv"); + // by default, generate the HTML report (which preserves backwards compatibility) + // only check for reporterHtml flag if specifying other reporter format outputs + if (reporterJson || reporterCallsOnlyTsv) { + reporterHtml = cliHelper.hasOption("reporterHtml"); + } if (researchMode) { System.out.println("WARNING: FULL REPORTER OUTPUT NOT AVAILABLE IN RESEARCH MODE"); if (!reporterCallsOnlyTsv) { - runReporter = false; + reporterCallsOnlyTsv = true; } reporterHtml = false; reporterJson = false; diff --git a/src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java b/src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java index 9a235d6f..4612a225 100644 --- a/src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java +++ b/src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java @@ -64,8 +64,9 @@ public static void main(String[] args) { .addOption("reporter", "reporter", "Run reporter 
independently") .addOption("rs", "reporter-sources", "Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]", false, "sources") .addOption("re", "reporter-extended", "Write an extended report (includes all possible genes and drugs, even if no data is available)") + .addOption("reporterHtml", "reporter-save-html", "Save reporter results as HTML (the default if no format is specified)") .addOption("reporterJson", "reporter-save-json", "Save reporter results as JSON") - .addOption("reporterCallsOnly", "reporter-save-calls-only", "Save calls only as TSV") + .addOption("reporterCallsOnlyTsv", "reporter-save-calls-only-tsv", "Save calls only as TSV") // outputs .addOption("o", "output-dir", "Directory to output to (optional, default is input file directory)", false, "directory") diff --git a/src/main/java/org/pharmgkb/pharmcat/Env.java b/src/main/java/org/pharmgkb/pharmcat/Env.java index f6283a7c..c78c4f35 100644 --- a/src/main/java/org/pharmgkb/pharmcat/Env.java +++ b/src/main/java/org/pharmgkb/pharmcat/Env.java @@ -128,6 +128,9 @@ public PgkbGuidelineCollection getDrugs() { } + /** + * Checks if gene is used in any guideline from the specified {@code source}. 
+ */ public boolean hasGene(DataSource source, String gene) { return m_drugs.getGenesUsedInSource(source).contains(gene); } diff --git a/src/main/java/org/pharmgkb/pharmcat/PharmCAT.java b/src/main/java/org/pharmgkb/pharmcat/PharmCAT.java index 9254cfce..4c5a001e 100644 --- a/src/main/java/org/pharmgkb/pharmcat/PharmCAT.java +++ b/src/main/java/org/pharmgkb/pharmcat/PharmCAT.java @@ -46,8 +46,9 @@ public static void main(String[] args) { .addOption("rt", "reporter-title", "Text to add to the report title", false, "title") .addOption("rs", "reporter-sources", "Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]", false, "sources") .addOption("re", "reporter-extended", "Write an extended report (includes all possible genes and drugs, even if no data is available)") + .addOption("reporterHtml", "reporter-save-html", "Save reporter results as HTML (the default if no format is specified)") .addOption("reporterJson", "reporter-save-json", "Save reporter results as JSON") - .addOption("reporterCallsOnly", "reporter-save-calls-only", "Save calls only as TSV") + .addOption("reporterCallsOnlyTsv", "reporter-save-calls-only-tsv", "Save calls results only as TSV") // outputs .addOption("o", "output-dir", "Directory to output to (optional, default is input file directory)", false, "directory") diff --git a/src/main/java/org/pharmgkb/pharmcat/Pipeline.java b/src/main/java/org/pharmgkb/pharmcat/Pipeline.java index 6d5f4665..b63f8cf2 100644 --- a/src/main/java/org/pharmgkb/pharmcat/Pipeline.java +++ b/src/main/java/org/pharmgkb/pharmcat/Pipeline.java @@ -22,6 +22,7 @@ import org.pharmgkb.pharmcat.haplotype.NamedAlleleMatcher; import org.pharmgkb.pharmcat.haplotype.ResultSerializer; import org.pharmgkb.pharmcat.haplotype.model.GeneCall; +import org.pharmgkb.pharmcat.haplotype.model.Metadata; import org.pharmgkb.pharmcat.phenotype.OutsideCallParser; import org.pharmgkb.pharmcat.phenotype.Phenotyper; import org.pharmgkb.pharmcat.phenotype.model.OutsideCall; 
@@ -300,14 +301,17 @@ public PipelineResult call() throws IOException { Phenotyper phenotyper = null; if (m_runPhenotyper) { + Metadata metadata = null; List calls; Map> warnings = new HashMap<>(); if (matcherResult != null) { + metadata = matcherResult.getMetadata(); calls = matcherResult.getGeneCalls(); warnings.putAll(matcherResult.getVcfWarnings()); } else if (m_phenotyperInputFile != null) { org.pharmgkb.pharmcat.haplotype.model.Result deserializedMatcherResult = new ResultSerializer() .fromJson(m_phenotyperInputFile); + metadata = deserializedMatcherResult.getMetadata(); calls = deserializedMatcherResult.getGeneCalls(); warnings.putAll(deserializedMatcherResult.getVcfWarnings()); } else { @@ -337,7 +341,7 @@ public PipelineResult call() throws IOException { } } - phenotyper = new Phenotyper(m_env, calls, outsideCalls, warnings); + phenotyper = new Phenotyper(m_env, metadata, calls, outsideCalls, warnings); if (!m_deleteIntermediateFiles || !m_runReporter) { if (!batchDisplayMode) { output.add("Saving phenotyper JSON results to " + m_phenotyperJsonFile); @@ -352,7 +356,7 @@ public PipelineResult call() throws IOException { Path inputFile = m_phenotyperJsonFile != null ? 
m_phenotyperJsonFile : m_reporterInputFile; phenotyper = Phenotyper.read(inputFile); } - m_reportContext = new ReportContext(m_env, phenotyper.getGeneReports(), m_reporterTitle); + m_reportContext = new ReportContext(m_env, phenotyper, m_reporterTitle); if (m_reporterHtmlFile != null) { if (!batchDisplayMode) { output.add("Saving reporter HTML results to " + m_reporterHtmlFile); @@ -373,11 +377,8 @@ public PipelineResult call() throws IOException { if (!batchDisplayMode) { output.add("Saving calls-only TSV results to " + m_reporterCallsOnlyFile); } - CallsOnlyFormat caf = new CallsOnlyFormat(m_reporterCallsOnlyFile, m_env); - if (!m_topCandidateOnly) { - caf.showMatchScores(); - } - caf.write(m_reportContext); + new CallsOnlyFormat(m_reporterCallsOnlyFile, m_env) + .write(m_reportContext); } didSomething = true; } diff --git a/src/main/java/org/pharmgkb/pharmcat/phenotype/Phenotyper.java b/src/main/java/org/pharmgkb/pharmcat/phenotype/Phenotyper.java index aacac1ae..c193da24 100644 --- a/src/main/java/org/pharmgkb/pharmcat/phenotype/Phenotyper.java +++ b/src/main/java/org/pharmgkb/pharmcat/phenotype/Phenotyper.java @@ -7,15 +7,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; +import java.util.*; import java.util.stream.Collectors; import javax.annotation.Nullable; import com.google.common.base.Preconditions; @@ -24,6 +16,7 @@ import org.pharmgkb.pharmcat.Env; import org.pharmgkb.pharmcat.haplotype.NamedAlleleMatcher; import org.pharmgkb.pharmcat.haplotype.model.GeneCall; +import org.pharmgkb.pharmcat.haplotype.model.Metadata; import org.pharmgkb.pharmcat.phenotype.model.OutsideCall; import org.pharmgkb.pharmcat.reporter.ReportContext; import 
org.pharmgkb.pharmcat.reporter.model.DataSource; @@ -44,33 +37,58 @@ public class Phenotyper { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + @SerializedName("matcherMetadata") + @Expose + private Metadata m_matcherMetadata; @Expose @SerializedName("geneReports") private final SortedMap> m_geneReports = new TreeMap<>(); + @Expose + @SerializedName("unannotatedGeneCalls") + private SortedSet m_unannotatedGeneCalls = new TreeSet<>(); /** * Public constructor. This needs {@link GeneCall} objects from the {@link NamedAlleleMatcher} and {@link OutsideCall} * objects coming from other allele calling sources. This relies on reading definition files as well. * + * @param matcherMetadata metadata for the named allele matcher used for {@code geneCalls}; + * can be null if all outside calls * @param geneCalls a List of {@link GeneCall} objects * @param outsideCalls a List of {@link OutsideCall} objects * @param variantWarnings map of VCF warnings, keyed to chromosomal position */ - public Phenotyper(Env env, List geneCalls, Set outsideCalls, + public Phenotyper(Env env, @Nullable Metadata matcherMetadata, List geneCalls, Set outsideCalls, @Nullable Map> variantWarnings) { - initialize(geneCalls, outsideCalls, env, DataSource.CPIC, variantWarnings); - initialize(geneCalls, outsideCalls, env, DataSource.DPWG, variantWarnings); + List unusedGenes = initialize(geneCalls, outsideCalls, env, DataSource.CPIC, variantWarnings); + unusedGenes.retainAll(initialize(geneCalls, outsideCalls, env, DataSource.DPWG, variantWarnings)); + + if (!unusedGenes.isEmpty()) { + for (String gene : unusedGenes) { + GeneCall geneCall = geneCalls.stream() + .filter(gc -> gc.getGene().equals(gene)) + .findFirst() + .orElseThrow(() -> new IllegalStateException("Cannot find gene call for " + gene)); + GeneReport geneReport = new GeneReport(geneCall, env, DataSource.UNKNOWN); + if (!geneReport.isNoData()) { + m_unannotatedGeneCalls.add(geneReport); + 
} + } + } + + m_matcherMetadata = matcherMetadata; } - private void initialize(List geneCalls, Set outsideCalls, Env env, DataSource source, + private List initialize(List geneCalls, Set outsideCalls, Env env, DataSource source, @Nullable Map> variantWarnings) { SortedMap reportMap = m_geneReports.computeIfAbsent(source, (s) -> new TreeMap<>()); + List unusedGeneCalls = new ArrayList<>(); // matcher calls for (GeneCall geneCall : geneCalls) { if (!env.hasGene(source, geneCall.getGene())) { + unusedGeneCalls.add(geneCall.getGene()); continue; } GeneReport geneReport = new GeneReport(geneCall, env, source); @@ -83,7 +101,7 @@ private void initialize(List geneCalls, Set outsideCalls, MessageAnnotation msgAnnotation = null; if (geneReport != null) { if (geneReport.getCallSource() != CallSource.OUTSIDE) { - // outside call trumps matcher + // outside call trumps the matcher's result // warn the user of the conflict String matcherCall = geneReport.getSourceDiplotypes().stream() .sorted() @@ -117,6 +135,8 @@ private void initialize(List geneCalls, Set outsideCalls, // add VCF warnings reportMap.values().forEach(geneReport -> geneReport.addVariantWarningMessages(variantWarnings)); + + return unusedGeneCalls; } @@ -135,6 +155,11 @@ public Optional findGeneReport(DataSource source, String geneSymbol) } + public SortedSet getUnannotatedGeneCalls() { + return m_unannotatedGeneCalls; + } + + /** * Writes out {@link Phenotyper} data. 
* @@ -170,4 +195,9 @@ private Set listUnspecifiedGenes(Env env, DataSource source) { .forEach(unspecifiedGenes::remove); return unspecifiedGenes; } + + + public Metadata getMatcherMetadata() { + return m_matcherMetadata; + } } diff --git a/src/main/java/org/pharmgkb/pharmcat/reporter/ReportContext.java b/src/main/java/org/pharmgkb/pharmcat/reporter/ReportContext.java index 9dcf42fa..6550ba29 100644 --- a/src/main/java/org/pharmgkb/pharmcat/reporter/ReportContext.java +++ b/src/main/java/org/pharmgkb/pharmcat/reporter/ReportContext.java @@ -9,11 +9,15 @@ import java.util.Objects; import java.util.Set; import java.util.SortedMap; +import java.util.SortedSet; import java.util.TreeMap; +import java.util.TreeSet; import com.google.gson.annotations.Expose; import com.google.gson.annotations.SerializedName; import org.checkerframework.checker.nullness.qual.Nullable; import org.pharmgkb.pharmcat.Env; +import org.pharmgkb.pharmcat.haplotype.model.Metadata; +import org.pharmgkb.pharmcat.phenotype.Phenotyper; import org.pharmgkb.pharmcat.reporter.model.DataSource; import org.pharmgkb.pharmcat.reporter.model.MessageAnnotation; import org.pharmgkb.pharmcat.reporter.model.PrescribingGuidanceSource; @@ -51,18 +55,28 @@ public class ReportContext { @Expose @SerializedName("messages") private final List f_messages = new ArrayList<>(); + @SerializedName("matcherMetadata") + @Expose + private Metadata m_matcherMetadata; + @Expose + @SerializedName("unannotatedGeneCalls") + private SortedSet m_unannotatedGeneCalls = new TreeSet<>(); + /** * Public constructor. Compiles all the incoming data into useful objects to be held for later reporting. 
* - * @param geneReports {@link GeneReport} objects, non-null but can be empty + * @param phenotyper phenotyper data to build this report from * @param title the optional text to show as a user-friendly title or identifier for this report */ - public ReportContext(Env env, SortedMap> geneReports, String title) throws IOException { + public ReportContext(Env env, Phenotyper phenotyper, String title) throws IOException { f_title = title; - m_geneReports = geneReports; - + m_matcherMetadata = phenotyper.getMatcherMetadata(); + m_geneReports = phenotyper.getGeneReports(); m_dataVersion = validateVersions(env.getDrugs()); + if (!phenotyper.getUnannotatedGeneCalls().isEmpty()) { + m_unannotatedGeneCalls.addAll(phenotyper.getUnannotatedGeneCalls()); + } for (PrescribingGuidanceSource dataSourceType : PrescribingGuidanceSource.values()) { Map drugReports = m_drugReports.computeIfAbsent(dataSourceType, (s) -> new TreeMap<>()); @@ -79,7 +93,7 @@ public ReportContext(Env env, SortedMap m.values().stream()) .forEach(messageHelper::addMatchingMessagesTo); // to drug reports @@ -212,4 +226,12 @@ public List getMessages() { public void addMessage(MessageAnnotation message) { f_messages.add(message); } + + public Metadata getMatcherMetadata() { + return m_matcherMetadata; + } + + public SortedSet getUnannotatedGeneCalls() { + return m_unannotatedGeneCalls; + } } diff --git a/src/main/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormat.java b/src/main/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormat.java index cb970498..f3cbd201 100644 --- a/src/main/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormat.java +++ b/src/main/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormat.java @@ -4,14 +4,21 @@ import java.io.PrintWriter; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.nio.file.OpenOption; import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; 
+import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.TreeSet; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.commons.lang3.StringUtils; import org.pharmgkb.pharmcat.BaseConfig; import org.pharmgkb.pharmcat.Env; +import org.pharmgkb.pharmcat.haplotype.ResultSerializer; import org.pharmgkb.pharmcat.phenotype.Phenotyper; import org.pharmgkb.pharmcat.reporter.ReportContext; import org.pharmgkb.pharmcat.reporter.TextConstants; @@ -19,6 +26,8 @@ import org.pharmgkb.pharmcat.reporter.model.result.Diplotype; import org.pharmgkb.pharmcat.reporter.model.result.GeneReport; +import static org.pharmgkb.pharmcat.Constants.isLowestFunctionGene; + /** * Generates a .tsv file that only contains calls from the Named Allele Matcher. @@ -26,7 +35,7 @@ * @author Mark Woon */ public class CallsOnlyFormat extends AbstractFormat { - private boolean m_showMatchScores; + private boolean m_singleFileMode; public CallsOnlyFormat(Path outputPath, Env env) { @@ -34,109 +43,263 @@ public CallsOnlyFormat(Path outputPath, Env env) { } /** - * Sets whether match scores should be exported. - * This is only necessary if not calling with top-candidates-only. + * Sets whether all results should be appended to a single file. 
*/ - public CallsOnlyFormat showMatchScores() { - m_showMatchScores = true; + public CallsOnlyFormat singleFileMode() { + m_singleFileMode = true; return this; } @Override public void write(ReportContext reportContext) throws IOException { - try (PrintWriter writer = new PrintWriter(Files.newBufferedWriter(getOutputPath(), StandardCharsets.UTF_8))) { - writer.print("Gene\tDiplotype\tPhenotype\tActivity Score" + - "\tHaplotype 1\tHaplotype 1 Function\tHaplotype 1 Activity Value" + - "\tHaplotype 2\tHaplotype 2 Function\tHaplotype 2 Activity Value" + - "\tOutside Call\t"); - if (m_showMatchScores) { - writer.print("Match Score\t"); + + boolean printHeaders = true; + OpenOption[] options = new OpenOption[] { + StandardOpenOption.CREATE, + StandardOpenOption.WRITE, + StandardOpenOption.TRUNCATE_EXISTING, + }; + if (m_singleFileMode && Files.exists(getOutputPath())) { + printHeaders = false; + options = new OpenOption[] { + StandardOpenOption.CREATE, + StandardOpenOption.WRITE, + StandardOpenOption.APPEND, + }; + } + + Map calledGenes = new HashMap<>(); + for (String gene : getEnv().getDefinitionReader().getGenes()) { + GeneReport cpicReport = reportContext.getGeneReport(DataSource.CPIC, gene); + GeneReport dpwgReport = reportContext.getGeneReport(DataSource.DPWG, gene); + if ((cpicReport == null || cpicReport.isNoData()) && (dpwgReport == null || dpwgReport.isNoData())) { + continue; + } + + GeneReport primary = cpicReport == null ? 
dpwgReport : cpicReport; + calledGenes.put(gene, primary); + } + + + try (PrintWriter writer = new PrintWriter(Files.newBufferedWriter(getOutputPath(), StandardCharsets.UTF_8, options))) { + if (printHeaders) { + writer.println("Gene\tSource Diplotype\tPhenotype\tActivity Score" + + "\tHaplotype 1\tHaplotype 1 Function\tHaplotype 1 Activity Value" + + "\tHaplotype 2\tHaplotype 2 Function\tHaplotype 2 Activity Value" + + "\tOutside Call\tMatch Score\tMissing positions?\t" + + "Recommendation Lookup Diplotype\tRecommendation Lookup Phenotype\tRecommendation Lookup Activity Score"); } - writer.println("Missing positions?"); - for (String gene : getEnv().getDefinitionReader().getGenes()) { - GeneReport cpicReport = reportContext.getGeneReport(DataSource.CPIC, gene); - GeneReport dpwgReport = reportContext.getGeneReport(DataSource.DPWG, gene); - if ((cpicReport == null || !cpicReport.isCalled()) && (dpwgReport == null || !dpwgReport.isCalled())) { + for (String gene : calledGenes.keySet()) { + GeneReport report = calledGenes.get(gene); + if (!report.isCalled()) { + writeNoCall(writer, report); continue; } - GeneReport primary = (cpicReport == null || !cpicReport.isCalled()) ? 
dpwgReport : cpicReport; - for (Diplotype dip : primary.getSourceDiplotypes()) { - writer.print(gene); - writer.print("\t"); - // diplotype - if (dip.getAllele1() != null) { - writer.print(dip.getAllele1().getName()); - if (dip.getAllele2() != null) { - writer.print("/"); - writer.print(dip.getAllele2().getName()); - } - } - writer.print("\t"); - // phenotype - if (!dip.getPhenotypes().isEmpty()) { - writer.print(dip.getPhenotypes().stream() - .filter(p -> !p.equals(TextConstants.NO_RESULT)) - .collect(Collectors.joining(", "))); - } - writer.print("\t"); - // activity score - if (dip.getActivityScore() != null) { - writer.print(dip.getActivityScore()); - } - writer.print("\t"); - // haplotype 1 - if (dip.getAllele1() != null) { - writer.print(dip.getAllele1().getName()); - writer.print("\t"); - if (dip.getAllele1().getFunction() != null) { - writer.print(dip.getAllele1().getFunction()); - } - writer.print("\t"); - if (dip.getAllele1().getActivityValue() != null && - !dip.getAllele1().getActivityValue().equals(TextConstants.NA)) { - writer.print(dip.getAllele1().getActivityValue()); - } - } else { - writer.print("\t"); - writer.print("\t"); - } - writer.print("\t"); - // haplotype 2 - if (dip.getAllele2() != null) { - writer.print(dip.getAllele2()); - writer.print("\t"); - if (dip.getAllele2().getFunction() != null) { - writer.print(dip.getAllele2().getFunction()); - } - writer.print("\t"); - if (dip.getAllele2().getActivityValue() != null && - !dip.getAllele2().getActivityValue().equals(TextConstants.NA)) { - writer.print(dip.getAllele2().getActivityValue()); - } - } else { - writer.print("\t"); - writer.print("\t"); + // only have component haplotypes for lowest function genes when diplotypes are true diplotypes + // (vs. 
individual haplotypes) + boolean lowestFunctionSingles = isLowestFunctionGene(gene) && report.getMatcherComponentHaplotypes().isEmpty(); + + if (report.getSourceDiplotypes().size() > 1 || lowestFunctionSingles) { + writeCollapsedDiplotypes(writer, report, lowestFunctionSingles, true); + } else { + for (Diplotype dip : report.getSourceDiplotypes()) { + writeDiplotype(writer, report, dip, true); } - writer.print("\t"); - // outside call - writer.print(primary.isOutsideCall()); - writer.print("\t"); - if (m_showMatchScores) { - // match score - writer.print(dip.getMatchScore()); - writer.print("\t"); + } + } + + for (GeneReport report : reportContext.getUnannotatedGeneCalls()) { + if (report.getSourceDiplotypes().size() > 1) { + writeCollapsedDiplotypes(writer, report, false, false); + } else { + for (Diplotype dip : report.getSourceDiplotypes()) { + writeDiplotype(writer, report, dip, false); } - // missing positions - writer.print(primary.isMissingVariants()); - writer.println(); } } } } + private void writeNoCall(PrintWriter writer, GeneReport report) { + writer.print(report.getGene()); + writer.print("\tno call\t\t" + + "\t\t\t" + + "\t\t\t\t"); + writeCommon(writer, report, null, false); + writer.println(); + } + + + private boolean isIgnorableValue(String text) { + return StringUtils.isBlank(text) || text.equals(TextConstants.NA) || text.equals(TextConstants.NO_RESULT); + } + + private String generateStandardizedValue(String text) { + return isIgnorableValue(text) ? 
" " : text; + } + + private String generatePhenotypeValue(List phenotypes) { + return phenotypes.stream() + .map(this::generateStandardizedValue) + .collect(Collectors.joining(", ")); + } + + + private void writeCollapsedDiplotypes(PrintWriter writer, GeneReport report, boolean lowestFunctionSingles, + boolean showRecommendationDiplotype) { + + boolean hasPhenotypes = report.getSourceDiplotypes().stream() + .anyMatch(d -> !d.getPhenotypes().isEmpty() && !isIgnorableValue(d.getPhenotypes().get(0))); + boolean hasActivityScores = report.getSourceDiplotypes().stream() + .anyMatch(d -> !isIgnorableValue(d.getActivityScore())); + + StringBuilder diplotypes = new StringBuilder(); + StringBuilder matchScores = new StringBuilder(); + StringBuilder phenotypes = new StringBuilder(); + StringBuilder activityScores = new StringBuilder(); + for (Diplotype dip : report.getSourceDiplotypes()) { + if (!diplotypes.isEmpty()) { + diplotypes.append(lowestFunctionSingles ? " AND " : " OR "); + } + diplotypes.append(buildDiplotypeName(dip, report)); + + if (hasPhenotypes) { + if (!phenotypes.isEmpty()) { + phenotypes.append(" / "); + } + phenotypes.append(generatePhenotypeValue(dip.getPhenotypes())); + } + + if (hasActivityScores) { + if (!activityScores.isEmpty()) { + activityScores.append(" / "); + } + activityScores.append(generateStandardizedValue(dip.getActivityScore())); + } + + if (!lowestFunctionSingles) { + if (!matchScores.isEmpty()) { + matchScores.append(" / "); + } + matchScores.append(dip.getMatchScore()); + } + }; + + writer.print(report.getGene()); + writer.print("\t"); + writer.print(diplotypes); + writer.print("\t"); + writer.print(phenotypes); + writer.print("\t"); + writer.print(activityScores); + writer.print("\t" + + "\t\t\t" + + "\t\t\t"); + + writeCommon(writer, report, matchScores.toString(), showRecommendationDiplotype); + writer.println(); + } + + private void writeDiplotype(PrintWriter writer, GeneReport report, Diplotype dip, + boolean 
showRecommendationDiplotype) { + writer.print(report.getGene()); + writer.print("\t"); + // diplotype + writer.print(buildDiplotypeName(dip, report)); + writer.print("\t"); + // phenotype + writer.print(generatePhenotypeValue(dip.getPhenotypes())); + writer.print("\t"); + // activity score + if (dip.getActivityScore() != null) { + writer.print(generateStandardizedValue(dip.getActivityScore())); + } + writer.print("\t"); + // haplotype 1 + if (dip.getAllele1() != null) { + writer.print(dip.getAllele1().getName()); + writer.print("\t"); + if (dip.getAllele1().getFunction() != null) { + writer.print(dip.getAllele1().getFunction()); + } + writer.print("\t"); + if (dip.getAllele1().getActivityValue() != null && + !dip.getAllele1().getActivityValue().equals(TextConstants.NA)) { + writer.print(dip.getAllele1().getActivityValue()); + } + } else { + writer.print("\t"); + writer.print("\t"); + } + writer.print("\t"); + // haplotype 2 + if (dip.getAllele2() != null) { + writer.print(dip.getAllele2()); + writer.print("\t"); + if (dip.getAllele2().getFunction() != null) { + writer.print(dip.getAllele2().getFunction()); + } + writer.print("\t"); + if (dip.getAllele2().getActivityValue() != null && + !dip.getAllele2().getActivityValue().equals(TextConstants.NA)) { + writer.print(dip.getAllele2().getActivityValue()); + } + } else { + writer.print("\t"); + writer.print("\t"); + } + writer.print("\t"); + + writeCommon(writer, report, Integer.toString(dip.getMatchScore()), showRecommendationDiplotype); + writer.println(); + } + + + private void writeCommon(PrintWriter writer, GeneReport report, String matchScore, + boolean showRecommendationDiplotype) { + // outside call + writer.print(report.isOutsideCall()); + writer.print("\t"); + writer.print(matchScore); + writer.print("\t"); + // missing positions + writer.print(report.isMissingVariants()); + writer.print("\t"); + // recommendation lookup fields + if (showRecommendationDiplotype && report.getRecommendationDiplotypes() != null 
&& + report.getRecommendationDiplotypes().size() == 1) { + Diplotype recDip = report.getRecommendationDiplotypes().first(); + // recommendation lookup diplotype + writer.print(buildDiplotypeName(recDip, report)); + writer.print("\t"); + // recommendation lookup phenotype + writer.print(generatePhenotypeValue(recDip.getPhenotypes())); + writer.print("\t"); + // recommendation lookup activity score + writer.print(generateStandardizedValue(recDip.getActivityScore())); + } else { + writer.print("\t\t"); + } + } + + private String buildDiplotypeName(Diplotype dip, GeneReport geneReport) { + StringBuilder builder = new StringBuilder(); + if (dip.getAllele1() != null) { + builder.append(dip.getAllele1().getName()); + if (dip.getAllele2() != null) { + builder.append("/") + .append(dip.getAllele2().getName()); + } else if (geneReport.getMatcherHomozygousComponentHaplotypes().contains(dip.getAllele1().getName())) { + builder.append("/") + .append(dip.getAllele1().getName()); + } + } + return builder.toString(); + } + public static void main(String[] args) { @@ -171,7 +334,7 @@ public static void main(String[] args) { private static int readDir(Env env, Path inDir, Path outDir, int index) throws IOException { - List phenotypeFiles = new ArrayList<>(); + List matchFiles = new ArrayList<>(); List dirs = new ArrayList<>(); try (Stream files = Files.list(inDir)) { @@ -179,26 +342,34 @@ private static int readDir(Env env, Path inDir, Path outDir, int index) throws I if (Files.isDirectory(f)) { dirs.add(f); } else if (Files.isRegularFile(f)) { - if (f.toString().endsWith("phenotype.json")) { - phenotypeFiles.add(f); + if (f.toString().endsWith(".match.json")) { + matchFiles.add(f); } } }); } - System.out.println("Found " + phenotypeFiles.size() + " in " + inDir); + System.out.println("Found " + matchFiles.size() + " in " + inDir); for (Path d : dirs) { index = readDir(env, d, outDir, index); } - for (Path pFile : phenotypeFiles) { + for (Path mFile : matchFiles) { index += 1; - 
String basename = String.format("%06d", index); - //System.out.println(pFile + " -> " + basename); - Phenotyper phenotyper = Phenotyper.read(pFile); - ReportContext reportContext = new ReportContext(env, phenotyper.getGeneReports(), basename); + //String basename = BaseConfig.getBaseFilename(mFile); + String basename = "complete"; + if (index % 1000 == 0) { + System.out.println(index); + } + org.pharmgkb.pharmcat.haplotype.model.Result matcherResult = new ResultSerializer() + .fromJson(mFile); + Phenotyper phenotyper = new Phenotyper(env, matcherResult.getMetadata(), matcherResult.getGeneCalls(), + new TreeSet<>(), new HashMap<>()); + + ReportContext reportContext = new ReportContext(env, phenotyper, basename); Path outFile = outDir.resolve(basename + BaseConfig.REPORTER_SUFFIX + ".tsv"); new CallsOnlyFormat(outFile, env) + .singleFileMode() .write(reportContext); } return index; diff --git a/src/main/java/org/pharmgkb/pharmcat/reporter/model/result/Diplotype.java b/src/main/java/org/pharmgkb/pharmcat/reporter/model/result/Diplotype.java index cc7237fb..5275c102 100644 --- a/src/main/java/org/pharmgkb/pharmcat/reporter/model/result/Diplotype.java +++ b/src/main/java/org/pharmgkb/pharmcat/reporter/model/result/Diplotype.java @@ -255,6 +255,10 @@ public boolean isUnknownAlleles() { } + /** + * Gets the phenotype for this {@link Diplotype}. + * Should only have one phenotype, except for HLAs. 
+ */ public List getPhenotypes() { return m_phenotypes; } diff --git a/src/test/java/org/pharmgkb/pharmcat/CftrTest.java b/src/test/java/org/pharmgkb/pharmcat/CftrTest.java index 50902154..f3396f23 100644 --- a/src/test/java/org/pharmgkb/pharmcat/CftrTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/CftrTest.java @@ -114,7 +114,7 @@ void outsideReferenceCall(TestInfo testInfo) throws Exception { .reference("CYP2C19") .reference("CYP2C9") ; - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testCalledByMatcher("CYP2C9"); diff --git a/src/test/java/org/pharmgkb/pharmcat/Cyp2d6Test.java b/src/test/java/org/pharmgkb/pharmcat/Cyp2d6Test.java index c9a05b7a..21015d82 100644 --- a/src/test/java/org/pharmgkb/pharmcat/Cyp2d6Test.java +++ b/src/test/java/org/pharmgkb/pharmcat/Cyp2d6Test.java @@ -69,7 +69,7 @@ void testCyp2c19s4s17(TestInfo testInfo) throws Exception { .variation("CYP2C19", "rs12248560", "C", "T") .variation("CYP2C19", "rs28399504", "A", "G") .variation("CYP2C19", "rs3758581", "G", "G"); - testWrapper.execute(s_outsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_outsideCallFilePath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testReportable("CYP2D6"); @@ -88,7 +88,7 @@ void testAmitriptylineCallWoCyp2c19(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("DPYD"); - testWrapper.execute(s_outsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_outsideCallFilePath); testWrapper.testReportable("CYP2D6"); testWrapper.testPrintCpicCalls("CYP2D6", "*1/*4"); @@ -115,7 +115,7 @@ void testCyp2d6AlleleWithNoFunction(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C19"); - testWrapper.execute(outsideCallPath); + 
testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testPrintCalls(DataSource.CPIC, "CYP2D6", "*1/*XXX"); testWrapper.testPrintCalls(DataSource.DPWG, "CYP2D6", "*1/*XXX"); @@ -156,7 +156,7 @@ void testCyp2d6EquivalentDoubleCall(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C19"); - Path vcfFile = testWrapper.execute(outsideCallPath); + Path vcfFile = testWrapper.executeWithOutsideCalls(outsideCallPath); GeneReport geneReport = testWrapper.getContext().getGeneReport(DataSource.CPIC, "CYP2D6"); assertNotNull(geneReport); @@ -189,7 +189,7 @@ void testCyp2d6DoubleCall(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C19"); - Path vcfFile = testWrapper.execute(outsideCallPath); + Path vcfFile = testWrapper.executeWithOutsideCalls(outsideCallPath); GeneReport geneReport = testWrapper.getContext().getGeneReport(DataSource.CPIC, "CYP2D6"); assertNotNull(geneReport); @@ -228,7 +228,7 @@ void testCyp2d6PhenotypeHasMultipleActivityScore(TestInfo testInfo) throws Excep PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C19"); - Path vcfFile = testWrapper.execute(outsideCallPath); + Path vcfFile = testWrapper.executeWithOutsideCalls(outsideCallPath); List cyp2c19ExpectedCalls = List.of("*38/*38"); testWrapper.testCalledByMatcher("CYP2C19"); @@ -313,7 +313,7 @@ void testCyp2d6CpicVsDpwg(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C19"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); List expectedCyp2d6Calls = List.of("*1/*1", "*1x2/*9", "*1x2/*10", "*1x2/*17", "*1/*1x3", "*4/*4", "*4/*10"); @@ -413,7 +413,7 @@ PipelineWrapper checkCombination(TestInfo 
testInfo, String diplotype, String phe } testWrapper.getVcfBuilder() .reference("CYP2C19"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testPrintCalls(DataSource.CPIC, "CYP2D6", diplotype); testWrapper.testSourcePhenotype(DataSource.CPIC, "CYP2D6", phenotype); diff --git a/src/test/java/org/pharmgkb/pharmcat/PharmCATTest.java b/src/test/java/org/pharmgkb/pharmcat/PharmCATTest.java index c700c2e4..c22de68e 100644 --- a/src/test/java/org/pharmgkb/pharmcat/PharmCATTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/PharmCATTest.java @@ -6,6 +6,7 @@ import java.util.Collection; import java.util.Optional; import java.util.function.Consumer; +import java.util.stream.Stream; import org.checkerframework.checker.nullness.qual.Nullable; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -195,6 +196,34 @@ void reference_fdaOnly(TestInfo testInfo) throws Exception { }); } + @Test + void referenceTsvOnly(TestInfo testInfo) throws Exception { + Path vcfFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/reference.vcf"); + Path outputDir = TestUtils.getTestOutputDir(testInfo, true); + + Path refMatcherOutput = outputDir.resolve("reference.match.json"); + Path refPhenotyperOutput = outputDir.resolve("reference.phenotype.json"); + Path refReporterOutput = outputDir.resolve("reference.report.html"); + Path refReporterTsvOutput = outputDir.resolve("reference.report.tsv"); + + try { + String systemOut = tapSystemOut(() -> PharmCAT.main(new String[] { + "-vcf", vcfFile.toString(), + "-o", outputDir.toString(), + "-del", "-reporterCallsOnlyTsv" + })); + System.out.println(systemOut); + assertTrue(systemOut.contains("Done.")); + assertFalse(Files.exists(refMatcherOutput)); + assertFalse(Files.exists(refPhenotyperOutput)); + assertFalse(Files.exists(refReporterOutput)); + assertTrue(Files.exists(refReporterTsvOutput)); + + } finally { + TestUtils.deleteTestFiles(refMatcherOutput, refPhenotyperOutput, 
refReporterOutput); + } + } + /** * An example run with CYP2D6 research mode enabled. @@ -276,27 +305,24 @@ void outsideCallsOnly(TestInfo testInfo) throws Exception { Path outsideCallFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/PharmCATTest-cyp2d6.tsv"); Path outputDir = TestUtils.getTestOutputDir(testInfo, true); - Path matcherOutput = outputDir.resolve("PharmCATTest-cyp2d6.match.json"); - Path phenotyperOutput = outputDir.resolve("PharmCATTest-cyp2d6.phenotype.json"); - Path reporterOutput = outputDir.resolve("PharmCATTest-cyp2d6.report.html"); - - try { - String systemOut = tapSystemOut(() -> PharmCAT.main(new String[] { - "-o", outputDir.toString(), - "-phenotyper", - "-po", outsideCallFile.toString() - })); - System.out.println(systemOut); - assertTrue(systemOut.contains("Done.")); - assertFalse(Files.exists(matcherOutput)); - assertTrue(Files.exists(phenotyperOutput)); - assertFalse(Files.exists(reporterOutput)); + String baseFilename = TestUtils.getFullTestName(testInfo); + Path matcherOutput = outputDir.resolve(baseFilename + ".match.json"); + Path phenotyperOutput = outputDir.resolve(baseFilename + ".phenotype.json"); + Path reporterOutput = outputDir.resolve(baseFilename + ".report.html"); - validateCyp2d6OutsideCallOutput(phenotyperOutput); - - } finally { - TestUtils.deleteTestFiles(outputDir); - } + String systemOut = tapSystemOut(() -> PharmCAT.main(new String[] { + "-o", outputDir.toString(), + "-phenotyper", + "-po", outsideCallFile.toString(), + "-bf", baseFilename, + })); + System.out.println(systemOut); + assertTrue(systemOut.contains("Done.")); + assertFalse(Files.exists(matcherOutput)); + assertTrue(Files.exists(phenotyperOutput)); + assertFalse(Files.exists(reporterOutput)); + + validateCyp2d6OutsideCallOutput(phenotyperOutput); } @Test @@ -417,32 +443,27 @@ void matchAllFlag(TestInfo testInfo) throws Exception { } // matcher only, expecting many CYP2C19 matches - try { - String systemOut = tapSystemOut(() -> PharmCAT.main(new 
String[] { - "-vcf", vcfFile.toString(), - "-matcher", - "-ma", - "-o", outputDir.toString(), - "-bf", baseFilename, - })); - //System.out.println(systemOut); - assertTrue(systemOut.contains("Done.")); - assertTrue(Files.exists(matcherOutput)); - assertFalse(Files.exists(phenotyperOutput)); - assertFalse(Files.exists(reporterOutput)); - - ResultSerializer resultSerializer = new ResultSerializer(); - Result result = resultSerializer.fromJson(matcherOutput); - Optional gcOpt = result.getGeneCalls().stream() - .filter(gc -> gc.getGene().equals("CYP2C19")) - .findFirst(); - assertTrue(gcOpt.isPresent()); - GeneCall gc = gcOpt.get(); - assertTrue(gc.getDiplotypes().size() > 50); - - } finally { - TestUtils.deleteTestFiles(outputDir); - } + String systemOut = tapSystemOut(() -> PharmCAT.main(new String[] { + "-vcf", vcfFile.toString(), + "-matcher", + "-ma", + "-o", outputDir.toString(), + "-bf", baseFilename, + })); + //System.out.println(systemOut); + assertTrue(systemOut.contains("Done.")); + assertTrue(Files.exists(matcherOutput)); + assertFalse(Files.exists(phenotyperOutput)); + assertFalse(Files.exists(reporterOutput)); + + ResultSerializer resultSerializer = new ResultSerializer(); + Result result = resultSerializer.fromJson(matcherOutput); + Optional gcOpt = result.getGeneCalls().stream() + .filter(gc -> gc.getGene().equals("CYP2C19")) + .findFirst(); + assertTrue(gcOpt.isPresent()); + GeneCall gc = gcOpt.get(); + assertTrue(gc.getDiplotypes().size() > 50, "Expecting more than 50, found " + gc.getDiplotypes().size()); } @@ -489,7 +510,25 @@ void consistentOutput(TestInfo testInfo) throws Exception { assertTrue(doubleOut.contains("Done.")); assertTrue(Files.exists(doublePhenotyperOutput)); - assertEquals(Files.readString(singlesPhenotyperOutput), Files.readString(doublePhenotyperOutput)); + StringBuilder singlePhenoJson = new StringBuilder(); + try (Stream lines = Files.lines(singlesPhenotyperOutput)) { + // Process each line + lines.filter(l -> 
!l.contains("\"timestamp\":")) + .forEach(l -> { + singlePhenoJson.append(l) + .append("\n"); + }); + } + StringBuilder doublePhenoJson = new StringBuilder(); + try (Stream lines = Files.lines(doublePhenotyperOutput)) { + lines.filter(l -> !l.contains("\"timestamp\":")) + .forEach(l -> { + doublePhenoJson.append(l) + .append("\n"); + }); + } + + assertEquals(singlePhenoJson.toString(), doublePhenoJson.toString()); } finally { TestUtils.deleteTestFiles(outputDir); } @@ -535,38 +574,34 @@ void multipleOutsideCallFiles(TestInfo testInfo) throws Exception { Path outsideCallFile2 = PathUtils.getPathToResource("org/pharmgkb/pharmcat/PharmCATTest-outsideCallsNoRecs.tsv"); Path outputDir = TestUtils.getTestOutputDir(testInfo, true); - try { - String systemOut = tapSystemOut(() -> PharmCAT.main(new String[] { - "-phenotyper", - "-reporter", - "-reporterJson", - "-po", outsideCallFile1.toString(), - "-po", outsideCallFile2.toString(), - "-o", outputDir.toString(), - })); - assertTrue(systemOut.contains("Done.")); - - // file names should be based on the first outside call file - assertTrue(Files.exists(outputDir.resolve("PharmCATTest-cyp2d6.phenotype.json"))); - assertTrue(Files.exists(outputDir.resolve("PharmCATTest-cyp2d6.report.json"))); - assertTrue(Files.exists(outputDir.resolve("PharmCATTest-cyp2d6.report.html"))); - - // file names should NOT be based on the second outside call file - assertFalse(Files.exists(outputDir.resolve("PharmCATTest-outsideCallsNoRecs.phenotype.json"))); - assertFalse(Files.exists(outputDir.resolve("PharmCATTest-outsideCallsNoRecs.report.json"))); - assertFalse(Files.exists(outputDir.resolve("PharmCATTest-outsideCallsNoRecs.report.html"))); - - Phenotyper phenotyper = Phenotyper.read(outputDir.resolve("PharmCATTest-cyp2d6.phenotype.json")); - checkOutsideDiplotype(phenotyper.findGeneReport(DataSource.CPIC, "CYP2D6").orElse(null), - "*3", "*4"); - checkOutsideDiplotype(phenotyper.findGeneReport(DataSource.CPIC, "CYP4F2").orElse(null), - "*1", 
"*3"); - checkOutsideDiplotype(phenotyper.findGeneReport(DataSource.CPIC, "IFNL3").orElse(null), - "rs12979860 variant (T)", "rs12979860 variant (T)"); - - } finally { - TestUtils.deleteTestFiles(outputDir); - } + String systemOut = tapSystemOut(() -> PharmCAT.main(new String[] { + "-phenotyper", + "-reporter", + "-reporterHtml", + "-reporterJson", + "-po", outsideCallFile1.toString(), + "-po", outsideCallFile2.toString(), + "-o", outputDir.toString(), + })); + assertTrue(systemOut.contains("Done.")); + + // file names should be based on the first outside call file + assertTrue(Files.exists(outputDir.resolve("PharmCATTest-cyp2d6.phenotype.json"))); + assertTrue(Files.exists(outputDir.resolve("PharmCATTest-cyp2d6.report.json"))); + assertTrue(Files.exists(outputDir.resolve("PharmCATTest-cyp2d6.report.html"))); + + // file names should NOT be based on the second outside call file + assertFalse(Files.exists(outputDir.resolve("PharmCATTest-outsideCallsNoRecs.phenotype.json"))); + assertFalse(Files.exists(outputDir.resolve("PharmCATTest-outsideCallsNoRecs.report.json"))); + assertFalse(Files.exists(outputDir.resolve("PharmCATTest-outsideCallsNoRecs.report.html"))); + + Phenotyper phenotyper = Phenotyper.read(outputDir.resolve("PharmCATTest-cyp2d6.phenotype.json")); + checkOutsideDiplotype(phenotyper.findGeneReport(DataSource.CPIC, "CYP2D6").orElse(null), + "*3", "*4"); + checkOutsideDiplotype(phenotyper.findGeneReport(DataSource.CPIC, "CYP4F2").orElse(null), + "*1", "*3"); + checkOutsideDiplotype(phenotyper.findGeneReport(DataSource.CPIC, "IFNL3").orElse(null), + "rs12979860 variant (T)", "rs12979860 variant (T)"); } public static void checkOutsideDiplotype(@Nullable GeneReport report, String allele1, String allele2) { diff --git a/src/test/java/org/pharmgkb/pharmcat/PipelineTest.java b/src/test/java/org/pharmgkb/pharmcat/PipelineTest.java index b49dbafa..9d380ecc 100644 --- a/src/test/java/org/pharmgkb/pharmcat/PipelineTest.java +++ 
b/src/test/java/org/pharmgkb/pharmcat/PipelineTest.java @@ -462,7 +462,7 @@ void testAll(TestInfo testInfo) throws Exception { .reference("TPMT") .reference("UGT1A1") .reference("VKORC1"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testCalledByMatcher( "ABCG2", @@ -491,7 +491,7 @@ void testAll(TestInfo testInfo) throws Exception { @Test void testNoData(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); - testWrapper.execute(null, true); + testWrapper.execute(null, null, true); } @@ -649,7 +649,7 @@ void testCyp2c19(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .variation("CYP2C19", "rs3758581", "G", "G"); - testWrapper.execute(s_otherOutsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_otherOutsideCallFilePath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testPrintCpicCalls( "CYP2C19", "*1/*1"); @@ -673,7 +673,7 @@ void testCyp2c19_s1s2rs58973490het(TestInfo testInfo) throws Exception { .variation("CYP2C19", "rs58973490", "G", "A") .variation("CYP2C19", "rs4244285", "G", "A") .variation("CYP2C19", "rs3758581", "G", "G"); - testWrapper.execute(s_otherOutsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_otherOutsideCallFilePath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testPrintCpicCalls( "CYP2C19", "*1/*2"); @@ -720,7 +720,7 @@ void testCyp2c19_s1s2(TestInfo testInfo) throws Exception { .variation("CYP2C19", "rs12769205", "A", "G") .variation("CYP2C19", "rs4244285", "G", "A") .variation("CYP2C19", "rs3758581", "G", "G"); - testWrapper.execute(s_otherOutsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_otherOutsideCallFilePath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testPrintCpicCalls( "CYP2C19", "*1/*2"); @@ -767,7 +767,7 @@ void testClomipramineCall(TestInfo testInfo) throws Exception { 
.variation("CYP2C19", "rs12769205", "G", "G") .variation("CYP2C19", "rs4244285", "A", "A") .variation("CYP2C19", "rs3758581", "G", "G"); - testWrapper.execute(s_otherOutsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_otherOutsideCallFilePath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testPrintCpicCalls( "CYP2C19", "*2/*2"); @@ -819,7 +819,7 @@ void testCyp2c19noCall(TestInfo testInfo) throws Exception { testWrapper.getVcfBuilder() .variation("CYP2C19", "rs12769205", "A", "G") .variation("CYP2C19", "rs4244285", "A", "A"); - testWrapper.execute(s_otherOutsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_otherOutsideCallFilePath); testWrapper.testNotCalledByMatcher("CYP2C19"); @@ -856,7 +856,7 @@ void testCyp2c19s1s4het(TestInfo testInfo) throws Exception { .variation("CYP2C19", "rs12248560", "T", "T") .variation("CYP2C19", "rs28399504", "A", "G") .variation("CYP2C19", "rs3758581", "G", "G"); - testWrapper.execute(s_outsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_outsideCallFilePath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testReportable("CYP2D6"); @@ -876,7 +876,7 @@ void testCyp2c19s1s4missingS1(TestInfo testInfo) throws Exception { .variation("CYP2C19", "rs12248560", "C", "T") .variation("CYP2C19", "rs28399504", "A", "G") .missing("CYP2C19", "rs3758581"); - testWrapper.execute(s_outsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_outsideCallFilePath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testReportable("CYP2D6"); @@ -914,7 +914,7 @@ void testCyp2c19SingleGeneMatch(TestInfo testInfo) throws Exception { .reference("CYP2C19") .variation("CYP2C19", "rs3758581", "A", "G") .missing("CYP2C19", "rs56337013"); - testWrapper.execute(s_outsideCallFilePath); + testWrapper.executeWithOutsideCalls(s_outsideCallFilePath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testReportable("CYP2D6"); @@ -1517,7 +1517,7 @@ void testHlab(TestInfo testInfo) throws Exception { 
PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C9"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testCalledByMatcher("CYP2C9"); testWrapper.testReportable("CYP2C9"); @@ -1556,7 +1556,7 @@ void testSingleHlabAllele(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C9"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testNotCalledByMatcher("HLA-B"); testWrapper.testReportable("HLA-B"); @@ -1591,7 +1591,7 @@ void testHlabPhenotype(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C9"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testCalledByMatcher("CYP2C9"); testWrapper.testNotCalledByMatcher("HLA-B"); @@ -1639,7 +1639,7 @@ void testRecommendationExamples(TestInfo testInfo) throws Exception { .variation("CYP2C19", "rs12769205", "G", "G") .variation("CYP2C19", "rs4244285", "A", "A") .variation("CYP2C19", "rs3758581", "G", "G"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testRecommendedDiplotypes("CYP2C19", "*2", "*2"); testWrapper.testPrintCpicCalls("CYP2C19", "*2/*2"); @@ -1782,7 +1782,7 @@ void testMtrnr1(TestInfo testInfo) throws Exception { .reference("CYP2C19") .reference("CYP2C9") ; - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testCalledByMatcher("CYP2C9"); @@ -1850,7 +1850,7 @@ void testPartialCallInTwoGene(TestInfo testInfo) throws Exception { .variation("CYP2C19", "rs367543002", "C", "T") .variation("CYP2C19", "rs3758581", "G", "G") 
.missing("CYP2C19", "rs367543003"); - testWrapper.execute(s_outsideCallFilePath); //CYP2D6 *1/*4 + testWrapper.executeWithOutsideCalls(s_outsideCallFilePath); //CYP2D6 *1/*4 testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testReportable("CYP2C19"); @@ -1876,7 +1876,7 @@ void testOutsideCallCollision(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C19"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testNotCalledByMatcher("CYP2C19"); // this is the diplotype indicated in the outside call, not the one matched @@ -1904,7 +1904,7 @@ void outsideCallCollision2Files(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C9"); - Path vcfFile = testWrapper.execute(outsideCallPath1, outsideCallPath2); + Path vcfFile = testWrapper.executeWithOutsideCalls(outsideCallPath1, outsideCallPath2); // this is an outside calls testWrapper.testNotCalledByMatcher("CYP4F2"); @@ -1934,7 +1934,7 @@ void testOutsideCallDiplotypeNormalization(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C9"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testNotCalledByMatcher("CYP2C19"); // this should be a normalized version of the given diplotype @@ -1951,7 +1951,7 @@ void testOutsideCallPhenotypeOverridesDiplotype(TestInfo testInfo) throws Except PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C19"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); GeneReport geneReport = testWrapper.getContext().getGeneReport(DataSource.CPIC, "CYP2D6"); assertNotNull(geneReport); @@ 
-1987,7 +1987,7 @@ void testOutsideCallActivityScore(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C19"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testPrintCalls(DataSource.CPIC, "CYP2C19", "*38/*38"); @@ -2016,7 +2016,7 @@ void testOutsideCallActivityScoreAndPhenotype(TestInfo testInfo) throws Exceptio PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C19"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); testWrapper.testCalledByMatcher("CYP2C19"); testWrapper.testPrintCalls(DataSource.CPIC, "CYP2C19", "*38/*38"); @@ -2088,7 +2088,7 @@ void testOutsideSinglePositionCalls(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); testWrapper.getVcfBuilder() .reference("CYP2C9"); - testWrapper.execute(outsideCallPath); + testWrapper.executeWithOutsideCalls(outsideCallPath); // these are outside calls testWrapper.testNotCalledByMatcher("IFNL3"); diff --git a/src/test/java/org/pharmgkb/pharmcat/PipelineWrapper.java b/src/test/java/org/pharmgkb/pharmcat/PipelineWrapper.java index 62ae268c..f8c956e9 100644 --- a/src/test/java/org/pharmgkb/pharmcat/PipelineWrapper.java +++ b/src/test/java/org/pharmgkb/pharmcat/PipelineWrapper.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -38,7 +39,7 @@ * * @author Mark Woon */ -class PipelineWrapper { +public class PipelineWrapper { // controls to support running PipelineTest from SyntheticBatchTest private static boolean m_compact = true; private static List m_sources = 
PrescribingGuidanceSource.listValues(); @@ -63,16 +64,16 @@ static void setSources(List sources) { } - PipelineWrapper(TestInfo testInfo, boolean allMatches) throws IOException, ReportableException { + public PipelineWrapper(TestInfo testInfo, boolean allMatches) throws IOException, ReportableException { this(testInfo, false, allMatches, false); } - PipelineWrapper(TestInfo testInfo, boolean findCombinations, boolean allMatches, boolean callCyp2d6) + public PipelineWrapper(TestInfo testInfo, boolean findCombinations, boolean allMatches, boolean callCyp2d6) throws IOException, ReportableException { this(testInfo, null, findCombinations, allMatches, callCyp2d6); } - PipelineWrapper(TestInfo testInfo, @Nullable String name, boolean findCombinations, boolean allMatches, + public PipelineWrapper(TestInfo testInfo, @Nullable String name, boolean findCombinations, boolean allMatches, boolean callCyp2d6) throws IOException, ReportableException { Preconditions.checkNotNull(testInfo); @@ -96,7 +97,7 @@ PipelineWrapper extendedReport() { return this; } - PipelineWrapper saveIntermediateFiles() { + public PipelineWrapper saveIntermediateFiles() { m_deleteIntermediateFiles = false; return this; } @@ -105,22 +106,43 @@ ReportContext getContext() { return m_reportContext; } - TestVcfBuilder getVcfBuilder() { + public TestVcfBuilder getVcfBuilder() { return m_vcfBuilder; } - @Nullable Path execute(Path... outsideCallPath) throws Exception { + + public @Nullable Path executeWithOutsideCalls(Path... 
outsideCallPath) throws Exception { if (outsideCallPath == null || outsideCallPath.length == 0) { - return execute(null, false); + return execute(); } - return execute(ImmutableList.copyOf(outsideCallPath), false); + return execute(null, ImmutableList.copyOf(outsideCallPath), false); } - @Nullable Path execute(@Nullable List outsideCallPaths, boolean allowNoData) throws Exception { - Path vcfFile = null; + /** + * Execute the pipeline with the specified VCF file (this file will be copied to the output location). + * + * @return path to actual VCF used + */ + public Path executeWithVcf(Path vcfFile) throws Exception { + return execute(vcfFile, null, false); + } + + public @Nullable Path execute() throws Exception { + return execute(null, null, false); + } + + + @Nullable Path execute(@Nullable Path vcfFile, @Nullable List outsideCallPaths, boolean allowNoData) + throws Exception { VcfFile vcfFileObj = null; boolean runMatcher = false; - if (m_vcfBuilder.hasData() || allowNoData) { + if (vcfFile != null) { + runMatcher = true; + Path copy = m_outputPath.resolve(vcfFile.getFileName()); + Files.copy(vcfFile, copy, StandardCopyOption.REPLACE_EXISTING); + vcfFile = copy; + vcfFileObj = new VcfFile(vcfFile, false); + } else if (m_vcfBuilder.hasData() || allowNoData) { runMatcher = true; vcfFile = m_vcfBuilder.generate(); vcfFileObj = new VcfFile(vcfFile, false); @@ -143,6 +165,10 @@ public Env getEnv() { return m_env; } + public Path getOutputDir() { + return m_outputPath; + } + private List stripHomozygousNotes(List calls) { return calls.stream() diff --git a/src/test/java/org/pharmgkb/pharmcat/TestUtils.java b/src/test/java/org/pharmgkb/pharmcat/TestUtils.java index 36c5d790..69d05dc9 100644 --- a/src/test/java/org/pharmgkb/pharmcat/TestUtils.java +++ b/src/test/java/org/pharmgkb/pharmcat/TestUtils.java @@ -46,9 +46,14 @@ public static String getTestName(TestInfo testInfo) { return testInfo.getDisplayName().replace("(TestInfo)", ""); } + public static String 
getFullTestName(TestInfo testInfo) { + //noinspection OptionalGetWithoutIsPresent + return testInfo.getTestClass().get().getSimpleName() + "-" + testInfo.getTestMethod().get().getName(); + } + /** - * Checks if test is running in a continuous integration environment. + * Checks if the test is running in a continuous integration environment. * This is determined based on the `CI` * environment variable on GH Actions. @@ -98,7 +103,7 @@ private static Path getDefaultTestOutputDir() { * Gets the output directory for the given test. * Directory is guaranteed to exist. * - * @param deleteIfExist if directory exists, it will be deleted and re-created + * @param deleteIfExist if the directory exists, it will be deleted and re-created */ public static Path getTestOutputDir(TestInfo testInfo, boolean deleteIfExist) throws IOException { return getTestOutputDir(testInfo, null, deleteIfExist); @@ -133,7 +138,7 @@ public static Path getTestOutputDir(TestInfo testInfo, @Nullable String subName, * Gets the output directory for the given class. * Directory is guaranteed to exist. 
* - * @param deleteIfExist if directory exists, it will be deleted and re-created + * @param deleteIfExist if the directory exists, it will be deleted and re-created */ public static Path getTestOutputDir(Class testClass, boolean deleteIfExist) throws IOException { Path dir = s_testOutputDir.resolve(testClass.getSimpleName()); diff --git a/src/test/java/org/pharmgkb/pharmcat/phenotype/PhenotyperTest.java b/src/test/java/org/pharmgkb/pharmcat/phenotype/PhenotyperTest.java index 0921811d..83713601 100644 --- a/src/test/java/org/pharmgkb/pharmcat/phenotype/PhenotyperTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/phenotype/PhenotyperTest.java @@ -44,7 +44,7 @@ void testCyp2C19Het() throws Exception { warnings.put("chr10:94775453", ImmutableList.of("Test warning message")); warnings.put("chr10:94852914", ImmutableList.of("Test other message")); - Phenotyper phenotyper = new Phenotyper(s_env, + Phenotyper phenotyper = new Phenotyper(s_env, null, readMatchData("Cyp2C19Het.match.json"), OutsideCallParser.parse(s_env, "CYP2D6\t*1/*3"), warnings); @@ -64,7 +64,7 @@ void testCyp2C19Het() throws Exception { @Test void testCyp2D6Only() throws Exception { - Phenotyper phenotyper = new Phenotyper(s_env, + Phenotyper phenotyper = new Phenotyper(s_env, null, new ArrayList<>(), OutsideCallParser.parse(s_env, "CYP2D6\t*1/*3"), null); @@ -95,7 +95,7 @@ void testCyp2D6Only() throws Exception { @Test void testCyp2C19Hom() throws Exception { - Phenotyper phenotyper = new Phenotyper(s_env, + Phenotyper phenotyper = new Phenotyper(s_env, null, readMatchData("Cyp2C19s2s2.match.json"), new HashSet<>(), null); @@ -106,7 +106,7 @@ void testCyp2C19Hom() throws Exception { @Test void testUGT1A1Phased() throws Exception { - Phenotyper phenotyper = new Phenotyper(s_env, + Phenotyper phenotyper = new Phenotyper(s_env, null, readMatchData("UGT1A1s1s60s80phased.match.json"), new HashSet<>(), null); @@ -119,7 +119,7 @@ void testUGT1A1Phased() throws Exception { @Test void testUGT1A1Unphased() 
throws Exception { - Phenotyper phenotyper = new Phenotyper(s_env, + Phenotyper phenotyper = new Phenotyper(s_env, null, readMatchData("UGT1A1s1s60s80unphased.match.json"), new HashSet<>(), null); @@ -132,7 +132,7 @@ void testUGT1A1Unphased() throws Exception { @Test void testNUDT15() throws Exception { - Phenotyper phenotyper = new Phenotyper(s_env, + Phenotyper phenotyper = new Phenotyper(s_env, null, readMatchData("NUDT15ref.match.json"), new HashSet<>(), null); @@ -144,7 +144,7 @@ void testNUDT15() throws Exception { @Test void testNUDT15star3() throws Exception { - Phenotyper phenotyper = new Phenotyper(s_env, + Phenotyper phenotyper = new Phenotyper(s_env, null, readMatchData("NUDT15s3.match.json"), new HashSet<>(), null); diff --git a/src/test/java/org/pharmgkb/pharmcat/reporter/ReporterTest.java b/src/test/java/org/pharmgkb/pharmcat/reporter/ReporterTest.java index 7306db4a..b1d12d10 100644 --- a/src/test/java/org/pharmgkb/pharmcat/reporter/ReporterTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/reporter/ReporterTest.java @@ -50,7 +50,7 @@ void deleteDirectory(TestInfo testInfo) { void cypc2c9VariantPassthrough() throws Exception { Phenotyper phenotyper = Phenotyper.read(PathUtils.getPathToResource("org/pharmgkb/pharmcat/reporter/ReporterTest-cypc2c9VariantPassthrough.json")); - ReportContext reportContext = new ReportContext(m_env, phenotyper.getGeneReports(), null); + ReportContext reportContext = new ReportContext(m_env, phenotyper, null); // test the CYP2C9 data GeneReport geneReport = reportContext.getGeneReport(DataSource.CPIC, "CYP2C9"); @@ -85,7 +85,7 @@ void multipleActivityScores(TestInfo testInfo) throws Exception { Phenotyper phenotyper = Phenotyper.read( PathUtils.getPathToResource("org/pharmgkb/pharmcat/reporter/ReporterTest-multipleActivityScores.json")); - ReportContext reportContext = new ReportContext(new Env(), phenotyper.getGeneReports(), null); + ReportContext reportContext = new ReportContext(new Env(), phenotyper, null); // test 
the CYP2C9 data GeneReport geneReport = reportContext.getGeneReport(DataSource.CPIC, "CYP2C9"); @@ -138,7 +138,7 @@ void multiplePhenotypes(TestInfo testInfo) throws Exception { Phenotyper phenotyper = Phenotyper.read( PathUtils.getPathToResource("org/pharmgkb/pharmcat/reporter/ReporterTest-multiplePhenotypes.json")); - ReportContext reportContext = new ReportContext(new Env(), phenotyper.getGeneReports(), null); + ReportContext reportContext = new ReportContext(new Env(), phenotyper, null); // test the CYP2C9 data GeneReport geneReport = reportContext.getGeneReport(DataSource.CPIC, "CYP2C9"); diff --git a/src/test/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormatTest.java b/src/test/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormatTest.java new file mode 100644 index 00000000..da7b6a89 --- /dev/null +++ b/src/test/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormatTest.java @@ -0,0 +1,150 @@ +package org.pharmgkb.pharmcat.reporter.format; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; +import org.pharmgkb.common.util.PathUtils; +import org.pharmgkb.pharmcat.BaseConfig; +import org.pharmgkb.pharmcat.PipelineWrapper; + +import static org.junit.jupiter.api.Assertions.*; +import static org.pharmgkb.pharmcat.reporter.model.result.Haplotype.UNKNOWN; + + +/** + * This is a JUnit test for {@link CallsOnlyFormat}. 
+ * + * @author Mark Woon + */ +class CallsOnlyFormatTest { + + + @Test + void reference(TestInfo testInfo) throws Exception { + PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false) + .saveIntermediateFiles() + ; + Path vcfFile = testWrapper.executeWithVcf(PathUtils.getPathToResource("org/pharmgkb/pharmcat/reference.vcf")); + + String basename = BaseConfig.getBaseFilename(Objects.requireNonNull(vcfFile).getFileName()); + Path normalFile = testWrapper.getOutputDir().resolve(basename + BaseConfig.REPORTER_SUFFIX + ".tsv"); + String normalTsv = Files.readString(normalFile); + String[] lines = normalTsv.split("\n"); + Map> geneMap = parseTsv(lines, 16); + assertTrue(geneMap.size() >= 18); + } + + + @Test + void dpydHaplotypes(TestInfo testInfo) throws Exception { + + PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false) + .saveIntermediateFiles(); + testWrapper.getVcfBuilder() + .variation("DPYD", "rs72547601", "C", "C") // c.2933A>G - no function + .variation("DPYD", "rs67376798", "A", "T") // c.2846A>T - decreased + .variation("DPYD", "rs60139309", "T", "C") // c.2582A>G - normal + ; + Path vcfFile = testWrapper.execute(); + + List expectedCalls = List.of("c.2582A>G", "c.2846A>T", "c.2933A>G/c.2933A>G"); + + String basename = BaseConfig.getBaseFilename(Objects.requireNonNull(vcfFile).getFileName()); + Path normalFile = vcfFile.getParent().resolve(basename + BaseConfig.REPORTER_SUFFIX + ".tsv"); + String normalTsv = Files.readString(normalFile); + String[] lines = normalTsv.split("\n"); + Map> geneMap = parseTsv(lines, 16, "DPYD"); + assertEquals(1, geneMap.get("DPYD").size()); + checkTextContains(geneMap.get("DPYD").get(0), expectedCalls); + } + + + @Test + void multipleDiplotypes(TestInfo testInfo) throws Exception { + + PipelineWrapper testWrapper = new PipelineWrapper(testInfo, true) + .saveIntermediateFiles(); + testWrapper.getVcfBuilder() + .variation("DPYD", "rs72547601", "C", "C") // c.2933A>G - no function + .variation("DPYD", 
"rs67376798", "A", "T") // c.2846A>T - decreased + .variation("DPYD", "rs60139309", "T", "C") // c.2582A>G - normal + .missing("CYP2C19", + "rs55752064", + "rs1564656981", + "rs55640102") + .variation("CYP2C19", "rs3758581", "G", "G") + ; + Path vcfFile = testWrapper.execute(); + + List expectedDpydCalls = List.of("c.2582A>G", "c.2846A>T", "c.2933A>G/c.2933A>G"); + + String basename = BaseConfig.getBaseFilename(Objects.requireNonNull(vcfFile).getFileName()); + Path normalFile = vcfFile.getParent().resolve(basename + BaseConfig.REPORTER_SUFFIX + ".tsv"); + String normalTsv = Files.readString(normalFile); + + System.out.println("normal"); + System.out.println(normalTsv); + String[] lines = normalTsv.split("\n"); + assertEquals(3, lines.length); + Map> geneMap = parseTsv(lines, 16, "CYP2C19", "DPYD"); + assertEquals(1, geneMap.get("CYP2C19").size()); + assertEquals(1, geneMap.get("DPYD").size()); + + checkTextContains(geneMap.get("DPYD").get(0), expectedDpydCalls); + String[] dpydRow = geneMap.get("DPYD").get(0).split("\t"); + // no phenotype and activity score, but recommendation phenotype and activity score + assertTrue(StringUtils.isBlank(dpydRow[2])); + assertTrue(StringUtils.isBlank(dpydRow[3])); + System.out.println(dpydRow[14]); + System.out.println(dpydRow[15]); + assertTrue(StringUtils.isNotBlank(dpydRow[14])); + assertTrue(StringUtils.isNotBlank(dpydRow[15])); + } + + + private Map> parseTsv(String[] lines, int maxColumns, String... 
genes) { + // test normal + Map> geneMap = new HashMap<>(); + for (String line : lines) { + String[] data = line.split("\t"); + if (data.length != maxColumns) { + StringBuilder builder = new StringBuilder(); + for (int x = 0; x < data.length; x += 1) { + builder.append(x) + .append(" - ") + .append(data[x]) + .append("\n"); + } + assertEquals(maxColumns, data.length, "Found columns:\n" + builder); + } + if (line.startsWith("Gene\t")) { + continue; + } + geneMap.computeIfAbsent(data[0], k -> new ArrayList<>()) + .add(line); + } + if (genes != null && genes.length > 0) { + assertEquals(genes.length, geneMap.keySet().stream() + .map(g -> geneMap.get(g).get(0)) + .filter(n -> !n.contains(UNKNOWN)) + .count()); + for (String gene : genes) { + assertNotNull(geneMap.get(gene), "Missing data for " + gene); + } + } + return geneMap; + } + + private void checkTextContains(String text, List calls) { + for (String call : calls) { + assertTrue(text.contains(call)); + } + } +}