diff --git a/src/main/java/org/pharmgkb/pharmcat/BaseConfig.java b/src/main/java/org/pharmgkb/pharmcat/BaseConfig.java index ce3cfb61..a2273fa5 100644 --- a/src/main/java/org/pharmgkb/pharmcat/BaseConfig.java +++ b/src/main/java/org/pharmgkb/pharmcat/BaseConfig.java @@ -54,6 +54,7 @@ public class BaseConfig { boolean deleteIntermediateFiles; boolean verbose; SortedSet samples = new TreeSet<>(); + Path sampleMetadataFile; BaseConfig(CliHelper cliHelper) throws IOException, ReportableException { @@ -94,6 +95,9 @@ public class BaseConfig { } } } + if (cliHelper.hasOption("sm")) { + sampleMetadataFile = cliHelper.getValidFile("sm", true); + } boolean researchMode = false; if (runMatcher) { diff --git a/src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java b/src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java index 4612a225..ca20a565 100644 --- a/src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java +++ b/src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java @@ -49,6 +49,7 @@ public static void main(String[] args) { .addOption("i", "input-dir", "Directory containing source data files", false, "dir") .addOption("s", "samples", "Comma-separated list of samples", false, "samples") .addOption("S", "sample-file", "File containing a list of sample, one per line", false, "file") + .addOption("sm", "sample-metadata", "TSV containing sample metadata", false, "file") // named allele matcher args .addOption("matcher", "matcher", "Run named allele matcher independently") @@ -401,7 +402,7 @@ public Pipeline build(Env env, int index, int totalTasks) throws ReportableExcep m_config.reporterSources, m_config.reporterCompact, m_config.reporterJson, m_config.reporterHtml, m_config.reporterCallsOnlyTsv, m_config.outputDir, m_config.baseFilename, m_config.deleteIntermediateFiles, - mode, (index + "/" + totalTasks), m_verbose); + mode, (index + "/" + totalTasks), m_verbose, m_config.sampleMetadataFile); } diff --git a/src/main/java/org/pharmgkb/pharmcat/Constants.java 
b/src/main/java/org/pharmgkb/pharmcat/Constants.java index 07ab0390..62594613 100644 --- a/src/main/java/org/pharmgkb/pharmcat/Constants.java +++ b/src/main/java/org/pharmgkb/pharmcat/Constants.java @@ -1,5 +1,6 @@ package org.pharmgkb.pharmcat; +import java.util.Collections; import java.util.List; import java.util.Set; import java.util.SortedSet; @@ -97,7 +98,7 @@ public static boolean isActivityScoreGene(String gene, DataSource dataSource) { Set genes = switch (dataSource) { case CPIC -> ACTIVITY_SCORE_GENES_CPIC; case DPWG -> ACTIVITY_SCORE_GENES_DPWG; - default -> throw new RuntimeException("No genes specified for " + dataSource); + default -> Collections.emptySet(); }; return gene != null && genes.contains(gene.toUpperCase()); } diff --git a/src/main/java/org/pharmgkb/pharmcat/Env.java b/src/main/java/org/pharmgkb/pharmcat/Env.java index c78c4f35..7d4bd7c7 100644 --- a/src/main/java/org/pharmgkb/pharmcat/Env.java +++ b/src/main/java/org/pharmgkb/pharmcat/Env.java @@ -1,10 +1,13 @@ package org.pharmgkb.pharmcat; +import java.io.BufferedReader; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; import java.util.Map; import java.util.Optional; +import java.util.TreeMap; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; import org.checkerframework.checker.nullness.qual.Nullable; @@ -33,6 +36,7 @@ public class Env { private MessageHelper m_messageHelper; private final Map>> m_haplotypeCache = new HashMap<>(); private final Multimap m_validHaplotypes = HashMultimap.create(); + private final Map>> m_sampleDataMap = new HashMap<>(); public Env() throws IOException, ReportableException { @@ -189,4 +193,41 @@ public synchronized Haplotype makeHaplotype(String gene, String name, DataSource return haplotype; }); } + + + public synchronized @Nullable Map getSampleMetadata(Path sampleMetadataFile, String sampleId, + boolean cache) throws IOException { + + Map> fileMap; + if 
(cache) { + fileMap = m_sampleDataMap.computeIfAbsent(sampleMetadataFile, f -> new TreeMap<>()); + if (!fileMap.isEmpty()) { + return fileMap.get(sampleId); + } + } else { + fileMap = new TreeMap<>(); + } + try (BufferedReader reader = Files.newBufferedReader(sampleMetadataFile)) { + String line; + while ((line = reader.readLine()) != null) { + String[] row = line.split("\t"); + if (row.length >= 3) { + String sid = row[0]; + if (cache) { + fileMap.computeIfAbsent(sid, k -> new HashMap<>()) + .put(row[1], row[2]); + } else { + if (sid.equals(sampleId)) { + fileMap.computeIfAbsent(sid, k -> new HashMap<>()) + .put(row[1], row[2]); + } else if (fileMap.containsKey(sampleId)) { + // all values for a single sample must be consecutive, so stop at the first other sample once the requested one has been read + break; + } + } + } + } + } + return fileMap.get(sampleId); + } } diff --git a/src/main/java/org/pharmgkb/pharmcat/PharmCAT.java b/src/main/java/org/pharmgkb/pharmcat/PharmCAT.java index 4c5a001e..2fcf8da6 100644 --- a/src/main/java/org/pharmgkb/pharmcat/PharmCAT.java +++ b/src/main/java/org/pharmgkb/pharmcat/PharmCAT.java @@ -28,6 +28,7 @@ public static void main(String[] args) { // inputs .addOption("s", "samples", "Comma-separated list of samples", false, "samples") .addOption("S", "sample-file", "File containing a list of sample, one per line", false, "file") + .addOption("sm", "sample-metadata", "TSV containing sample metadata", false, "file") // named allele matcher args .addOption("matcher", "matcher", "Run named allele matcher independently") diff --git a/src/main/java/org/pharmgkb/pharmcat/Pipeline.java b/src/main/java/org/pharmgkb/pharmcat/Pipeline.java index b63f8cf2..774b36ae 100644 --- a/src/main/java/org/pharmgkb/pharmcat/Pipeline.java +++ b/src/main/java/org/pharmgkb/pharmcat/Pipeline.java @@ -61,6 +61,7 @@ public enum Mode { private Path m_matcherHtmlFile; /** True if the VCF file only contains a single sample. 
*/ private final boolean m_singleSample; + private final Path m_sampleMetadataFile; private final boolean m_runPhenotyper; private Path m_phenotyperInputFile; @@ -97,7 +98,8 @@ public Pipeline(Env env, BaseConfig config, @Nullable VcfFile vcfFile, config.reporterSources, config.reporterCompact, config.reporterJson, config.reporterHtml, config.reporterCallsOnlyTsv, config.outputDir, config.baseFilename, config.deleteIntermediateFiles, - Pipeline.Mode.CLI, null, config.verbose); + Pipeline.Mode.CLI, null, config.verbose, + config.sampleMetadataFile); } @@ -109,11 +111,13 @@ public Pipeline(Env env, @Nullable List reporterSources, boolean reporterCompact, boolean reporterJson, boolean reporterHtml, boolean reporterCallsOnlyTsv, @Nullable Path outputDir, @Nullable String baseFilename, boolean deleteIntermediateFiles, - Mode mode, @Nullable String displayCount, boolean verbose) throws ReportableException { + Mode mode, @Nullable String displayCount, boolean verbose, + @Nullable Path sampleMetadataFile) throws ReportableException { m_env = env; m_runMatcher = runMatcher; m_baseDir = outputDir; + m_sampleMetadataFile = sampleMetadataFile; if (runMatcher) { m_vcfFile = Objects.requireNonNull(vcfFile); m_sampleId = sampleId; @@ -268,7 +272,7 @@ public PipelineResult call() throws IOException { if (!batchDisplayMode) { namedAlleleMatcher.printWarnings(); } - matcherResult = namedAlleleMatcher.call(m_vcfFile, m_sampleId); + matcherResult = namedAlleleMatcher.call(m_vcfFile, m_sampleId, m_sampleMetadataFile); if (matcherResult.getVcfWarnings() != null && !matcherResult.getVcfWarnings().isEmpty()) { diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/AutogeneratedVcfTester.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/AutogeneratedVcfTester.java index 11859f14..2f817a31 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/AutogeneratedVcfTester.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/AutogeneratedVcfTester.java @@ -214,7 +214,7 @@ private 
void test(String gene, NamedAlleleMatcher namedAlleleMatcher, VcfFile vc boolean hasUnknownCall = expectedAlleles.contains("?"); boolean hasComboCall = !hasUnknownCall && vcfFile.getFile().getFileName().toString().contains("noCall"); - Result result = namedAlleleMatcher.call(vcfFile, null); + Result result = namedAlleleMatcher.call(vcfFile, null, null); if (isLowestFunctionGene(gene)) { if (result.getGeneCalls().get(0).getDiplotypes().isEmpty()) { diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/NamedAlleleMatcher.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/NamedAlleleMatcher.java index 2576af40..e95a6f23 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/NamedAlleleMatcher.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/NamedAlleleMatcher.java @@ -138,7 +138,7 @@ public static void main(String[] args) { NamedAlleleMatcher namedAlleleMatcher = new NamedAlleleMatcher(new Env(), definitionReader, findCombinations, topCandidateOnly, callCyp2d6) .printWarnings(); - Result result = namedAlleleMatcher.call(new VcfFile(vcfFile), null); + Result result = namedAlleleMatcher.call(new VcfFile(vcfFile), null, null); Path jsonFile = CliUtils.getOutputFile(cliHelper, vcfFile, "json", BaseConfig.MATCHER_SUFFIX + ".json"); ResultSerializer resultSerializer = new ResultSerializer(); @@ -182,10 +182,18 @@ private boolean getTopCandidateOnly(String gene) { * Calls diplotypes for the given VCF file for all genes for which a definition exists. */ public Result call(VcfFile vcfFile, @Nullable String sampleId) throws IOException { + return call(vcfFile, sampleId, null); + } + + /** + * Calls diplotypes for the given VCF file for all genes for which a definition exists. 
+ */ + public Result call(VcfFile vcfFile, @Nullable String sampleId, @Nullable Path sampleMetadataFile) throws IOException { VcfReader vcfReader = vcfFile.getReader(m_definitionReader, sampleId, m_findCombinations); SortedMap alleleMap = vcfReader.getAlleleMap(); ResultBuilder resultBuilder = new ResultBuilder(m_definitionReader, m_topCandidateOnly, m_findCombinations, m_callCyp2d6) - .forFile(vcfFile, vcfReader.getWarnings().asMap()); + .forFile(vcfFile, vcfReader.getWarnings().asMap(), vcfReader.getSampleId(), sampleMetadataFile); + if (m_printWarnings) { vcfReader.getWarnings().keySet() .forEach(key -> { @@ -207,7 +215,7 @@ public Result call(VcfFile vcfFile, @Nullable String sampleId) throws IOExceptio callAssumingReference(vcfReader.getSampleId(), alleleMap, gene, resultBuilder); } } - return resultBuilder.build(); + return resultBuilder.build(m_env); } /** diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/ResultBuilder.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/ResultBuilder.java index ae91cc3e..8079fe24 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/ResultBuilder.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/ResultBuilder.java @@ -1,5 +1,7 @@ package org.pharmgkb.pharmcat.haplotype; +import java.io.IOException; +import java.nio.file.Path; import java.util.Collection; import java.util.Date; import java.util.List; @@ -10,6 +12,7 @@ import com.google.common.base.Preconditions; import org.checkerframework.checker.nullness.qual.Nullable; import org.pharmgkb.common.util.PathUtils; +import org.pharmgkb.pharmcat.Env; import org.pharmgkb.pharmcat.VcfFile; import org.pharmgkb.pharmcat.definition.DefinitionReader; import org.pharmgkb.pharmcat.definition.model.DefinitionFile; @@ -35,6 +38,7 @@ public class ResultBuilder { private final boolean m_topCandidatesOnly; private final boolean m_findCombinations; private final boolean m_callCyp2d6; + private Path m_sampleMetadataFile; public ResultBuilder(DefinitionReader 
definitionReader, boolean topCandidatesOnly, boolean findCombinations, @@ -46,19 +50,30 @@ public ResultBuilder(DefinitionReader definitionReader, boolean topCandidatesOnl m_callCyp2d6 = callCyp2d6; } - public Result build() { + public Result build(Env env) throws IOException { + if (m_sampleMetadataFile != null) { + Metadata metadata = m_result.getMetadata(); + Map sampleData = env.getSampleMetadata(m_sampleMetadataFile, metadata.getSampleId(), true); + if (sampleData != null && !sampleData.isEmpty()) { + metadata.setSampleProps(sampleData); + } + } return m_result; } - public ResultBuilder forFile(VcfFile vcfFile, Map> warnings) { + public ResultBuilder forFile(VcfFile vcfFile, Map> warnings, String sampleId, + @Nullable Path sampleMetadataFile) { Preconditions.checkNotNull(vcfFile); - m_result.setMetadata(new Metadata(NamedAlleleMatcher.VERSION, m_definitionReader.getGenomeBuild(), - PathUtils.getFilename(vcfFile.getFile()), new Date(), m_topCandidatesOnly, m_findCombinations, m_callCyp2d6)); + Metadata metadata = new Metadata(NamedAlleleMatcher.VERSION, m_definitionReader.getGenomeBuild(), + PathUtils.getFilename(vcfFile.getFile()), new Date(), m_topCandidatesOnly, m_findCombinations, m_callCyp2d6, + sampleId); + m_result.setMetadata(metadata); if (warnings != null) { m_result.setVcfWarnings(warnings); } + m_sampleMetadataFile = sampleMetadataFile; return this; } @@ -74,7 +89,7 @@ protected ResultBuilder gene(String gene, MatchData matchData) { /** - * Adds diplotype results for specified gene. + * Adds diplotype results for a specified gene. 
*/ protected ResultBuilder diplotypes(String gene, MatchData matchData, SortedSet matches) { Preconditions.checkNotNull(gene); @@ -82,7 +97,7 @@ protected ResultBuilder diplotypes(String gene, MatchData matchData, SortedSet matches, @Nullable List warnings) { @@ -100,7 +115,7 @@ protected ResultBuilder diplotypes(String gene, MatchData matchData, SortedSet * This should only be used when we can't get diplotypes but still need to track potential haplotypes (e.g. DPYD). */ @@ -109,7 +124,7 @@ protected ResultBuilder haplotypes(String gene, MatchData matchData, List * This should only be used when we can't get diplotypes but still need to track potential haplotypes (e.g. DPYD). */ diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/VcfReader.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/VcfReader.java index 85580e4d..102a6596 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/VcfReader.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/VcfReader.java @@ -122,7 +122,11 @@ public VcfReader(DefinitionReader definitionReader, Path vcfFile) throws IOExcep } - public @Nullable String getSampleId() { + /** + * Gets the Sample ID of the data to read. 
+ */ + public String getSampleId() { + // this should never be null after read() is called return m_sampleId; } diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/model/Metadata.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/model/Metadata.java index c5516d81..6198127f 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/model/Metadata.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/model/Metadata.java @@ -2,8 +2,10 @@ package org.pharmgkb.pharmcat.haplotype.model; import java.util.Date; +import java.util.Map; import com.google.gson.annotations.Expose; import com.google.gson.annotations.SerializedName; +import org.checkerframework.checker.nullness.qual.Nullable; public class Metadata { @@ -18,7 +20,7 @@ public class Metadata { private String m_inputFilename; @Expose @SerializedName("timestamp") - private Date m_timetamp; + private Date m_timestamp; @Expose @SerializedName("topCandidatesOnly") private boolean m_topCandidatesOnly; @@ -28,17 +30,24 @@ public class Metadata { @Expose @SerializedName("callCyp2d") private boolean m_callCyp2d6; + @Expose + @SerializedName("sampleId") + private String m_sampleId; + @Expose + @SerializedName("sampleProps") + private Map m_sampleProps; public Metadata(String namedAlleleMatcherVersion, String genomeBuild, String vcfFilename, Date date, - boolean topCandidatesOnly, boolean findCombinations, boolean callCyp2d6) { + boolean topCandidatesOnly, boolean findCombinations, boolean callCyp2d6, String sampleId) { m_namedAlleleMatcherVersion = namedAlleleMatcherVersion; m_genomeBuild = genomeBuild; m_inputFilename = vcfFilename; - m_timetamp = date; + m_timestamp = date; m_topCandidatesOnly = topCandidatesOnly; m_findCombinations = findCombinations; m_callCyp2d6 = callCyp2d6; + m_sampleId = sampleId; } @@ -54,8 +63,8 @@ public String getInputFilename() { return m_inputFilename; } - public Date getTimetamp() { - return m_timetamp; + public Date getTimestamp() { + return m_timestamp; } public boolean 
isTopCandidatesOnly() { @@ -69,4 +78,21 @@ public boolean isFindCombinations() { public boolean isCallCyp2d6() { return m_callCyp2d6; } + + + public String getSampleId() { + return m_sampleId; + } + + public void setSampleId(String sampleId) { + m_sampleId = sampleId; + } + + public @Nullable Map getSampleProps() { + return m_sampleProps; + } + + public void setSampleProps(Map sampleProps) { + m_sampleProps = sampleProps; + } } diff --git a/src/main/java/org/pharmgkb/pharmcat/reporter/ReportContext.java b/src/main/java/org/pharmgkb/pharmcat/reporter/ReportContext.java index 6550ba29..8d8d2c58 100644 --- a/src/main/java/org/pharmgkb/pharmcat/reporter/ReportContext.java +++ b/src/main/java/org/pharmgkb/pharmcat/reporter/ReportContext.java @@ -74,7 +74,7 @@ public ReportContext(Env env, Phenotyper phenotyper, String title) throws IOExce m_matcherMetadata = phenotyper.getMatcherMetadata(); m_geneReports = phenotyper.getGeneReports(); m_dataVersion = validateVersions(env.getDrugs()); - if (!phenotyper.getUnannotatedGeneCalls().isEmpty()) { + if (phenotyper.getUnannotatedGeneCalls() != null && !phenotyper.getUnannotatedGeneCalls().isEmpty()) { m_unannotatedGeneCalls.addAll(phenotyper.getUnannotatedGeneCalls()); } diff --git a/src/main/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormat.java b/src/main/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormat.java index f3cbd201..f1b68de7 100644 --- a/src/main/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormat.java +++ b/src/main/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormat.java @@ -6,26 +6,23 @@ import java.nio.file.Files; import java.nio.file.OpenOption; import java.nio.file.Path; -import java.nio.file.Paths; import java.nio.file.StandardOpenOption; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.TreeSet; +import java.util.SortedMap; +import java.util.TreeMap; import java.util.stream.Collectors; -import 
java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; -import org.pharmgkb.pharmcat.BaseConfig; +import org.checkerframework.checker.nullness.qual.Nullable; import org.pharmgkb.pharmcat.Env; -import org.pharmgkb.pharmcat.haplotype.ResultSerializer; -import org.pharmgkb.pharmcat.phenotype.Phenotyper; import org.pharmgkb.pharmcat.reporter.ReportContext; import org.pharmgkb.pharmcat.reporter.TextConstants; import org.pharmgkb.pharmcat.reporter.model.DataSource; import org.pharmgkb.pharmcat.reporter.model.result.Diplotype; import org.pharmgkb.pharmcat.reporter.model.result.GeneReport; +import static org.pharmgkb.pharmcat.Constants.isActivityScoreGene; import static org.pharmgkb.pharmcat.Constants.isLowestFunctionGene; @@ -36,6 +33,7 @@ */ public class CallsOnlyFormat extends AbstractFormat { private boolean m_singleFileMode; + private boolean m_showSampleId = true; public CallsOnlyFormat(Path outputPath, Env env) { @@ -50,6 +48,11 @@ public CallsOnlyFormat singleFileMode() { return this; } + public CallsOnlyFormat hideSampleId() { + m_showSampleId = false; + return this; + } + @Override public void write(ReportContext reportContext) throws IOException { @@ -81,54 +84,80 @@ public void write(ReportContext reportContext) throws IOException { calledGenes.put(gene, primary); } - + String sampleId = null; + SortedMap sampleProps = null; + if (reportContext.getMatcherMetadata() != null) { + sampleId = reportContext.getMatcherMetadata().getSampleId(); + if (reportContext.getMatcherMetadata().getSampleProps() != null && + !reportContext.getMatcherMetadata().getSampleProps().isEmpty()) { + sampleProps = new TreeMap<>(reportContext.getMatcherMetadata().getSampleProps()); + } + } try (PrintWriter writer = new PrintWriter(Files.newBufferedWriter(getOutputPath(), StandardCharsets.UTF_8, options))) { if (printHeaders) { - writer.println("Gene\tSource Diplotype\tPhenotype\tActivity Score" + + if (m_singleFileMode && m_showSampleId) { + writer.print("Sample ID\t"); + } 
+ writer.print("Gene\tSource Diplotype\tPhenotype\tActivity Score" + "\tHaplotype 1\tHaplotype 1 Function\tHaplotype 1 Activity Value" + "\tHaplotype 2\tHaplotype 2 Function\tHaplotype 2 Activity Value" + "\tOutside Call\tMatch Score\tMissing positions?\t" + "Recommendation Lookup Diplotype\tRecommendation Lookup Phenotype\tRecommendation Lookup Activity Score"); + if (sampleProps != null) { + for (String key : sampleProps.keySet()) { + writer.print("\t"); + writer.print(key); + } + } + writer.println(); } for (String gene : calledGenes.keySet()) { GeneReport report = calledGenes.get(gene); if (!report.isCalled()) { - writeNoCall(writer, report); + writeNoCall(writer, sampleId, sampleProps, report); continue; } + // only have component haplotypes for lowest function genes when diplotypes are true diplotypes // (vs. individual haplotypes) boolean lowestFunctionSingles = isLowestFunctionGene(gene) && report.getMatcherComponentHaplotypes().isEmpty(); if (report.getSourceDiplotypes().size() > 1 || lowestFunctionSingles) { - writeCollapsedDiplotypes(writer, report, lowestFunctionSingles, true); + writeCollapsedDiplotypes(writer, sampleId, sampleProps, report, lowestFunctionSingles, true); } else { for (Diplotype dip : report.getSourceDiplotypes()) { - writeDiplotype(writer, report, dip, true); + writeDiplotype(writer, sampleId, sampleProps, report, dip, true); } } } for (GeneReport report : reportContext.getUnannotatedGeneCalls()) { if (report.getSourceDiplotypes().size() > 1) { - writeCollapsedDiplotypes(writer, report, false, false); + writeCollapsedDiplotypes(writer, sampleId, sampleProps, report, false, false); } else { for (Diplotype dip : report.getSourceDiplotypes()) { - writeDiplotype(writer, report, dip, false); + writeDiplotype(writer, sampleId, sampleProps, report, dip, false); } } } } } - private void writeNoCall(PrintWriter writer, GeneReport report) { + private void writeNoCall(PrintWriter writer, @Nullable String sampleId, @Nullable Map sampleProps, + 
GeneReport report) { + if (m_singleFileMode && m_showSampleId) { + if (sampleId != null) { + writer.print(sampleId); + } + writer.print("\t"); + } writer.print(report.getGene()); - writer.print("\tno call\t\t" + + writer.print("\tno call\t\t\t" + "\t\t\t" + "\t\t\t"); - writeCommon(writer, report, null, false); + writeCommon(writer, sampleProps, report, null, false); writer.println(); } @@ -148,13 +177,15 @@ private String generatePhenotypeValue(List phenotypes) { } - private void writeCollapsedDiplotypes(PrintWriter writer, GeneReport report, boolean lowestFunctionSingles, - boolean showRecommendationDiplotype) { + private void writeCollapsedDiplotypes(PrintWriter writer, + @Nullable String sampleId, @Nullable Map sampleProps, + GeneReport report, boolean lowestFunctionSingles, boolean showRecommendationDiplotype) { boolean hasPhenotypes = report.getSourceDiplotypes().stream() .anyMatch(d -> !d.getPhenotypes().isEmpty() && !isIgnorableValue(d.getPhenotypes().get(0))); - boolean hasActivityScores = report.getSourceDiplotypes().stream() - .anyMatch(d -> !isIgnorableValue(d.getActivityScore())); + boolean hasActivityScores = isActivityScoreGene(report.getGene(), report.getPhenotypeSource()) && + report.getSourceDiplotypes().stream() + .anyMatch(d -> !isIgnorableValue(d.getActivityScore())); StringBuilder diplotypes = new StringBuilder(); StringBuilder matchScores = new StringBuilder(); @@ -188,6 +219,12 @@ private void writeCollapsedDiplotypes(PrintWriter writer, GeneReport report, boo } }; + if (m_singleFileMode && m_showSampleId) { + if (sampleId != null) { + writer.print(sampleId); + } + writer.print("\t"); + } writer.print(report.getGene()); writer.print("\t"); writer.print(diplotypes); @@ -199,12 +236,19 @@ private void writeCollapsedDiplotypes(PrintWriter writer, GeneReport report, boo "\t\t\t" + "\t\t\t"); - writeCommon(writer, report, matchScores.toString(), showRecommendationDiplotype); + writeCommon(writer, sampleProps, report, matchScores.toString(), 
showRecommendationDiplotype); writer.println(); } - private void writeDiplotype(PrintWriter writer, GeneReport report, Diplotype dip, - boolean showRecommendationDiplotype) { + private void writeDiplotype(PrintWriter writer, @Nullable String sampleId, @Nullable Map sampleProps, + GeneReport report, Diplotype dip, boolean showRecommendationDiplotype) { + + if (m_singleFileMode && m_showSampleId) { + if (sampleId != null) { + writer.print(sampleId); + } + writer.print("\t"); + } writer.print(report.getGene()); writer.print("\t"); // diplotype @@ -214,7 +258,7 @@ private void writeDiplotype(PrintWriter writer, GeneReport report, Diplotype dip writer.print(generatePhenotypeValue(dip.getPhenotypes())); writer.print("\t"); // activity score - if (dip.getActivityScore() != null) { + if (isActivityScoreGene(report.getGene(), report.getPhenotypeSource()) && dip.getActivityScore() != null) { writer.print(generateStandardizedValue(dip.getActivityScore())); } writer.print("\t"); @@ -253,13 +297,13 @@ private void writeDiplotype(PrintWriter writer, GeneReport report, Diplotype dip } writer.print("\t"); - writeCommon(writer, report, Integer.toString(dip.getMatchScore()), showRecommendationDiplotype); + writeCommon(writer, sampleProps, report, Integer.toString(dip.getMatchScore()), showRecommendationDiplotype); writer.println(); } - private void writeCommon(PrintWriter writer, GeneReport report, String matchScore, - boolean showRecommendationDiplotype) { + private void writeCommon(PrintWriter writer, @Nullable Map sampleProps, GeneReport report, + String matchScore, boolean showRecommendationDiplotype) { // outside call writer.print(report.isOutsideCall()); writer.print("\t"); @@ -279,10 +323,18 @@ private void writeCommon(PrintWriter writer, GeneReport report, String matchScor writer.print(generatePhenotypeValue(recDip.getPhenotypes())); writer.print("\t"); // recommendation lookup activity score - writer.print(generateStandardizedValue(recDip.getActivityScore())); + if 
(isActivityScoreGene(report.getGene(), report.getPhenotypeSource())) { + writer.print(generateStandardizedValue(recDip.getActivityScore())); + } } else { writer.print("\t\t"); } + if (sampleProps != null) { + for (String key : sampleProps.keySet()) { + writer.print("\t"); + writer.print(sampleProps.get(key)); + } + } } private String buildDiplotypeName(Diplotype dip, GeneReport geneReport) { @@ -299,79 +351,4 @@ private String buildDiplotypeName(Diplotype dip, GeneReport geneReport) { } return builder.toString(); } - - - public static void main(String[] args) { - - if (args == null || args.length != 2) { - throw new IllegalArgumentException("Please specify an input and output directory"); - } - Path inDir = Paths.get(args[0]); - if (!Files.isDirectory(inDir)) { - throw new IllegalArgumentException("Not a valid directory: " + inDir); - } - Path outDir = Paths.get(args[1]); - if (!Files.isDirectory(outDir) && Files.exists(outDir)) { - throw new IllegalArgumentException("Not a valid directory: " + outDir); - } - - System.out.println("Reading from " + inDir); - System.out.println("Writing to " + outDir); - - try { - if (!Files.exists(outDir)) { - Files.createDirectories(outDir); - } - - Env env = new Env(); - readDir(env, inDir, outDir, 0); - - } catch (Exception ex) { - //noinspection CallToPrintStackTrace - ex.printStackTrace(); - } - } - - private static int readDir(Env env, Path inDir, Path outDir, int index) throws IOException { - - List matchFiles = new ArrayList<>(); - List dirs = new ArrayList<>(); - - try (Stream files = Files.list(inDir)) { - files.forEach(f -> { - if (Files.isDirectory(f)) { - dirs.add(f); - } else if (Files.isRegularFile(f)) { - if (f.toString().endsWith(".match.json")) { - matchFiles.add(f); - } - } - }); - } - - System.out.println("Found " + matchFiles.size() + " in " + inDir); - for (Path d : dirs) { - index = readDir(env, d, outDir, index); - } - for (Path mFile : matchFiles) { - index += 1; - //String basename = 
BaseConfig.getBaseFilename(mFile); - String basename = "complete"; - if (index % 1000 == 0) { - System.out.println(index); - } - org.pharmgkb.pharmcat.haplotype.model.Result matcherResult = new ResultSerializer() - .fromJson(mFile); - Phenotyper phenotyper = new Phenotyper(env, matcherResult.getMetadata(), matcherResult.getGeneCalls(), - new TreeSet<>(), new HashMap<>()); - - ReportContext reportContext = new ReportContext(env, phenotyper, basename); - - Path outFile = outDir.resolve(basename + BaseConfig.REPORTER_SUFFIX + ".tsv"); - new CallsOnlyFormat(outFile, env) - .singleFileMode() - .write(reportContext); - } - return index; - } } diff --git a/src/test/java/org/pharmgkb/pharmcat/BatchPharmCATTest.java b/src/test/java/org/pharmgkb/pharmcat/BatchPharmCATTest.java index c146025d..17d48f0d 100644 --- a/src/test/java/org/pharmgkb/pharmcat/BatchPharmCATTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/BatchPharmCATTest.java @@ -211,7 +211,7 @@ void sixSamples(TestInfo testInfo) throws Exception { @Test void multisample(TestInfo testInfo) throws Exception { Path na18526Vcf = PathUtils.getPathToResource("org/pharmgkb/pharmcat/PharmCATTest-cyp2c19MissingPositions.vcf"); - Path multisampleVcfFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.vcf"); + Path multisampleVcfFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.vcf"); Path[] vcfFiles = new Path[] { multisampleVcfFile, na18526Vcf @@ -258,7 +258,7 @@ void multisample(TestInfo testInfo) throws Exception { allInputs.add(phenotypeFile5); checkForOutputFiles(tmpDir, allInputs.toArray(new Path[0])); - PharmCATTest.validateCyp2d6OutsideCallOutput(tmpDir.resolve("VcfSampleReaderTest.Sample_1.phenotype.json")); + PharmCATTest.validateCyp2d6OutsideCallOutput(tmpDir.resolve("VcfSampleReaderTest-multisample.Sample_1.phenotype.json")); PharmCATTest.validateCyp2d6OutsideCallOutput(tmpDir.resolve("Sample_4.phenotype.json")); } @@ -266,7 
+266,7 @@ void multisample(TestInfo testInfo) throws Exception { @Test void multisampleRestricted(TestInfo testInfo) throws Exception { Path na18526Vcf = PathUtils.getPathToResource("org/pharmgkb/pharmcat/PharmCATTest-cyp2c19MissingPositions.vcf"); - Path multisampleVcfFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.vcf"); + Path multisampleVcfFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.vcf"); Path[] vcfFiles = new Path[] { multisampleVcfFile, na18526Vcf @@ -313,7 +313,7 @@ void multisampleRestricted(TestInfo testInfo) throws Exception { allInputs.add(phenotypeFile5); checkForOutputFiles(tmpDir, allInputs.toArray(new Path[0])); - PharmCATTest.validateCyp2d6OutsideCallOutput(tmpDir.resolve("VcfSampleReaderTest.Sample_1.phenotype.json")); + PharmCATTest.validateCyp2d6OutsideCallOutput(tmpDir.resolve("VcfSampleReaderTest-multisample.Sample_1.phenotype.json")); PharmCATTest.validateCyp2d6OutsideCallOutput(tmpDir.resolve("Sample_4.phenotype.json")); } diff --git a/src/test/java/org/pharmgkb/pharmcat/PharmCATTest.java b/src/test/java/org/pharmgkb/pharmcat/PharmCATTest.java index c22de68e..f0f35585 100644 --- a/src/test/java/org/pharmgkb/pharmcat/PharmCATTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/PharmCATTest.java @@ -40,7 +40,7 @@ class PharmCATTest { @AfterEach void deleteDirectory(TestInfo testInfo) { - TestUtils.deleteTestOutputDirectory(testInfo); + //TestUtils.deleteTestOutputDirectory(testInfo); } @@ -537,35 +537,37 @@ void consistentOutput(TestInfo testInfo) throws Exception { @Test void multisample(TestInfo testInfo) throws Exception { - Path vcfFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.vcf"); + Path vcfFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.vcf"); + Path saFile = 
PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.sampleData.tsv"); Path outputDir = TestUtils.getTestOutputDir(testInfo, true); - Path matcherOutput1 = outputDir.resolve("VcfSampleReaderTest.Sample_1.match.json"); - Path phenotyperOutput1 = outputDir.resolve("VcfSampleReaderTest.Sample_1.phenotype.json"); - Path reporterOutput1 = outputDir.resolve("VcfSampleReaderTest.Sample_1.report.html"); - Path matcherOutput2 = outputDir.resolve("VcfSampleReaderTest.Sample_2.match.json"); - Path phenotyperOutput2 = outputDir.resolve("VcfSampleReaderTest.Sample_2.phenotype.json"); - Path reporterOutput2 = outputDir.resolve("VcfSampleReaderTest.Sample_2.report.html"); + Path matcherOutput1 = outputDir.resolve("VcfSampleReaderTest-multisample.Sample_1.match.json"); + Path phenotyperOutput1 = outputDir.resolve("VcfSampleReaderTest-multisample.Sample_1.phenotype.json"); + Path reporterOutput1 = outputDir.resolve("VcfSampleReaderTest-multisample.Sample_1.report.html"); + Path matcherOutput2 = outputDir.resolve("VcfSampleReaderTest-multisample.Sample_2.match.json"); + Path phenotyperOutput2 = outputDir.resolve("VcfSampleReaderTest-multisample.Sample_2.phenotype.json"); + Path reporterOutput2 = outputDir.resolve("VcfSampleReaderTest-multisample.Sample_2.report.html"); + Path callsOutput1 = outputDir.resolve("VcfSampleReaderTest-multisample.Sample_1.report.tsv"); + Path callsOutput2 = outputDir.resolve("VcfSampleReaderTest-multisample.Sample_2.report.tsv"); - try { - String systemOut = tapSystemOut(() -> PharmCAT.main(new String[] { - "-vcf", vcfFile.toString(), - "-o", outputDir.toString(), - })); - System.out.println(systemOut); - assertTrue(systemOut.contains("Done.")); - - assertTrue(Files.exists(matcherOutput1)); - assertTrue(Files.exists(phenotyperOutput1)); - assertTrue(Files.exists(reporterOutput1)); + String systemOut = tapSystemOut(() -> PharmCAT.main(new String[] { + "-vcf", vcfFile.toString(), + "-o", outputDir.toString(), + "-sm", 
saFile.toString(), + "-reporterHtml", "-reporterJson", "-reporterCallsOnlyTsv" + })); + System.out.println(systemOut); + assertTrue(systemOut.contains("Done.")); - assertTrue(Files.exists(matcherOutput2)); - assertTrue(Files.exists(phenotyperOutput2)); - assertTrue(Files.exists(reporterOutput2)); + assertTrue(Files.exists(matcherOutput1)); + assertTrue(Files.exists(phenotyperOutput1)); + assertTrue(Files.exists(reporterOutput1)); + assertTrue(Files.exists(callsOutput1)); - } finally { - TestUtils.deleteTestFiles(outputDir); - } + assertTrue(Files.exists(matcherOutput2)); + assertTrue(Files.exists(phenotyperOutput2)); + assertTrue(Files.exists(reporterOutput2)); + assertTrue(Files.exists(callsOutput2)); } @Test diff --git a/src/test/java/org/pharmgkb/pharmcat/PipelineTest.java b/src/test/java/org/pharmgkb/pharmcat/PipelineTest.java index 9d380ecc..00a1d48d 100644 --- a/src/test/java/org/pharmgkb/pharmcat/PipelineTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/PipelineTest.java @@ -491,7 +491,7 @@ void testAll(TestInfo testInfo) throws Exception { @Test void testNoData(TestInfo testInfo) throws Exception { PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false); - testWrapper.execute(null, null, true); + testWrapper.execute(null, null, null, true); } diff --git a/src/test/java/org/pharmgkb/pharmcat/PipelineWrapper.java b/src/test/java/org/pharmgkb/pharmcat/PipelineWrapper.java index f8c956e9..55051722 100644 --- a/src/test/java/org/pharmgkb/pharmcat/PipelineWrapper.java +++ b/src/test/java/org/pharmgkb/pharmcat/PipelineWrapper.java @@ -115,7 +115,7 @@ public TestVcfBuilder getVcfBuilder() { if (outsideCallPath == null || outsideCallPath.length == 0) { return execute(); } - return execute(null, ImmutableList.copyOf(outsideCallPath), false); + return execute(null, ImmutableList.copyOf(outsideCallPath), null, false); } /** @@ -124,16 +124,16 @@ public TestVcfBuilder getVcfBuilder() { * @return path to actual VCF used */ public Path executeWithVcf(Path 
vcfFile) throws Exception { - return execute(vcfFile, null, false); + return execute(vcfFile, null, null, false); } public @Nullable Path execute() throws Exception { - return execute(null, null, false); + return execute(null, null, null, false); } - @Nullable Path execute(@Nullable Path vcfFile, @Nullable List outsideCallPaths, boolean allowNoData) - throws Exception { + public @Nullable Path execute(@Nullable Path vcfFile, @Nullable List outsideCallPaths, + Path sampleMetadataFile, boolean allowNoData) throws Exception { VcfFile vcfFileObj = null; boolean runMatcher = false; if (vcfFile != null) { @@ -153,7 +153,7 @@ public Path executeWithVcf(Path vcfFile) throws Exception { true, null, outsideCallPaths, true, null, null, m_sources, m_compactReport, true, true, true, m_outputPath, null, m_deleteIntermediateFiles, - Pipeline.Mode.TEST, null, false + Pipeline.Mode.TEST, null, false, sampleMetadataFile ); pcat.call(); m_reportContext = pcat.getReportContext(); diff --git a/src/test/java/org/pharmgkb/pharmcat/SyntheticBatchTest.java b/src/test/java/org/pharmgkb/pharmcat/SyntheticBatchTest.java index 467cf94b..c8e651d6 100644 --- a/src/test/java/org/pharmgkb/pharmcat/SyntheticBatchTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/SyntheticBatchTest.java @@ -614,7 +614,7 @@ true, new VcfFile(sampleVcf), null, true, true, null, outsideCallPaths, true, null, null, m_sources, m_compact, false, true, true, testDir, null, m_compact, - Pipeline.Mode.TEST, null, false + Pipeline.Mode.TEST, null, false, null ).call(); } diff --git a/src/test/java/org/pharmgkb/pharmcat/TestUtils.java b/src/test/java/org/pharmgkb/pharmcat/TestUtils.java index 69d05dc9..d07c4abc 100644 --- a/src/test/java/org/pharmgkb/pharmcat/TestUtils.java +++ b/src/test/java/org/pharmgkb/pharmcat/TestUtils.java @@ -161,6 +161,8 @@ public static Path getTestOutputDir(Class testClass, boolean deleteIfExist) thro /** * Creates a temporary test file based on {@code testInfo}, with the specified suffix. 
* This file name will change from one test to the next. + * + * @param suffix file extension including dot (e.g. ".tsv") */ public static Path createTempFile(TestInfo testInfo, String suffix) throws IOException { return createTempFile(getTestOutputDir(testInfo, false), getTestName(testInfo), suffix); diff --git a/src/test/java/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.java b/src/test/java/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.java index 424578a9..97350224 100644 --- a/src/test/java/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.java @@ -19,7 +19,7 @@ class VcfSampleReaderTest { @Test void read() throws IOException { - Path vcfFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.vcf"); + Path vcfFile = PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.vcf"); VcfSampleReader sampleReader = new VcfSampleReader(vcfFile); assertThat(sampleReader.getSamples(), hasItems("Sample_1", "Sample_2")); diff --git a/src/test/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormatTest.java b/src/test/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormatTest.java index da7b6a89..3d930921 100644 --- a/src/test/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormatTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/reporter/format/CallsOnlyFormatTest.java @@ -1,18 +1,24 @@ package org.pharmgkb.pharmcat.reporter.format; +import java.io.PrintWriter; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; import 
org.pharmgkb.common.util.PathUtils; import org.pharmgkb.pharmcat.BaseConfig; +import org.pharmgkb.pharmcat.Env; import org.pharmgkb.pharmcat.PipelineWrapper; +import org.pharmgkb.pharmcat.TestUtils; import static org.junit.jupiter.api.Assertions.*; import static org.pharmgkb.pharmcat.reporter.model.result.Haplotype.UNKNOWN; @@ -109,9 +115,40 @@ void multipleDiplotypes(TestInfo testInfo) throws Exception { } - private Map> parseTsv(String[] lines, int maxColumns, String... genes) { + @Test + void withSampleData(TestInfo testInfo) throws Exception { + Path sampleDataFile = TestUtils.createTempFile(testInfo, "tsv"); + try (PrintWriter writer = new PrintWriter(Files.newBufferedWriter(sampleDataFile))) { + writer.println("PharmCAT\tTown\tStanford"); + writer.println("PharmCAT\tState\tCA"); + } + + PipelineWrapper testWrapper = new PipelineWrapper(testInfo, false) + .saveIntermediateFiles(); + Path vcfFile = testWrapper.execute(PathUtils.getPathToResource("org/pharmgkb/pharmcat/reference.vcf"), + null, sampleDataFile, false); + + String basename = BaseConfig.getBaseFilename(Objects.requireNonNull(vcfFile).getFileName()); + Path normalFile = testWrapper.getOutputDir().resolve(basename + BaseConfig.REPORTER_SUFFIX + ".tsv"); + String normalTsv = Files.readString(normalFile); + String[] lines = normalTsv.split("\n"); + + Map> geneMap = parseTsv(lines, 18); + Env env = new Env(); + SortedSet allGenes = new TreeSet<>(env.getDefinitionReader().getGenes()); + allGenes.remove("CYP2D6"); + // TODO(markwoon): ignore NAT2 until it's fully integrated + allGenes.remove("NAT2"); + assertEquals(allGenes, geneMap.keySet()); + + System.out.println(geneMap.get("TPMT")); + assertTrue(geneMap.get("TPMT").get(0).contains("Stanford")); + } + + + private SortedMap> parseTsv(String[] lines, int maxColumns, String... 
genes) { // test normal - Map> geneMap = new HashMap<>(); + SortedMap> geneMap = new TreeMap<>(); for (String line : lines) { String[] data = line.split("\t"); if (data.length != maxColumns) { diff --git a/src/test/resources/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.sampleData.tsv b/src/test/resources/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.sampleData.tsv new file mode 100644 index 00000000..1c077c9f --- /dev/null +++ b/src/test/resources/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.sampleData.tsv @@ -0,0 +1,6 @@ +Sample_1 Group A +Sample_1 Town Los Altos +Sample_2 Town Palo Alto +Sample_2 Group B +Sample_3 Group A +Sample_3 Town Mountain View diff --git a/src/test/resources/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.vcf b/src/test/resources/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.vcf similarity index 100% rename from src/test/resources/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest.vcf rename to src/test/resources/org/pharmgkb/pharmcat/haplotype/VcfSampleReaderTest-multisample.vcf