Skip to content

Commit

Permalink
feat(reporter): finalize call-only tsv reporter
Browse files Browse the repository at this point in the history
  • Loading branch information
markwoon committed Feb 18, 2025
1 parent 9d0920a commit 485aa06
Show file tree
Hide file tree
Showing 23 changed files with 752 additions and 265 deletions.
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ task updateExample {
args = [
'-vcf', file('docs/examples/pharmcat.example.vcf'),
'-po', file('docs/examples/pharmcat.example.outsideCall.tsv'),
'-reporterJson', '-matcherHtml'
// every PharmCAT flag is passed with its leading dash (e.g. -reporterCallsOnlyTsv)
'-reporterCallsOnlyTsv', '-reporterJson', '-reporterHtml', '-matcherHtml'
]
}

Expand All @@ -214,7 +214,7 @@ task updateExample {
classpath = sourceSets.main.runtimeClasspath
args = [
'-vcf', file('docs/examples/pharmcat.example2.vcf'),
'-reporterJson', '-matcherHtml'
// every PharmCAT flag is passed with its leading dash (e.g. -reporterCallsOnlyTsv)
'-reporterCallsOnlyTsv', '-reporterJson', '-reporterHtml', '-matcherHtml'
]
}
}
Expand Down
10 changes: 10 additions & 0 deletions dockstore/pipeline/PharmCAT_Pipeline.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ workflow pharmcat_pipeline {
run_reporter: "Run reporter independently."
reporter_sources: "Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]"
reporter_extended: "Write an extended report (includes all possible genes and drugs, even if no data is available)"
reporter_save_html: "Save reporter results as HTML (the default if no format is specified)."
reporter_save_json: "Save reporter results as JSON."
reporter_save_calls_only_tsv: "Save call results only as TSV."

base_filename: "Prefix for output files. Defaults to the same base name as the input."
delete_intermediate_files: "Delete intermediate PharmCAT files. Defaults to saving all files."
Expand All @@ -52,7 +54,9 @@ workflow pharmcat_pipeline {
Boolean run_reporter = false
String reporter_sources = ""
Boolean reporter_extended = false
Boolean reporter_save_html = false
Boolean reporter_save_json = false
Boolean reporter_save_calls_only_tsv = false
String base_filename = ""
Boolean delete_intermediate_files = false
Int max_concurrent_processes = 1
Expand All @@ -74,7 +78,9 @@ workflow pharmcat_pipeline {
run_reporter = run_reporter,
reporter_sources = reporter_sources,
reporter_extended = reporter_extended,
reporter_save_html = reporter_save_html,
reporter_save_json = reporter_save_json,
reporter_save_calls_only_tsv = reporter_save_calls_only_tsv,
base_filename = base_filename,
delete_intermediate_files = delete_intermediate_files,
max_concurrent_processes = max_concurrent_processes,
Expand Down Expand Up @@ -108,7 +114,9 @@ task pharmcat_pipeline_task {
Boolean run_reporter = false
String reporter_sources = ""
Boolean reporter_extended = false
Boolean reporter_save_html = false
Boolean reporter_save_json = false
Boolean reporter_save_calls_only_tsv = false
String base_filename = ""
Boolean delete_intermediate_files = false
Int max_concurrent_processes = 1
Expand All @@ -133,7 +141,9 @@ task pharmcat_pipeline_task {
~{if run_reporter then '-reporter' else ''} \
~{if reporter_sources != "" then '-rs ' + reporter_sources else ''} \
~{if reporter_extended then '-re' else ''} \
~{if reporter_save_html then '-reporterHtml' else ''} \
~{if reporter_save_json then '-reporterJson' else ''} \
~{if reporter_save_calls_only_tsv then '-reporterCallsOnlyTsv' else ''} \
~{if base_filename != "" then '-bf ' + base_filename else ''} \
~{if delete_intermediate_files then '-del' else ''} \
-cp ~{max_concurrent_processes} -cm ~{max_memory}
Expand Down
8 changes: 7 additions & 1 deletion docs/using/Running-PharmCAT-Pipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ usage: pharmcat_pipeline [-s <samples> | -S <txt_file>]
[-R <bed_file>]
[-matcher] [-ma] [-matcherHtml] [-research <type>]
[-phenotyper]
[-reporter] [-rs <sources>] [-re] [-reporterJson]
[-reporter] [-rs <sources>] [-re]
[-reporterHtml] [-reporterJson] [-reporterCallsOnlyTsv]
[-o <dir>] [-bf <name>] [-del]
[-cp <num processes>]
[-v] [-V]
Expand Down Expand Up @@ -91,8 +92,13 @@ Reporter arguments:
Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]
-re, --reporter-extended
Write an extended report (includes all possible genes and drugs, even if no data is available)
-reporterHtml, --reporter-save-html
Save reporter results as HTML. This is the default if no format is specified.
If any format is specified, only the specified formats will be saved.
-reporterJson, --reporter-save-json
Save reporter results as JSON.
-reporterCallsOnlyTsv, --reporter-save-calls-only-tsv
Save call results only as TSV.
Output arguments:
-o <dir>, --output-dir <dir>
Expand Down
12 changes: 10 additions & 2 deletions docs/using/Running-PharmCAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,17 @@ Each module has its own arguments to customize its behavior.
-re <span class="altArg"><br />or --reporter-extended</span>
: write an extended report (includes all possible genes and drugs, even if no data is available)

-reporterJson
-reporterHtml <span class="altArg"><br />or --reporter-save-html</span>
: save reporter results as HTML. This is the default if no format is specified.
If any format is specified, only the specified formats will be saved.

-reporterJson <span class="altArg"><br />or --reporter-save-json</span>
: save reporter results as JSON

-reporterCallsOnlyTsv <span class="altArg"><br />or --reporter-save-calls-only-tsv</span>
: save call results only as TSV



### Running Individual Modules

Expand Down Expand Up @@ -199,7 +207,7 @@ Saving phenotyper JSON results to /tmp/results/outside_calls.phenotype.json

#### Just the `Reporter`

This will take the phenotyper data and output the relevant drug annotations in a comprehensive HTML report.
This will take the `Phenotyper` data and output the relevant drug annotations in a comprehensive HTML report.

Examples:

Expand Down
9 changes: 9 additions & 0 deletions preprocessor/pharmcat_pipeline
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ def add_reporter_java_args(java_args: List[str], cli_args):
java_args.append('-rs')
if cli_args.reporter_extended:
java_args.append('-re')
if cli_args.reporter_save_html:
java_args.append('-reporterHtml')
if cli_args.reporter_save_json:
java_args.append('-reporterJson')
if cli_args.reporter_save_calls_only_tsv:
java_args.append('-reporterCallsOnlyTsv')


def add_output_java_args(java_args: List[str], cli_args, output_dir: Optional[Path]):
Expand Down Expand Up @@ -110,8 +114,13 @@ if __name__ == '__main__':
help='Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]')
reporter_group.add_argument('-re', '--reporter-extended', action='store_true',
help='Write an extended report (includes all possible genes and drugs, even if no data is available).')
reporter_group.add_argument('-reporterHtml', '--reporter-save-html', action='store_true',
help='Save reporter results as HTML. This is the default if no format is specified. '
'If any format is specified, only the specified formats will be saved.')
reporter_group.add_argument('-reporterJson', '--reporter-save-json', action='store_true',
help='Save reporter results as JSON.')
# long option is dash-separated throughout; argparse still derives dest=reporter_save_calls_only_tsv
reporter_group.add_argument('-reporterCallsOnlyTsv', '--reporter-save-calls-only-tsv', action='store_true',
help='Save calls results only as TSV.')
# output args
output_group = parser.add_argument_group('Output arguments')
output_group.add_argument('-o', '--output-dir', type=str, metavar='<dir>',
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/org/pharmgkb/pharmcat/BaseConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,17 @@ public class BaseConfig {
reporterTitle = cliHelper.getValue("rt");
reporterCompact = !cliHelper.hasOption("re");
reporterJson = cliHelper.hasOption("reporterJson");
reporterCallsOnlyTsv = cliHelper.hasOption("reporterCallsOnly");
reporterCallsOnlyTsv = cliHelper.hasOption("reporterCallsOnlyTsv");
// by default, generate the HTML report (which preserves backwards compatibility)
// only check for reporterHtml flag if specifying other reporter format outputs
if (reporterJson || reporterCallsOnlyTsv) {
reporterHtml = cliHelper.hasOption("reporterHtml");
}

if (researchMode) {
System.out.println("WARNING: FULL REPORTER OUTPUT NOT AVAILABLE IN RESEARCH MODE");
if (!reporterCallsOnlyTsv) {
runReporter = false;
reporterCallsOnlyTsv = true;
}
reporterHtml = false;
reporterJson = false;
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,9 @@ public static void main(String[] args) {
.addOption("reporter", "reporter", "Run reporter independently")
.addOption("rs", "reporter-sources", "Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]", false, "sources")
.addOption("re", "reporter-extended", "Write an extended report (includes all possible genes and drugs, even if no data is available)")
.addOption("reporterHtml", "reporter-save-html", "Save reporter results as HTML (the default if no format is specified)")
.addOption("reporterJson", "reporter-save-json", "Save reporter results as JSON")
.addOption("reporterCallsOnly", "reporter-save-calls-only", "Save calls only as TSV")
.addOption("reporterCallsOnlyTsv", "reporter-save-calls-only-tsv", "Save calls only as TSV")

// outputs
.addOption("o", "output-dir", "Directory to output to (optional, default is input file directory)", false, "directory")
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/org/pharmgkb/pharmcat/Env.java
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ public PgkbGuidelineCollection getDrugs() {
}


/**
* Checks if gene is used in any guideline from the specified {@code source}.
*
* @param source the data source whose guidelines should be checked
* @param gene the gene symbol to look for
* @return true if the genes used in {@code source} contain {@code gene}
*/
public boolean hasGene(DataSource source, String gene) {
return m_drugs.getGenesUsedInSource(source).contains(gene);
}
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/pharmgkb/pharmcat/PharmCAT.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ public static void main(String[] args) {
.addOption("rt", "reporter-title", "Text to add to the report title", false, "title")
.addOption("rs", "reporter-sources", "Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]", false, "sources")
.addOption("re", "reporter-extended", "Write an extended report (includes all possible genes and drugs, even if no data is available)")
.addOption("reporterHtml", "reporter-save-html", "Save reporter results as HTML (the default if no format is specified)")
.addOption("reporterJson", "reporter-save-json", "Save reporter results as JSON")
.addOption("reporterCallsOnly", "reporter-save-calls-only", "Save calls only as TSV")
.addOption("reporterCallsOnlyTsv", "reporter-save-calls-only-tsv", "Save calls results only as TSV")

// outputs
.addOption("o", "output-dir", "Directory to output to (optional, default is input file directory)", false, "directory")
Expand Down
15 changes: 8 additions & 7 deletions src/main/java/org/pharmgkb/pharmcat/Pipeline.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.pharmgkb.pharmcat.haplotype.NamedAlleleMatcher;
import org.pharmgkb.pharmcat.haplotype.ResultSerializer;
import org.pharmgkb.pharmcat.haplotype.model.GeneCall;
import org.pharmgkb.pharmcat.haplotype.model.Metadata;
import org.pharmgkb.pharmcat.phenotype.OutsideCallParser;
import org.pharmgkb.pharmcat.phenotype.Phenotyper;
import org.pharmgkb.pharmcat.phenotype.model.OutsideCall;
Expand Down Expand Up @@ -300,14 +301,17 @@ public PipelineResult call() throws IOException {

Phenotyper phenotyper = null;
if (m_runPhenotyper) {
Metadata metadata = null;
List<GeneCall> calls;
Map<String, Collection<String>> warnings = new HashMap<>();
if (matcherResult != null) {
metadata = matcherResult.getMetadata();
calls = matcherResult.getGeneCalls();
warnings.putAll(matcherResult.getVcfWarnings());
} else if (m_phenotyperInputFile != null) {
org.pharmgkb.pharmcat.haplotype.model.Result deserializedMatcherResult = new ResultSerializer()
.fromJson(m_phenotyperInputFile);
metadata = deserializedMatcherResult.getMetadata();
calls = deserializedMatcherResult.getGeneCalls();
warnings.putAll(deserializedMatcherResult.getVcfWarnings());
} else {
Expand Down Expand Up @@ -337,7 +341,7 @@ public PipelineResult call() throws IOException {
}
}

phenotyper = new Phenotyper(m_env, calls, outsideCalls, warnings);
phenotyper = new Phenotyper(m_env, metadata, calls, outsideCalls, warnings);
if (!m_deleteIntermediateFiles || !m_runReporter) {
if (!batchDisplayMode) {
output.add("Saving phenotyper JSON results to " + m_phenotyperJsonFile);
Expand All @@ -352,7 +356,7 @@ public PipelineResult call() throws IOException {
Path inputFile = m_phenotyperJsonFile != null ? m_phenotyperJsonFile : m_reporterInputFile;
phenotyper = Phenotyper.read(inputFile);
}
m_reportContext = new ReportContext(m_env, phenotyper.getGeneReports(), m_reporterTitle);
m_reportContext = new ReportContext(m_env, phenotyper, m_reporterTitle);
if (m_reporterHtmlFile != null) {
if (!batchDisplayMode) {
output.add("Saving reporter HTML results to " + m_reporterHtmlFile);
Expand All @@ -373,11 +377,8 @@ public PipelineResult call() throws IOException {
if (!batchDisplayMode) {
output.add("Saving calls-only TSV results to " + m_reporterCallsOnlyFile);
}
CallsOnlyFormat caf = new CallsOnlyFormat(m_reporterCallsOnlyFile, m_env);
if (!m_topCandidateOnly) {
caf.showMatchScores();
}
caf.write(m_reportContext);
new CallsOnlyFormat(m_reporterCallsOnlyFile, m_env)
.write(m_reportContext);
}
didSomething = true;
}
Expand Down
58 changes: 44 additions & 14 deletions src/main/java/org/pharmgkb/pharmcat/phenotype/Phenotyper.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,7 @@
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.*;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import com.google.common.base.Preconditions;
Expand All @@ -24,6 +16,7 @@
import org.pharmgkb.pharmcat.Env;
import org.pharmgkb.pharmcat.haplotype.NamedAlleleMatcher;
import org.pharmgkb.pharmcat.haplotype.model.GeneCall;
import org.pharmgkb.pharmcat.haplotype.model.Metadata;
import org.pharmgkb.pharmcat.phenotype.model.OutsideCall;
import org.pharmgkb.pharmcat.reporter.ReportContext;
import org.pharmgkb.pharmcat.reporter.model.DataSource;
Expand All @@ -44,33 +37,58 @@
public class Phenotyper {
private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

@SerializedName("matcherMetadata")
@Expose
private Metadata m_matcherMetadata;
@Expose
@SerializedName("geneReports")
private final SortedMap<DataSource, SortedMap<String, GeneReport>> m_geneReports = new TreeMap<>();
@Expose
@SerializedName("unannotatedGeneCalls")
private SortedSet<GeneReport> m_unannotatedGeneCalls = new TreeSet<>();


/**
* Public constructor. This needs {@link GeneCall} objects from the {@link NamedAlleleMatcher} and {@link OutsideCall}
* objects coming from other allele calling sources. This relies on reading definition files as well.
* <p>
* Gene reports are initialized for both {@link DataSource#CPIC} and {@link DataSource#DPWG}. Matcher calls for genes
* that neither source uses are kept as unannotated gene calls, unless the resulting report has no data.
*
* @param env the environment used to check which genes a source uses and to build gene reports
* @param matcherMetadata metadata for the named allele matcher used for {@code geneCalls};
* can be null if all outside calls
* @param geneCalls a List of {@link GeneCall} objects
* @param outsideCalls a Set of {@link OutsideCall} objects
* @param variantWarnings map of VCF warnings, keyed to chromosomal position
* @throws IllegalStateException if an unused gene cannot be matched back to one of the {@code geneCalls}
*/
public Phenotyper(Env env, List<GeneCall> geneCalls, Set<OutsideCall> outsideCalls,
public Phenotyper(Env env, @Nullable Metadata matcherMetadata, List<GeneCall> geneCalls, Set<OutsideCall> outsideCalls,
@Nullable Map<String, Collection<String>> variantWarnings) {
initialize(geneCalls, outsideCalls, env, DataSource.CPIC, variantWarnings);
initialize(geneCalls, outsideCalls, env, DataSource.DPWG, variantWarnings);
// initialize() returns the genes of matcher calls that the given source does not use;
// retainAll() narrows that down to genes used by neither CPIC nor DPWG
List<String> unusedGenes = initialize(geneCalls, outsideCalls, env, DataSource.CPIC, variantWarnings);
unusedGenes.retainAll(initialize(geneCalls, outsideCalls, env, DataSource.DPWG, variantWarnings));

if (!unusedGenes.isEmpty()) {
for (String gene : unusedGenes) {
// look up the original matcher call for this gene
GeneCall geneCall = geneCalls.stream()
.filter(gc -> gc.getGene().equals(gene))
.findFirst()
.orElseThrow(() -> new IllegalStateException("Cannot find gene call for " + gene));
// no source uses this gene, so the report is built against DataSource.UNKNOWN
GeneReport geneReport = new GeneReport(geneCall, env, DataSource.UNKNOWN);
// only keep reports that actually carry data
if (!geneReport.isNoData()) {
m_unannotatedGeneCalls.add(geneReport);
}
}
}

m_matcherMetadata = matcherMetadata;
}


private void initialize(List<GeneCall> geneCalls, Set<OutsideCall> outsideCalls, Env env, DataSource source,
private List<String> initialize(List<GeneCall> geneCalls, Set<OutsideCall> outsideCalls, Env env, DataSource source,
@Nullable Map<String, Collection<String>> variantWarnings) {
SortedMap<String, GeneReport> reportMap = m_geneReports.computeIfAbsent(source, (s) -> new TreeMap<>());

List<String> unusedGeneCalls = new ArrayList<>();
// matcher calls
for (GeneCall geneCall : geneCalls) {
if (!env.hasGene(source, geneCall.getGene())) {
unusedGeneCalls.add(geneCall.getGene());
continue;
}
GeneReport geneReport = new GeneReport(geneCall, env, source);
Expand All @@ -83,7 +101,7 @@ private void initialize(List<GeneCall> geneCalls, Set<OutsideCall> outsideCalls,
MessageAnnotation msgAnnotation = null;
if (geneReport != null) {
if (geneReport.getCallSource() != CallSource.OUTSIDE) {
// outside call trumps matcher
// outside call trumps the matcher's result
// warn the user of the conflict
String matcherCall = geneReport.getSourceDiplotypes().stream()
.sorted()
Expand Down Expand Up @@ -117,6 +135,8 @@ private void initialize(List<GeneCall> geneCalls, Set<OutsideCall> outsideCalls,

// add VCF warnings
reportMap.values().forEach(geneReport -> geneReport.addVariantWarningMessages(variantWarnings));

return unusedGeneCalls;
}


Expand All @@ -135,6 +155,11 @@ public Optional<GeneReport> findGeneReport(DataSource source, String geneSymbol)
}


/**
* Gets the gene reports for matcher calls on genes that are used by neither {@link DataSource#CPIC} nor
* {@link DataSource#DPWG}.
*
* @return the backing set itself (not a copy); empty if there are no such calls
*/
public SortedSet<GeneReport> getUnannotatedGeneCalls() {
return m_unannotatedGeneCalls;
}


/**
* Writes out {@link Phenotyper} data.
*
Expand Down Expand Up @@ -170,4 +195,9 @@ private Set<String> listUnspecifiedGenes(Env env, DataSource source) {
.forEach(unspecifiedGenes::remove);
return unspecifiedGenes;
}


/**
* Gets the metadata for the named allele matcher that produced the gene calls.
*
* @return the matcher metadata; can be null (the constructor accepts null when all calls are outside calls)
*/
public Metadata getMatcherMetadata() {
return m_matcherMetadata;
}
}
Loading

0 comments on commit 485aa06

Please sign in to comment.