Skip to content

Commit

Permalink
[ALS-6467] Configurable VCF excerpt info column ordering
Browse files Browse the repository at this point in the history
- Make column sorter that pulls from properties
- Use it to sort and exclude columns

(cherry picked from commit d3289ef)
  • Loading branch information
Luke Sikina authored and ramari16 committed Dec 20, 2024
1 parent 465d041 commit 6ad4d25
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Orders (and restricts) variant info columns according to the comma separated
 * {@code variant.info_column_order} property.
 *
 * <p>When the property is missing, empty, or contains no usable names, no ordering is
 * configured and {@link #sortInfoColumns(Set)} returns every column it is given.
 */
@Component
public class ColumnSorter {

    /** Configured column name mapped to its position in the configured order; never null. */
    private final Map<String, Integer> infoColumnsOrder;

    /**
     * Parses the configured column order.
     *
     * <p>Whitespace around each name is ignored and blank entries are skipped, so
     * {@code "a, b ,c"} is read the same way as {@code "a,b,c"}. If a name is listed more
     * than once, its last position is used.
     *
     * @param infoColumnOrderString comma separated column names in the desired order; may be
     *                              null or empty, meaning no ordering is configured
     */
    @Autowired
    public ColumnSorter(@Value("#{'${variant.info_column_order:}'}") String infoColumnOrderString) {
        Map<String, Integer> order = new HashMap<>();
        if (infoColumnOrderString != null) {
            int position = 0;
            for (String rawName : infoColumnOrderString.split(",")) {
                // Property values are commonly written "a, b, c"; without trimming, " b"
                // would never match a real column name and the column would be dropped.
                String name = rawName.trim();
                if (!name.isEmpty()) {
                    order.put(name, position++);
                }
            }
        }
        this.infoColumnsOrder = Collections.unmodifiableMap(order);
    }

    /**
     * Returns the given columns in the configured order, leaving out any column that is not
     * part of the configuration.
     *
     * @param columns the available info column names
     * @return a new list; when no order is configured it holds every given column in the
     *         iteration order of {@code columns}, otherwise only the configured columns,
     *         sorted by their configured position
     */
    public List<String> sortInfoColumns(Set<String> columns) {
        // backwards compatibility check: without configuration nothing is filtered or sorted.
        if (infoColumnsOrder.isEmpty()) {
            return new ArrayList<>(columns);
        }
        return columns.stream()
            .filter(infoColumnsOrder::containsKey)
            // Every remaining name is a key of the map, so the lookup cannot return null.
            .sorted(Comparator.comparingInt(infoColumnsOrder::get))
            .collect(Collectors.toList());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.*;
import java.util.stream.Collectors;

import org.checkerframework.checker.units.qual.C;
import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -33,12 +34,14 @@ public class VariantListProcessor implements HpdsProcessor {
private final int CACHE_SIZE;

private final AbstractProcessor abstractProcessor;
private final ColumnSorter columnSorter;


@Autowired
public VariantListProcessor(AbstractProcessor abstractProcessor, GenomicProcessor genomicProcessor, @Value("${VCF_EXCERPT_ENABLED:false}") boolean vcfExcerptEnabled ) {
public VariantListProcessor(AbstractProcessor abstractProcessor, GenomicProcessor genomicProcessor, ColumnSorter columnSorter, @Value("${VCF_EXCERPT_ENABLED:false}") boolean vcfExcerptEnabled) {
this.abstractProcessor = abstractProcessor;
this.genomicProcessor = genomicProcessor;
this.columnSorter = columnSorter;

VCF_EXCERPT_ENABLED = vcfExcerptEnabled;
//always enable aggregate queries if full queries are permitted.
Expand All @@ -53,6 +56,7 @@ public VariantListProcessor(AbstractProcessor abstractProcessor, GenomicProcesso
public VariantListProcessor(boolean isOnlyForTests, AbstractProcessor abstractProcessor) {
this.abstractProcessor = abstractProcessor;
this.genomicProcessor = null;
this.columnSorter = new ColumnSorter("");

VCF_EXCERPT_ENABLED = "TRUE".equalsIgnoreCase(System.getProperty("VCF_EXCERPT_ENABLED", "FALSE"));
//always enable aggregate queries if full queries are permitted.
Expand Down Expand Up @@ -171,8 +175,9 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
//5 columns for gene info
builder.append("CHROM\tPOSITION\tREF\tALT");

List<String> infoStoreColumns = columnSorter.sortInfoColumns(abstractProcessor.getInfoStoreColumns());
//now add the variant metadata column headers
for(String key : abstractProcessor.getInfoStoreColumns()) {
for(String key : infoStoreColumns) {
builder.append("\t" + key);
}

Expand Down Expand Up @@ -250,7 +255,7 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
}

//need to make sure columns are pushed out in the right order; use same iterator as headers
for(String key : abstractProcessor.getInfoStoreColumns()) {
for(String key : infoStoreColumns) {
Set<String> columnMeta = variantColumnMap.get(key);
if(columnMeta != null) {
//collect our sets to a single entry
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing;


import org.junit.jupiter.api.Test;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.junit.jupiter.api.Assertions.assertEquals;

import static org.hamcrest.CoreMatchers.*;

/**
 * Unit tests for {@link ColumnSorter}: ordering, exclusion of unconfigured columns, and the
 * no-configuration pass-through.
 */
public class ColumnSorterTest {

    /** Columns that are all configured come back in the configured order. */
    @Test
    public void shouldSortColumns() {
        ColumnSorter subject = new ColumnSorter("a,b,c");

        List<String> actual = subject.sortInfoColumns(Set.of("b", "c", "a"));

        assertEquals(List.of("a", "b", "c"), actual);
    }

    /** A column that is absent from the configuration ("d") is left out of the result. */
    @Test
    public void shouldExcludeMissingColumns() {
        ColumnSorter subject = new ColumnSorter("a,b,c");

        List<String> actual = subject.sortInfoColumns(Set.of("d", "b", "c", "a"));

        assertEquals(List.of("a", "b", "c"), actual);
    }

    /** Configured columns that are not in the input ("b", "c") are simply skipped. */
    @Test
    public void shouldNotBreakForMissingColumns() {
        ColumnSorter subject = new ColumnSorter("a,b,c,d");

        List<String> actual = subject.sortInfoColumns(Set.of("d", "a"));

        assertEquals(List.of("a", "d"), actual);
    }

    /** With an empty configuration every input column is returned, in no particular order. */
    @Test
    public void shouldNoOpWithoutConfig() {
        ColumnSorter subject = new ColumnSorter("");
        Set<String> expected = Set.of("a", "b", "c");

        List<String> actual = subject.sortInfoColumns(Set.of("b", "c", "a"));

        // The input set has no defined iteration order, so compare contents only. The size
        // check guards against duplicates that the set comparison alone would hide.
        assertEquals(expected.size(), actual.size());
        assertThat(new HashSet<>(actual), is(equalTo(expected)));
    }
}

0 comments on commit 6ad4d25

Please sign in to comment.