diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorter.java
new file mode 100644
--- /dev/null
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorter.java
@@ -0,0 +1,59 @@
+package edu.harvard.hms.dbmi.avillach.hpds.processing;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Orders variant info column names by the comma separated list configured in
+ * {@code variant.info_column_order}.
+ */
+@Component
+public class ColumnSorter {
+    /** Position of each configured column name within the configured list. */
+    private final Map<String, Integer> infoColumnsOrder;
+
+    /**
+     * @param infoColumnOrderString comma separated column names in the desired order; when it is
+     *        {@code null}, empty or names no column, {@link #sortInfoColumns(Set)} keeps every column
+     */
+    @Autowired
+    public ColumnSorter(@Value("#{'${variant.info_column_order:}'}") String infoColumnOrderString) {
+        if (infoColumnOrderString == null || infoColumnOrderString.isEmpty()) {
+            this.infoColumnsOrder = Map.of();
+        } else {
+            String[] infoColumnOrder = infoColumnOrderString.split(",");
+            Map<String, Integer> order = new HashMap<>();
+            for (int i = 0; i < infoColumnOrder.length; i++) {
+                // accept "a, b, c" style values and ignore stray commas
+                String column = infoColumnOrder[i].trim();
+                if (!column.isEmpty()) {
+                    order.put(column, i);
+                }
+            }
+            this.infoColumnsOrder = order;
+        }
+    }
+
+    /**
+     * Filters and orders the given columns using the configured order.
+     *
+     * @param columns column names to arrange
+     * @return a copy of {@code columns} in its iteration order when no order is configured;
+     *         otherwise only the configured columns, in configured order
+     */
+    public List<String> sortInfoColumns(Set<String> columns) {
+        // backwards compatibility check.
+        if (infoColumnsOrder.isEmpty()) {
+            return new ArrayList<>(columns);
+        }
+        return columns.stream()
+            .filter(infoColumnsOrder::containsKey)
+            // every column that passes the filter has a position, so get() never returns null
+            .sorted(Comparator.comparingInt(infoColumnsOrder::get))
+            .collect(Collectors.toList());
+    }
+}
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantListProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantListProcessor.java
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantListProcessor.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantListProcessor.java
@@ -33,12 +33,14 @@ public class VariantListProcessor implements HpdsProcessor {
     private final int CACHE_SIZE;
 
     private final AbstractProcessor abstractProcessor;
+    private final ColumnSorter columnSorter;
 
 
     @Autowired
-    public VariantListProcessor(AbstractProcessor abstractProcessor, GenomicProcessor genomicProcessor, @Value("${VCF_EXCERPT_ENABLED:false}") boolean vcfExcerptEnabled ) {
+    public VariantListProcessor(AbstractProcessor abstractProcessor, GenomicProcessor genomicProcessor, ColumnSorter columnSorter, @Value("${VCF_EXCERPT_ENABLED:false}") boolean vcfExcerptEnabled) {
         this.abstractProcessor = abstractProcessor;
         this.genomicProcessor = genomicProcessor;
+        this.columnSorter = columnSorter;
 
         VCF_EXCERPT_ENABLED = vcfExcerptEnabled;
         //always enable aggregate queries if full queries are permitted.
@@ -53,6 +55,7 @@ public VariantListProcessor(AbstractProcessor abstractProcessor, GenomicProcesso
     public VariantListProcessor(boolean isOnlyForTests, AbstractProcessor abstractProcessor) {
         this.abstractProcessor = abstractProcessor;
         this.genomicProcessor = null;
+        this.columnSorter = new ColumnSorter("");
 
         VCF_EXCERPT_ENABLED = "TRUE".equalsIgnoreCase(System.getProperty("VCF_EXCERPT_ENABLED", "FALSE"));
         //always enable aggregate queries if full queries are permitted.
@@ -171,8 +174,9 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
         //5 columns for gene info
         builder.append("CHROM\tPOSITION\tREF\tALT");
 
+        List<String> infoStoreColumns = columnSorter.sortInfoColumns(abstractProcessor.getInfoStoreColumns());
         //now add the variant metadata column headers
-        for(String key : abstractProcessor.getInfoStoreColumns()) {
+        for(String key : infoStoreColumns) {
             builder.append("\t" + key);
         }
 
@@ -250,7 +254,7 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
             }
 
             //need to make sure columns are pushed out in the right order; use same iterator as headers
-            for(String key : abstractProcessor.getInfoStoreColumns()) {
+            for(String key : infoStoreColumns) {
                 Set columnMeta = variantColumnMap.get(key);
                 if(columnMeta != null) {
                     //collect our sets to a single entry
diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorterTest.java
new file mode 100644
--- /dev/null
+++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorterTest.java
@@ -0,0 +1,62 @@
+package edu.harvard.hms.dbmi.avillach.hpds.processing;
+
+
+import org.junit.jupiter.api.Test;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import static org.hamcrest.CoreMatchers.*;
+
+public class ColumnSorterTest {
+
+    @Test
+    public void shouldSortColumns() {
+        ColumnSorter subject = new ColumnSorter("a,b,c");
+        List<String> actual = subject.sortInfoColumns(Set.of("b", "c", "a"));
+        List<String> expected = List.of("a", "b", "c");
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void shouldExcludeMissingColumns() {
+        ColumnSorter subject = new ColumnSorter("a,b,c");
+        List<String> actual = subject.sortInfoColumns(Set.of("d", "b", "c", "a"));
+        List<String> expected = List.of("a", "b", "c");
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void shouldNotBreakForMissingColumns() {
+        ColumnSorter subject = new ColumnSorter("a,b,c,d");
+        List<String> actual = subject.sortInfoColumns(Set.of("d", "a"));
+        List<String> expected = List.of("a", "d");
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void shouldIgnoreWhitespaceAroundConfiguredColumns() {
+        ColumnSorter subject = new ColumnSorter("a, b ,c");
+        List<String> actual = subject.sortInfoColumns(Set.of("b", "c", "a"));
+        List<String> expected = List.of("a", "b", "c");
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void shouldNoOpWithoutConfig() {
+        ColumnSorter subject = new ColumnSorter("");
+        List<String> actual = subject.sortInfoColumns(Set.of("b", "c", "a"));
+        List<String> expected = List.of("b", "c", "a");
+
+        assertThat(new HashSet<>(actual), is(equalTo(new HashSet<>(expected))));
+    }
+}