Skip to content

Commit

Permalink
Merge branch 'release' into ALS-7809
Browse files Browse the repository at this point in the history
# Conflicts:
#	dictonaryReqeust.http
  • Loading branch information
Gcolon021 committed Nov 20, 2024
2 parents a7617d5 + 5cf4e51 commit 393cdd2
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 43 deletions.
3 changes: 2 additions & 1 deletion dictonaryReqeust.http
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ Content-Type: application/json
POST http://localhost:80/search
Content-Type: application/json

{"@type":"GeneralQueryRequest","resourceCredentials":{},"query":{"searchTerm":"breast","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":10000000},"resourceUUID":null}
{"@type":"GeneralQueryRequest","resourceCredentials":{},"query":{"searchTerm":"throat sore acute #8","includedTags":[],
"excludedTags":[],"returnTags":"true","offset":0,"limit":100000},"resourceUUID":null}

###

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,37 +82,7 @@ public QueryParamPair generateFilterQuery(Filter filter, Pageable pageable) {
params.addValue("consents", filter.consents());
}
clauses.add(createValuelessNodeFilter(filter.search(), filter.consents()));


String query = "(\n" + String.join("\n\tINTERSECT\n", clauses) + "\n)";
String superQuery = """
WITH q AS (
%s
)
%s
SELECT q.concept_node_id AS concept_node_id, max((1 + rank) * coalesce(rank_adjustment, 1)) AS rank
FROM q
LEFT JOIN allow_filtering ON allow_filtering.concept_node_id = q.concept_node_id
GROUP BY q.concept_node_id
ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC, q.concept_node_id ASC
""".formatted(query, RANK_ADJUSTMENTS);
// explanation of ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC
// you want to sort the best matches first, BUT anything that is marked as unfilterable should be put last
// coalesce will return the first non null value; this solves rows that aren't marked as filterable or not
// I then multiply that by 1 + rank instead of just rank so that a rank value of 0 for an unfilterable var
// is placed below a rank value of 0 for a filterable var
// Finally, I add the concept node id to the sort to keep it stable for ties, otherwise pagination gets weird

if (pageable.isPaged()) {
superQuery = superQuery + """
LIMIT :limit
OFFSET :offset
""";
params.addValue("limit", pageable.getPageSize()).addValue("offset", pageable.getOffset());
}

superQuery = " concepts_filtered_sorted AS (\n" + superQuery + "\n)";

String superQuery = getSuperQuery(pageable, clauses, params);

return new QueryParamPair(superQuery, params);
}
Expand Down Expand Up @@ -184,4 +154,78 @@ facet.name IN (:facets_for_category_%s ) AND facet_category.name = :category_%s
}).toList();
}

public QueryParamPair generateLegacyFilterQuery(Filter filter, Pageable pageable) {
MapSqlParameterSource params = new MapSqlParameterSource();
params.addValue("disallowed_meta_keys", disallowedMetaFields);
List<String> clauses = new java.util.ArrayList<>(List.of());
if (StringUtils.hasLength(filter.search())) {
params.addValue("dynamic_tsquery", filter.search().trim());
}
clauses.add(createDynamicValuelessNodeFilter(filter.search()));
String superQuery = getSuperQuery(pageable, clauses, params);

return new QueryParamPair(superQuery, params);
}

private String createDynamicValuelessNodeFilter(String search) {
String rankQuery = "0 as rank";
String rankWhere = "";
if (StringUtils.hasLength(search)) {
rankQuery = "ts_rank(searchable_fields, to_tsquery(:dynamic_tsquery)) AS rank";
rankWhere = "concept_node.searchable_fields @@ to_tsquery(:dynamic_tsquery) AND";
}
return """
SELECT
concept_node.concept_node_id,
%s
FROM
concept_node
LEFT JOIN dataset ON concept_node.dataset_id = dataset.dataset_id
LEFT JOIN concept_node_meta AS continuous_min ON concept_node.concept_node_id = continuous_min.concept_node_id AND continuous_min.KEY = 'min'
LEFT JOIN concept_node_meta AS continuous_max ON concept_node.concept_node_id = continuous_max.concept_node_id AND continuous_max.KEY = 'max'
LEFT JOIN concept_node_meta AS categorical_values ON concept_node.concept_node_id = categorical_values.concept_node_id AND categorical_values.KEY = 'values'
WHERE
%s
%s
(
continuous_min.value <> '' OR
continuous_max.value <> '' OR
categorical_values.value <> ''
)
"""
.formatted(rankQuery, rankWhere, "");
}

private static String getSuperQuery(Pageable pageable, List<String> clauses, MapSqlParameterSource params) {
String query = "(\n" + String.join("\n\tINTERSECT\n", clauses) + "\n)";
String superQuery = """
WITH q AS (
%s
)
%s
SELECT q.concept_node_id AS concept_node_id, max((1 + rank) * coalesce(rank_adjustment, 1)) AS rank
FROM q
LEFT JOIN allow_filtering ON allow_filtering.concept_node_id = q.concept_node_id
GROUP BY q.concept_node_id
ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC, q.concept_node_id ASC
""".formatted(query, RANK_ADJUSTMENTS);
// explanation of ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC
// you want to sort the best matches first, BUT anything that is marked as unfilterable should be put last
// coalesce will return the first non null value; this solves rows that aren't marked as filterable or not
// I then multiply that by 1 + rank instead of just rank so that a rank value of 0 for an unfilterable var
// is placed below a rank value of 0 for a filterable var
// Finally, I add the concept node id to the sort to keep it stable for ties, otherwise pagination gets weird

if (pageable.isPaged()) {
superQuery = superQuery + """
LIMIT :limit
OFFSET :offset
""";
params.addValue("limit", pageable.getPageSize()).addValue("offset", pageable.getOffset());
}

superQuery = " concepts_filtered_sorted AS (\n" + superQuery + "\n)";
return superQuery;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,59 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.harvard.dbmi.avillach.dictionary.filter.Filter;
import edu.harvard.dbmi.avillach.dictionary.filter.FilterProcessor;
import edu.harvard.dbmi.avillach.dictionary.legacysearch.model.LegacySearchQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.domain.PageRequest;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

@Component
public class LegacySearchQueryMapper {

private static final ObjectMapper objectMapper = new ObjectMapper();
private final FilterProcessor filterProcessor;
private static final Logger log = LoggerFactory.getLogger(LegacySearchQueryMapper.class);

public LegacySearchQueryMapper(FilterProcessor filterProcessor) {
this.filterProcessor = filterProcessor;
}
public LegacySearchQueryMapper() {}

public LegacySearchQuery mapFromJson(String jsonString) throws IOException {
JsonNode rootNode = objectMapper.readTree(jsonString);
JsonNode queryNode = rootNode.get("query");

String searchTerm = queryNode.get("searchTerm").asText();
int limit = queryNode.get("limit").asInt();
Filter filter = filterProcessor.processsFilter(new Filter(List.of(), searchTerm, List.of()));
return new LegacySearchQuery(filter, PageRequest.of(0, limit));
searchTerm = constructTsQuery(searchTerm);
log.debug("Constructed Search Term: {}", searchTerm);
return new LegacySearchQuery(new Filter(List.of(), searchTerm, List.of()), PageRequest.of(0, limit));
}

// An attempt to provide OR search that will produce similar results to legacy search-prototype
private String constructTsQuery(String searchTerm) {
// Split on the | to enable or queries
String[] orGroups = searchTerm.split("\\|");
List<String> orClauses = new ArrayList<>();

for (String group : orGroups) {
// To replicate legacy search we will split using its regex [\\s\\p{Punct}]+
String[] tokens = group.trim().split("[\\s\\p{Punct}]+");

// Now we will combine the tokens in this group and '&' them together.
String andClause = Arrays.stream(tokens).filter(token -> !token.isBlank()) // remove empty tokens.
.map(token -> token + ":*") // add the wild card for search
.collect(Collectors.joining(" & "));

if (!andClause.isBlank()) {
orClauses.add(andClause);
}
}


return String.join(" | ", orClauses);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public LegacySearchRepository(
}

public List<SearchResult> getLegacySearchResults(Filter filter, Pageable pageable) {
QueryParamPair filterQ = filterGen.generateFilterQuery(filter, pageable);
QueryParamPair filterQ = filterGen.generateLegacyFilterQuery(filter, pageable);
String sql = ALLOW_FILTERING_Q + ", " + filterQ.query() + """
SELECT concept_node.concept_path AS conceptPath,
concept_node.display AS display,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,38 @@ void shouldGetLegacyResponseByStudyID() throws IOException {
searchResults.forEach(searchResult -> Assertions.assertEquals("phs000007", searchResult.result().studyId()));
}

@Test
void shouldHandleORRequest() throws IOException {
String jsonString = """
{"query":{"searchTerm":"age","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}}
""";

ResponseEntity<LegacyResponse> legacyResponseResponseEntity = legacySearchController.legacySearch(jsonString);
Assertions.assertEquals(HttpStatus.OK, legacyResponseResponseEntity.getStatusCode());
LegacyResponse legacyResponseBody = legacyResponseResponseEntity.getBody();
Assertions.assertNotNull(legacyResponseBody);
Results results = legacyResponseBody.results();
List<SearchResult> ageSearchResults = results.searchResults();
Assertions.assertEquals(4, ageSearchResults.size());

jsonString = """
{"query":{"searchTerm":"physical|age","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}}
""";

legacyResponseResponseEntity = legacySearchController.legacySearch(jsonString);
Assertions.assertEquals(HttpStatus.OK, legacyResponseResponseEntity.getStatusCode());
legacyResponseBody = legacyResponseResponseEntity.getBody();
Assertions.assertNotNull(legacyResponseBody);
results = legacyResponseBody.results();
List<SearchResult> physicalORAgeSearchResults = results.searchResults();
Assertions.assertEquals(5, physicalORAgeSearchResults.size());

// Verify that age|physical has expanded the search results
Assertions.assertNotEquals(ageSearchResults.size(), physicalORAgeSearchResults.size());

// Verify the OR statement has more results
Assertions.assertTrue(ageSearchResults.size() < physicalORAgeSearchResults.size());
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ void shouldParseSearchRequest() throws IOException {
Filter filter = legacySearchQuery.filter();
Pageable pageable = legacySearchQuery.pageable();

Assertions.assertEquals("age", filter.search());
Assertions.assertEquals("age:*", filter.search());
Assertions.assertEquals(100, pageable.getPageSize());
}

@Test
void shouldReplaceUnderscore() throws IOException {
void shouldHandlePunct() throws IOException {
String jsonString =
"""
{"query":{"searchTerm":"tutorial-biolincc_digitalis","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}}
Expand All @@ -43,7 +43,35 @@ void shouldReplaceUnderscore() throws IOException {
Filter filter = legacySearchQuery.filter();
Pageable pageable = legacySearchQuery.pageable();

Assertions.assertEquals("tutorial-biolincc/digitalis", filter.search());
Assertions.assertEquals("tutorial:* & biolincc:* & digitalis:*", filter.search());
Assertions.assertEquals(100, pageable.getPageSize());
}

@Test
void shouldHandleOR() throws IOException {
String jsonString = """
{"query":{"searchTerm":"sex|gender","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}}
""";

LegacySearchQuery legacySearchQuery = legacySearchQueryMapper.mapFromJson(jsonString);
Filter filter = legacySearchQuery.filter();
Pageable pageable = legacySearchQuery.pageable();

Assertions.assertEquals("sex:* | gender:*", filter.search());
Assertions.assertEquals(100, pageable.getPageSize());
}

@Test
void shouldHandleORAndPunct() throws IOException {
String jsonString = """
{"query":{"searchTerm":"sex|gender age","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}}
""";

LegacySearchQuery legacySearchQuery = legacySearchQueryMapper.mapFromJson(jsonString);
Filter filter = legacySearchQuery.filter();
Pageable pageable = legacySearchQuery.pageable();

Assertions.assertEquals("sex:* | gender:* & age:*", filter.search());
Assertions.assertEquals(100, pageable.getPageSize());
}

Expand Down

0 comments on commit 393cdd2

Please sign in to comment.