Skip to content

Commit

Permalink
Create extract_ordo_mappings()
Browse files Browse the repository at this point in the history
  • Loading branch information
allenbaron committed Feb 13, 2024
1 parent 2ac4cb3 commit b041d85
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 1 deletion.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ export(elucidate)
export(extract_as_tidygraph)
export(extract_class_axiom)
export(extract_eq_axiom)
export(extract_ordo_mappings)
export(extract_pm_date)
export(extract_pmid)
export(extract_subclass_axiom)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ requiring downloads.
### New
* `download_omim()` downloads official API-key requiring files directly from
OMIM (e.g. mim2gene.txt, phenotypicSeries.txt, etc.).

* `extract_ordo_mappings()` extracts mappings from the Orphanet Rare Disease Ontology, either in native format as `oboInOwl:hasDbXref` with Orphanet's text-based predicate modifiers, or as SKOS (supplemented with filler `doid:` predicates where `SKOS` predicates don't exist).


# DO.utils 0.3.1
Expand Down
71 changes: 71 additions & 0 deletions R/extract.R
Original file line number Diff line number Diff line change
Expand Up @@ -441,3 +441,74 @@ extract_as_tidygraph <- function(x, query = NULL, collapse_method = "first",

tg
}


#' Extract mappings from ORDO
#'
#' Extract mappings from the Orphanet Rare Disease Ontology (ORDO). ORDO uses
#' `oboInOwl:hasDbXref` for mapping with annotations to indicate
#' exact/broad/narrow-ness. Utilizes [robot()].
#'
#' @param ordo_path The path to the ORDO file, as a string.
#' @param as_skos Whether to convert ORDO's annotated `oboInOwl:hasDbXref`
#' mappings to their
#' [Simple Knowledge Organization System (SKOS)](https://www.w3.org/TR/2009/REC-skos-reference-20090818/)
#' equivalents, as a boolean (default: `TRUE`).
#'
#' The ORDO-skos equivalent predicates are as follows:
#'
#' * `"BTNT"` - `skos:narrowMatch`
#' * `"NTBT"` - `skos:broadMatch`
#' * `"E"` - `skos:exactMatch`
#' * `"ND"` - `doid:undefinedMatch` (supplements SKOS)
#' * `"W"` - `doid:notMatch` (supplements SKOS)
#'
#' @param output The path where output will be written, as a string. If `NULL`
#' (default), the data will be read into R and not saved to a file.
#' @inheritDotParams tidy_sparql -query_res
#' @returns
#' If `output` is specified, the path to the output file with the data,
#' otherwise, a `tibble` with the data.
#'
#' ORDO mappings data will be formatted according to the
#' [SSSOM](https://github.com/mapping-commons/sssom) specification,
#' with an additional `status` column indicating the status (active, deprecated,
#' etc.) of each ORPHA term.
#'
#' If `as_skos = FALSE`, ORDO's text-based `oboInOwl:hasDbXref` annotations
#' denoting the type of relationship the Xref represents (simple text code only)
#' will be included in the `predicate_modifier` column.
#'
#' @export
extract_ordo_mappings <- function(ordo_path, as_skos = TRUE, output = NULL,
                                  ...) {
  # Only a literal `TRUE` selects the SKOS-converting query; any other value
  # falls back to the native `oboInOwl:hasDbXref` query.
  q_nm <- if (isTRUE(as_skos)) "mapping-ordo-skos.rq" else "mapping-ordo.rq"
  q_file <- system.file("sparql", q_nm, package = "DO.utils", mustWork = TRUE)

  # Without a user-supplied `output`, the query result is written to a
  # temporary TSV that is read back into R and removed when the function exits.
  read_into_r <- is.null(output)
  if (read_into_r) {
    output <- tmp_out <- tempfile(fileext = ".tsv")
    # `add = TRUE` appends this cleanup to any other exit handler registered in
    # this function instead of replacing it.
    on.exit(unlink(tmp_out), add = TRUE)
  }

  robot_query(input = ordo_path, query = q_file, output)

  if (!read_into_r) {
    # `...` is not used on this path; the output path is returned as given.
    return(output)
  }

  # Read every column as character so no value is re-typed by readr's column
  # guessing before `tidy_sparql()` post-processes the table.
  res <- readr::read_tsv(
    output,
    col_types = readr::cols(.default = readr::col_character())
  )
  res <- tidy_sparql(res, ...)

  res
}
48 changes: 48 additions & 0 deletions inst/sparql/mapping-ordo-skos.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# SPARQL query: Extract all ORDO mappings to SSSOM (with SKOS-conversion)
#
# One row is returned per ORPHA term / xref pair. ?predicate_id is only bound
# when the xref carries an obo:ECO_0000218 annotation whose text reduces to one
# of the codes listed in the VALUES table below.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX ORPHA: <http://www.orpha.net/ORDO/Orphanet_>
PREFIX doid: <http://purl.obolibrary.org/obo/doid#>

SELECT ?subject_id ?subject_label ?predicate_id ?object_id ?status
WHERE {
    # terms with an "ORPHA:"-prefixed notation, a label and at least one xref
    ?term skos:notation ?subject_id .
    ?term rdfs:label ?subject_label .
    ?term oboInOwl:hasDbXref ?object_id .
    FILTER( STRSTARTS( ?subject_id, "ORPHA:" ) )

    OPTIONAL {
        # annotation axiom attached to this exact term -> xref statement
        ?axiom owl:annotatedSource ?term .
        ?axiom owl:annotatedProperty oboInOwl:hasDbXref .
        ?axiom owl:annotatedTarget ?object_id .
        ?axiom obo:ECO_0000218 ?eco_note .

        # Reduce the annotation text to its mapping-type code. STR() drops any
        # language tag so the result can be matched against the plain literals
        # in the VALUES table.
        BIND(
            REPLACE( STR( ?eco_note ), ".*(E|NTBT|BTNT|W|ND)(.|\\n)*", "$1" )
            AS ?code
        )

        # mapping-type code -> predicate (doid: predicates supplement SKOS)
        VALUES ( ?code ?predicate_id ) {
            ( "E" skos:exactMatch )
            ( "BTNT" skos:narrowMatch )
            ( "NTBT" skos:broadMatch )
            ( "ND" doid:undefinedMatch )
            ( "W" doid:notMatch )
        }
    }

    OPTIONAL {
        # the term itself, or one of its ancestors, that is a direct subclass
        # of ORPHA:C041
        ?term rdfs:subClassOf* ?inactive_group .
        ?inactive_group rdfs:subClassOf ORPHA:C041 .
        ?inactive_group rdfs:label ?inactive_name .
    }

    # status: label of the matched ORPHA:C041 subclass, otherwise "active"
    # NOTE(review): the label is passed through unchanged, so any language tag
    # on it is kept in the output.
    BIND( COALESCE( ?inactive_name, "active" ) AS ?status )
}
40 changes: 40 additions & 0 deletions inst/sparql/mapping-ordo.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# SPARQL query: Extract all ORDO mappings to SSSOM
#
# One row is returned per ORPHA term / xref pair, with oboInOwl:hasDbXref as
# the predicate. ?predicate_modifier is only bound when the xref carries an
# obo:ECO_0000218 annotation.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX ORPHA: <http://www.orpha.net/ORDO/Orphanet_>

SELECT ?subject_id ?subject_label ?predicate_id ?predicate_modifier ?object_id ?status
WHERE {
    # fix the predicate so it is both matched below and reported as a column
    VALUES ?predicate_id { oboInOwl:hasDbXref }

    # terms with an "ORPHA:"-prefixed notation, a label and at least one xref
    ?term skos:notation ?subject_id .
    ?term rdfs:label ?subject_label .
    ?term ?predicate_id ?object_id .
    FILTER( STRSTARTS( ?subject_id, "ORPHA:" ) )

    OPTIONAL {
        # annotation axiom attached to this exact term -> xref statement
        ?axiom owl:annotatedSource ?term .
        ?axiom owl:annotatedProperty ?predicate_id .
        ?axiom owl:annotatedTarget ?object_id .
        ?axiom obo:ECO_0000218 ?eco_note .

        # Reduce the annotation text to its mapping-type code (E, NTBT, BTNT,
        # W or ND). STR() drops any language tag before the replacement.
        BIND(
            REPLACE( STR( ?eco_note ), ".*(E|NTBT|BTNT|W|ND)(.|\\n)*", "$1" )
            AS ?predicate_modifier
        )
    }

    OPTIONAL {
        # the term itself, or one of its ancestors, that is a direct subclass
        # of ORPHA:C041
        ?term rdfs:subClassOf* ?inactive_group .
        ?inactive_group rdfs:subClassOf ORPHA:C041 .
        ?inactive_group rdfs:label ?inactive_name .
    }

    # status: label of the matched ORPHA:C041 subclass, otherwise "active"
    # NOTE(review): the label is passed through unchanged, so any language tag
    # on it is kept in the output.
    BIND( COALESCE( ?inactive_name, "active" ) AS ?status )
}
55 changes: 55 additions & 0 deletions man/extract_ordo_mappings.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b041d85

Please sign in to comment.