Skip to content

Commit

Permalink
Make read_omim() header identification more reliable
Browse files Browse the repository at this point in the history
Header identification now depends on a case-insensitive match of
"mim number" and is less dependent on the presence of tabs. Files opened
with Excel have tabs added to all lines, which caused the previous
tab-based detection to fail.
  • Loading branch information
allenbaron committed Feb 13, 2024
1 parent bb1f6da commit e7da978
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions R/read_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ preprocess_omim_dl <- function(file, ...) {
)
)
if (is_official && was_generated) {
# get header (last commented out line)
header_n <- which(stringr::str_detect(.lines, "^[^#]"))[1] - 1
# get header
header_n <- identify_omim_header_row(.lines)
header <- .lines[header_n] %>%
stringr::str_remove("^# *") %>%
stringr::str_split_1("\t")
Expand Down Expand Up @@ -159,9 +159,12 @@ preprocess_omim_dl <- function(file, ...) {
}

# Identify the header row in the lines of an OMIM download.
#
# The header is taken to be the first line that both contains a tab and
# matches "mim num" (case-insensitive). Matching on the header text, rather
# than on tabs alone, keeps detection working when every line contains tabs.
#
# .lines: character vector holding the lines of the file, in order.
#
# Returns the index (integer) of the first matching line, or NA when no line
# matches (including when `.lines` is empty).
identify_omim_header_row <- function(.lines) {
  mim_number <- stringr::str_detect(
    .lines,
    stringr::regex("mim num", ignore_case = TRUE)
  )
  tab_separated <- stringr::str_count(.lines, "\t") > 0

  # which() drops FALSE/NA entries; `[1]` yields NA if nothing qualified
  which(tab_separated & mim_number)[1]
}

0 comments on commit e7da978

Please sign in to comment.