Skip to content

Commit

Permalink
re
Browse files Browse the repository at this point in the history
  • Loading branch information
jaytimm committed Apr 16, 2024
0 parents commit 518260f
Show file tree
Hide file tree
Showing 34 changed files with 1,641 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.github$
LICENSE.md
1 change: 1 addition & 0 deletions .github/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html
49 changes: 49 additions & 0 deletions .github/workflows/check-standard.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: R-CMD-check

jobs:
R-CMD-check:
runs-on: ${{ matrix.config.os }}

name: ${{ matrix.config.os }} (${{ matrix.config.r }})

strategy:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
48 changes: 48 additions & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
release:
types: [published]
workflow_dispatch:

name: pkgdown

jobs:
pkgdown:
runs-on: ubuntu-latest
# Only restrict concurrency for non-PR jobs
concurrency:
group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
permissions:
contents: write
steps:
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::pkgdown, local::.
needs: website

- name: Build site
run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
shell: Rscript {0}

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/github-pages-deploy-action@v4.4.1
with:
clean: false
branch: gh-pages
folder: docs
50 changes: 50 additions & 0 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: test-coverage

jobs:
test-coverage:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::covr
needs: coverage

- name: Test coverage
run: |
covr::codecov(
quiet = FALSE,
clean = FALSE,
install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
)
shell: Rscript {0}

- name: Show testthat output
if: always()
run: |
## --------------------------------------------------------------------
find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
shell: bash

- name: Upload test results
if: failure()
uses: actions/upload-artifact@v3
with:
name: coverage-test-failures
path: ${{ runner.temp }}/package
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
README.Rmd
25 changes: 25 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Package: puremoe
Type: Package
Title: PubMed Unified REtrieval for Multi-Output Exploration
Version: 1.0.0
Author: Jason Timm [aut, cre]
Maintainer: Jason Timm <JaTimm@salud.unm.edu>
Description: An R package for accessing a variety of PubMed data, including abstracts, bibliometrics, pubtations, and full-text records, through a single, user-friendly interface.
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: false
Depends:
R (>= 3.5)
Imports:
rentrez,
textshape,
xml2,
data.table,
httr,
pbapply,
jsonlite,
rappdirs
Suggests:
knitr,
rmarkdown
RoxygenNote: 7.3.1
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2022
COPYRIGHT HOLDER: Jason Timm
21 changes: 21 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# MIT License

Copyright (c) 2022 Jason Timm

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
32 changes: 32 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Generated by roxygen2: do not edit by hand

export(data_mesh_embeddings)
export(data_mesh_thesuarus)
export(data_mesh_trees)
export(data_pharm_action)
export(data_pmc_list)
export(get_records)
export(search_pubmed)
import(data.table)
importFrom(data.table,fread)
importFrom(data.table,rbindlist)
importFrom(data.table,setDT)
importFrom(httr,GET)
importFrom(httr,content)
importFrom(jsonlite,stream_in)
importFrom(parallel,clusterExport)
importFrom(parallel,detectCores)
importFrom(parallel,makeCluster)
importFrom(parallel,stopCluster)
importFrom(pbapply,pblapply)
importFrom(rappdirs,user_data_dir)
importFrom(rentrez,entrez_fetch)
importFrom(rentrez,entrez_search)
importFrom(utils,download.file)
importFrom(utils,read.csv)
importFrom(utils,untar)
importFrom(xml2,read_xml)
importFrom(xml2,xml_children)
importFrom(xml2,xml_find_all)
importFrom(xml2,xml_find_first)
importFrom(xml2,xml_text)
71 changes: 71 additions & 0 deletions R/data_mesh_embeddings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#' Download and Process Mesh and SCR Embeddings
#'
#' Downloads the Mesh and SCR embeddings from the `jaytimm/mesh-builds`
#' GitHub repository and caches them as RDS files under
#' `rappdirs::user_data_dir('puremoe')`. A file that is already present in the
#' cache is not downloaded again. Downloads are written to a temporary file
#' first and only copied into the cache on success, so an interrupted download
#' never leaves a truncated RDS file behind.
#'
#' @return A data frame containing the processed Mesh and SCR embeddings data
#'   (the two cached objects combined with `rbind()`), or `NULL` if either
#'   file could not be downloaded.
#'
#' @importFrom rappdirs user_data_dir
#' @importFrom utils download.file
#' @export
#' @examples
#' \donttest{
#' if (interactive()) {
#'   # Code that downloads data or performs other interactive-only operations
#'   data <- data_mesh_embeddings()
#' }
#' }
data_mesh_embeddings <- function() {

  cache_dir <- rappdirs::user_data_dir('puremoe')

  # Remote sources, keyed by the label used in the progress message
  urls <- c(
    Mesh = 'https://github.com/jaytimm/mesh-builds/blob/main/data/data_mesh_embeddings.rds?raw=true',
    SCR = 'https://github.com/jaytimm/mesh-builds/blob/main/data/data_scr_embeddings.rds?raw=true'
  )

  # Local cache file for each source
  paths <- c(
    Mesh = file.path(cache_dir, 'data_mesh_embeddings.rds'),
    SCR = file.path(cache_dir, 'data_scr_embeddings.rds')
  )

  # Check if the directory for data storage exists, and create it if not
  if (!dir.exists(cache_dir)) {
    dir.create(cache_dir, recursive = TRUE)
  }

  for (nm in names(urls)) {
    if (file.exists(paths[[nm]])) {
      next
    }

    message('Downloading the ', nm, ' embeddings ...')

    # Download to a temp file so a failed transfer cannot poison the cache.
    # mode = 'wb' is required: the URL ends in '?raw=true', so R does not
    # switch to binary transfer automatically on Windows.
    tmp <- tempfile(fileext = '.rds')
    status <- tryCatch(
      utils::download.file(urls[[nm]], tmp, mode = 'wb'),
      error = function(e) NA_integer_
    )

    # download.file() signals success with a 0 status; anything else
    # (including the NA produced by the error handler) is a failure.
    ok <- isTRUE(status == 0L) &&
      file.copy(tmp, paths[[nm]], overwrite = TRUE)
    unlink(tmp)

    if (!ok) {
      message('Download not completed ... Try options(timeout = 600)')
      # The cache file did not exist before this attempt, so removing any
      # partial copy is safe; unlink() is silent when nothing is there.
      unlink(paths[[nm]])
    }
  }

  # Without both files there is nothing meaningful to return
  if (!all(file.exists(paths))) {
    return(NULL)
  }

  rbind(readRDS(paths[['Mesh']]), readRDS(paths[['SCR']]))
}
59 changes: 59 additions & 0 deletions R/data_mesh_thesaurus.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' Download and Combine MeSH and Supplemental Thesauruses
#'
#' This function downloads and combines the MeSH (Medical Subject Headings) Thesaurus
#' and a supplemental concept thesaurus for use in biomedical research and analysis.
#' The data is sourced from the `jaytimm/mesh-builds` GitHub repository and cached
#' as RDS files under `rappdirs::user_data_dir('puremoe')` for subsequent use.
#' Each file is downloaded to a temporary location first and only copied into the
#' cache once the download has succeeded, so a failed download (including one
#' triggered by `force_download = TRUE`) does not overwrite an existing cache file.
#'
#' @param force_download A logical value indicating whether to force re-downloading
#'   of the data even if it already exists locally.
#' @return A data.table containing the combined MeSH and supplemental thesaurus data.
#'   An error is raised if a required download does not complete.
#' @importFrom rappdirs user_data_dir
#' @importFrom utils download.file
#' @importFrom data.table rbindlist
#' @export
#' @examples
#' \donttest{
#' if (interactive()) {
#'   # Code that downloads data or performs other interactive-only operations
#'   data <- data_mesh_thesuarus()
#' }
#' }
data_mesh_thesuarus <- function(force_download = FALSE) {

  cache_dir <- rappdirs::user_data_dir('puremoe')

  # URLs for the MeSH thesaurus and supplemental thesaurus data
  sf <- 'https://github.com/jaytimm/mesh-builds/blob/main/data/data_mesh_thesaurus.rds?raw=true'
  sf2 <- 'https://github.com/jaytimm/mesh-builds/blob/main/data/data_scr_thesaurus.rds?raw=true'

  # Local file paths for storing the downloaded data. The misspelled file
  # names are kept on purpose so caches written by earlier versions are reused.
  df <- file.path(cache_dir, 'data_mesh_thesuarus.rds')
  df2 <- file.path(cache_dir, 'data_scr_thesuarus.rds')

  # Create the cache directory up front, independent of which file is missing
  if (!dir.exists(cache_dir)) {
    dir.create(cache_dir, recursive = TRUE)
  }

  # Download `url` into `dest` unless a cached copy exists and no refresh
  # was requested. Stops with an error when the download does not complete.
  fetch <- function(url, dest, label) {
    if (file.exists(dest) && !isTRUE(force_download)) {
      return(invisible(FALSE))
    }

    message('Downloading the ', label, ' ...')

    tmp <- tempfile(fileext = '.rds')
    on.exit(unlink(tmp), add = TRUE)

    # mode = 'wb': RDS is binary, and the '?raw=true' suffix prevents R from
    # selecting binary transfer automatically on Windows.
    status <- utils::download.file(url, tmp, mode = 'wb')

    if (!isTRUE(status == 0L) || !file.copy(tmp, dest, overwrite = TRUE)) {
      stop('Download of the ', label, ' did not complete.', call. = FALSE)
    }

    invisible(TRUE)
  }

  fetch(sf, df, 'mesh thesaurus')
  fetch(sf2, df2, 'supplemental concept thesaurus')

  # Read the downloaded RDS files
  a1 <- readRDS(df)
  a2 <- readRDS(df2)

  # Ensure the column names are consistent between the two data sets
  # NOTE(review): assumes both tables have the same number and order of
  # columns -- confirm against the mesh-builds data.
  colnames(a2) <- colnames(a1)

  # Combine the data using data.table's rbindlist
  data.table::rbindlist(list(a1, a2))
}
Loading

0 comments on commit 518260f

Please sign in to comment.