Skip to content

Commit

Permalink
Merge pull request #17 from 4dn-dcic/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
SooLee authored Apr 5, 2017
2 parents 7a7cbcc + 3c80792 commit 9b089b8
Show file tree
Hide file tree
Showing 37 changed files with 177 additions and 60 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Rpairix
Title: Rpairix
Version: 0.1.1
Version: 0.1.2
Authors@R: person("Soo", "Lee", email = "duplexa@gmail.com", role = c("aut", "cre"))
Description: R binder for pairix, tool for querying a pair of genomic ranges in a pairs file (pairix-indexed bgzipped text file)
Depends:
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(px_endpos1_col)
export(px_endpos2_col)
export(px_exists)
export(px_getChar)
export(px_get_column_names)
export(px_keylist)
export(px_query)
export(px_seq1list)
Expand All @@ -21,6 +22,7 @@ useDynLib(Rpairix,check_1d_vs_2d)
useDynLib(Rpairix,getChar)
useDynLib(Rpairix,get_chr1_col)
useDynLib(Rpairix,get_chr2_col)
useDynLib(Rpairix,get_column_names)
useDynLib(Rpairix,get_endpos1_col)
useDynLib(Rpairix,get_endpos2_col)
useDynLib(Rpairix,get_keylist)
Expand Down
2 changes: 1 addition & 1 deletion R/px_chr1_col.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' res = px_chr1_col(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_chr1_col(filename)
#' print(res)
#'
Expand Down
2 changes: 1 addition & 1 deletion R/px_chr2_col.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' res = px_chr2_col(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_chr2_col(filename)
#' print(res)
#'
Expand Down
2 changes: 1 addition & 1 deletion R/px_endpos1_col.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' res = px_endpos1_col(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_endpos1_col(filename)
#' print(res)
#'
Expand Down
2 changes: 1 addition & 1 deletion R/px_endpos2_col.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' res = px_endpos2_col(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_endpos2_col(filename)
#' print(res)
#'
Expand Down
2 changes: 1 addition & 1 deletion R/px_exists.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' res = px_exists(filename, key)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' key = "10|20"
#' res = px_exists(filename, key)
#' print(res)
Expand Down
17 changes: 17 additions & 0 deletions R/px_get_column_names.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#' Column name retrieval function on pairix-indexed pairs file.
#'
#' This function returns a vector of column names for a pairs format.
#'
#' @param filename a pairs file, or a bgzipped text file (sometextfile.gz) with an index file sometextfile.gz.px2 in the same folder.
#'
#' @return A character vector of column names, or NULL when the native
#'   routine reports no column header. The vector is empty
#'   (\code{character(0)}) when the header holds only its leading token.
#' @keywords pairix names
#' @export px_get_column_names
#' @examples
#' filename = system.file(".","test_4dn.pairs.gz", package="Rpairix")
#' cols = px_get_column_names(filename)
#' print(cols)
#' @useDynLib Rpairix get_column_names
px_get_column_names <- function(filename) {
  out <- .Call("get_column_names", filename)

  # No header information available: keep the original NULL contract.
  # A zero-length result is treated the same way instead of failing on `[[1]]`.
  if (is.null(out) || length(out) == 0L) {
    return(NULL)
  }

  # The header is split on single spaces and its first token is dropped
  # (presumably the header tag rather than a column name -- confirm against
  # the C routine).
  cols <- strsplit(out, " ", fixed = TRUE)[[1]]

  # Negative indexing is safe for any length. `cols[2:length(cols)]` counts
  # down when fewer than two tokens are present and yields NA entries.
  cols[-1]
}
2 changes: 1 addition & 1 deletion R/px_keylist.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' res = px_keylist(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_keylist(filename)
#' print(res)
#'
Expand Down
9 changes: 7 additions & 2 deletions R/px_query.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @param linecount.only If TRUE, the function returns an integer corresponding to the number of output lines instead of the actual query result. (default FALSE)
#' @param autoflip If TRUE, the function will rerun on a flipped query (mate1 and mate2 swapped) if the original query results in an empty output. (default FALSE). If linecount.only option is used in combination with autoflip, the result count is on the flipped query in case the query gets flipped.
#'
#' @return data frame containing the query result. Column names are added if indexing was done with a pairs preset.
#' @keywords pairix query 2D
#' @export px_query
#' @examples
Expand All @@ -31,7 +32,7 @@
#' res = px_query(filename, querystr, autoflip=TRUE)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz", package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz", package="Rpairix")
#' querystr = "10:1-1000000|20"
#' res = px_query(filename, querystr)
#' print(res)
Expand Down Expand Up @@ -96,7 +97,11 @@ px_query<-function(filename, querystr, max_mem=100000000, stringsAsFactors=FALSE
if(out2[[2]][1] == -1) return(NULL) ## error

## tabularize
res.table = as.data.frame(do.call("rbind",strsplit(out2[[1]],'\t')),stringsAsFactors=stringsAsFactors)
##res.table = as.data.frame(do.call("rbind",strsplit(out2[[1]],'\t')),stringsAsFactors=stringsAsFactors)
res.table = as.data.frame(do.call("rbind",out2[[1]]),stringsAsFactors=stringsAsFactors)
cols = px_get_column_names(filename)
if(!is.null(cols) && length(cols)==ncol(res.table)) colnames(res.table)=cols;

return (res.table)
}

Expand Down
2 changes: 1 addition & 1 deletion R/px_seq1list.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' res = px_seq1list(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_seq1list(filename)
#' print(res)
px_seq1list<-function(filename){
Expand Down
2 changes: 1 addition & 1 deletion R/px_seq2list.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' res = px_seq2list(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_seq2list(filename)
#' print(res)
px_seq2list<-function(filename){
Expand Down
2 changes: 1 addition & 1 deletion R/px_seqlist.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' res = px_seqlist(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_seqlist(filename)
#' print(res)
px_seqlist<-function(filename){
Expand Down
2 changes: 1 addition & 1 deletion R/px_startpos1_col.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' res = px_startpos1_col(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_startpos1_col(filename)
#' print(res)
#'
Expand Down
2 changes: 1 addition & 1 deletion R/px_startpos2_col.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' res = px_startpos2_col(filename)
#' print(res)
#'
#' filename = system.file(".","merged_nodup.tab.chrblock_sorted.txt.gz",package="Rpairix")
#' filename = system.file(".","merged_nodups.space.chrblock_sorted.subsample1.txt.gz",package="Rpairix")
#' res = px_startpos2_col(filename)
#' print(res)
#'
Expand Down
36 changes: 28 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ R --no-site-file --no-environ --no-save --no-restore CMD INSTALL --install-tests
To install a specific version,
```
library(devtools)
install_url("https://github.com/4dn-dcic/Rpairix/archive/0.1.1.zip")
install_url("https://github.com/4dn-dcic/Rpairix/archive/0.1.2.zip")
```


## Available R functions
`px_build_index`, `px_query`, `px_keylist`, `px_seqlist`, `px_seq1list`, `px_seq2list`, `px_exists`, `px_chr1_col`, `px_chr2_col`, `px_startpos1_col`, `px_startpos2_col`, `px_endpos1_col`, `px_endpos2_col`, `px_check_dim`
`px_build_index`, `px_query`, `px_keylist`, `px_seqlist`, `px_seq1list`, `px_seq2list`, `px_exists`, `px_chr1_col`, `px_chr2_col`, `px_startpos1_col`, `px_startpos2_col`, `px_endpos1_col`, `px_endpos2_col`, `px_check_dim`, `px_get_column_names`

## Usage
```
Expand All @@ -59,6 +59,7 @@ px_startpos2_col(filename) # 1-based column index for mate2 start position
px_endpos1_col(filename) # 1-based column index for mate1 end position
px_endpos2_col(filename) # 1-based column index for mate2 end position
px_check_dim(filename) # returns 1 if the file is 1D-indexed, 2 if 2D-indexed. -1 if error.
px_get_column_names(filename) # returns a vector of column names, if available. (works only for pairs format)
```

### Indexing
Expand Down Expand Up @@ -147,6 +148,16 @@ px_check_dim(filename)
* `filename` is sometextfile.gz and an index file sometextfile.gz.px2 must exist.
* The return value is an integer; 1 if the input file is 1D-indexed, 2 if 2D-indexed, -1 if an error occurred.

### Getting column names
```
px_get_column_names(filename)
```
* `filename` is sometextfile.gz and an index file sometextfile.gz.px2 must exist
* The return value is a vector of column names.
* Returns values only if the indexing was done with the 'pairs' preset (either explicitly by setting a preset or by file extension recognition) and if the column heading information is available.

***

## Example run
```
> library(Rpairix)
Expand All @@ -162,9 +173,9 @@ px_check_dim(filename)
> querystr = "chr10:1-3000000|chr20"
> res = px_query(filename,querystr)
> print(res)
V1 V2 V3 V4 V5 V6 V7
1 SRR1658581.51740952 chr10 157600 chr20 167993 - -
2 SRR1658581.33457260 chr10 2559777 chr20 7888262 - +
readID chr1 pos1 chr2 pos2 strand1 strand2
1 SRR1658581.51740952 chr10 157600 chr20 167993 - -
2 SRR1658581.33457260 chr10 2559777 chr20 7888262 - +
>
> # line-count-only
> n = px_query(filename,querystr, linecount.only=TRUE)
Expand All @@ -175,9 +186,9 @@ px_check_dim(filename)
> px_query("inst/test_4dn.pairs.gz","chr20|chr10:1-3000000")
data frame with 0 columns and 0 rows
> px_query("inst/test_4dn.pairs.gz","chr20|chr10:1-3000000", autoflip=TRUE)
V1 V2 V3 V4 V5 V6 V7
1 SRR1658581.51740952 chr10 157600 chr20 167993 - -
2 SRR1658581.33457260 chr10 2559777 chr20 7888262 - +
readID chr1 pos1 chr2 pos2 strand1 strand2
1 SRR1658581.51740952 chr10 157600 chr20 167993 - -
2 SRR1658581.33457260 chr10 2559777 chr20 7888262 - +
> px_query("inst/test_4dn.pairs.gz","chr20|chr10:1-3000000", linecount.only=TRUE)
[1] 0
> px_query("inst/test_4dn.pairs.gz","chr20|chr10:1-3000000", autoflip=TRUE, linecount.only=TRUE)
Expand Down Expand Up @@ -227,6 +238,10 @@ data frame with 0 columns and 0 rows
> # checking if the file is 1D-indexed or 2D-indexed
> px_check_dim("inst/test_4dn.pairs.gz")
[1] 2
>
> # get column names
> px_get_column_names("inst/test_4dn.pairs.gz")
[1] "readID" "chr1" "pos1" "chr2" "pos2" "strand1" "strand2"
```


Expand All @@ -241,6 +256,11 @@ Individual R functions are written and documented in `R/`. The `src/rpairixlib.c


## Version history
### 0.1.2
* Function `px_get_column_names` is now added.
* `px_query` now adds column names for the query result if indexing was done with pairs preset.
* `px_query`: problem of merged_nodups query result not splitting by space is now fixed.

### 0.1.1
* `px_build_index`: When neither `preset` nor a custom set of columns is given, file extensions are automatically recognized for indexing.

Expand Down
2 changes: 1 addition & 1 deletion Rpairix.Rproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version: 0.1.1
Version: 0.1.2

RestoreWorkspace: No
SaveWorkspace: No
Expand Down
Binary file removed inst/merged_nodup.tab.chrblock_sorted.txt.gz
Binary file not shown.
Binary file removed inst/merged_nodup.tab.chrblock_sorted.txt.gz.px2
Binary file not shown.
Binary file modified inst/test_4dn.pairs.gz.px2
Binary file not shown.
2 changes: 1 addition & 1 deletion man/px_chr1_col.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/px_chr2_col.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/px_endpos1_col.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/px_endpos2_col.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/px_exists.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/px_get_column_names.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/px_keylist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions man/px_query.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/px_seq1list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/px_seq2list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 9b089b8

Please sign in to comment.