Skip to content

Commit

Permalink
index structure compatible with pairix/pypairix 0.3.6
Browse files Browse the repository at this point in the history
  • Loading branch information
SooLee committed Apr 30, 2018
1 parent 73434f6 commit 08dd7fd
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 16 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Rpairix
Title: Rpairix
Version: 0.3.5
Version: 0.3.6
Authors@R: person("Soo", "Lee", email = "duplexa@gmail.com", role = c("aut", "cre"))
Description: R binder for pairix, tool for querying a pair of genomic ranges in a pairs file (pairix-indexed bgzipped text file)
Depends:
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ R --no-site-file --no-environ --no-save --no-restore CMD INSTALL --install-tests
To install a specific version,
```r
library(devtools)
install_url("https://github.com/4dn-dcic/Rpairix/archive/0.3.5.zip")
install_url("https://github.com/4dn-dcic/Rpairix/archive/0.3.6.zip")
```


Expand Down Expand Up @@ -325,6 +325,9 @@ Individual R functions are written and documented in `R/`. The `src/rpairixlib.c
***

## Version history
### 0.3.6
* Index structure is consistent with pairix/pypairix 0.3.6. This new structure resolves integer overflow issues for linecount. Older indices can still be read and used (backward-compatible).

### 0.3.5
* Index structure and C source code are consistent with pairix/pypairix 0.3.5. This new structure can deal with large chromosomes (length > 2^29). The older index can still be read and used for regular chromosomes (length < 2^29) (backward-compatible).

Expand Down
Binary file modified inst/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2
Binary file not shown.
Binary file modified inst/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2
Binary file not shown.
Binary file modified inst/test_4dn.pairs.gz.px2
Binary file not shown.
34 changes: 22 additions & 12 deletions src/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
int TAD_LIDX_SHIFT = TAD_LIDX_SHIFT_LARGE_CHR;
int MAX_CHR = MAX_CHR_LARGE_CHR;

#define MAGIC_NUMBER "PX2.003\1"
#define MAGIC_NUMBER "PX2.004\1"
#define OLD_MAGIC_NUMBER2 "PX2.003\1" // magic number for older version of pairix (0.3.4 - 0.3.5)
#define OLD_MAGIC_NUMBER "PX2.002\1" // magic number for older version of pairix (up to 0.3.3)


Expand Down Expand Up @@ -56,7 +57,7 @@ struct __ti_index_t {
khash_t(s) *tname;
khash_t(i) **index;
ti_lidx_t *index2;
int linecount;
uint64_t linecount;
};

struct __ti_iter_t {
Expand Down Expand Up @@ -490,9 +491,9 @@ void ti_index_save(const ti_index_t *idx, BGZF *fp)
bgzf_write(fp, bam_swap_endian_4p(&x), 4);
} else bgzf_write(fp, &idx->n, 4);
if (ti_is_be) {
uint32_t x = idx->linecount;
bgzf_write(fp, bam_swap_endian_4p(&x), 4);
} else bgzf_write(fp, &idx->linecount, 4);
uint64_t x = idx->linecount;
bgzf_write(fp, bam_swap_endian_8p(&x), 8);
} else bgzf_write(fp, &idx->linecount, 8);
assert(sizeof(ti_conf_t) == 40);
if (ti_is_be) { // write ti_conf_t;
uint32_t x[6];
Expand Down Expand Up @@ -574,19 +575,28 @@ static ti_index_t *ti_index_load_core(BGZF *fp)
}
bgzf_read(fp, magic, 8);
if (strncmp(magic, MAGIC_NUMBER, 8)) {
if (strncmp(magic, OLD_MAGIC_NUMBER, 8)) {
fprintf(stderr, "[ti_index_load] wrong magic number. Re-index if your index file was created by an earlier version of pairix.\n");
return 0;
} else {
if (strncmp(magic, OLD_MAGIC_NUMBER, 8)==0) {
TAD_LIDX_SHIFT = TAD_LIDX_SHIFT_ORIGINAL;
MAX_CHR = MAX_CHR_ORIGINAL;
}
else if(strncmp(magic, OLD_MAGIC_NUMBER2, 8)==0) {
}
else {
fprintf(stderr, "[ti_index_load] wrong magic number. Re-index if your index file was created by an earlier version of pairix.\n");
return 0;
}
}
idx = (ti_index_t*)calloc(1, sizeof(ti_index_t));
bgzf_read(fp, &idx->n, 4);
if (ti_is_be) bam_swap_endian_4p(&idx->n);
bgzf_read(fp, &idx->linecount, 4);
if (ti_is_be) bam_swap_endian_4p(&idx->linecount);
if(strncmp(magic, MAGIC_NUMBER, 8)==0) {
bgzf_read(fp, &idx->linecount, 8);
if (ti_is_be) bam_swap_endian_8p(&idx->linecount);
}
else if(strncmp(magic, OLD_MAGIC_NUMBER2, 8)==0 || strncmp(magic, OLD_MAGIC_NUMBER, 8)==0) {
bgzf_read(fp, &idx->linecount, 4);
if (ti_is_be) bam_swap_endian_4p(&idx->linecount);
}
idx->tname = kh_init(s);
idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
idx->index2 = (ti_lidx_t*)calloc(idx->n, sizeof(ti_lidx_t));
Expand Down Expand Up @@ -978,7 +988,7 @@ ti_iter_t ti_iter_first()
}


int get_linecount(const ti_index_t *idx)
/* Return the value of the linecount field stored in the index `idx`. */
uint64_t get_linecount(const ti_index_t *idx)
{
    const uint64_t n_lines = idx->linecount;
    return n_lines;
}
Expand Down
4 changes: 2 additions & 2 deletions src/pairix.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#ifndef __TABIDX_H
#define __TABIDX_H

#define PACKAGE_VERSION "0.3.5"
#define PACKAGE_VERSION "0.3.6"

#include <stdint.h>
#include "kstring.h"
Expand Down Expand Up @@ -151,7 +151,7 @@ extern "C" {
const char **ti_seqname(const ti_index_t *idx, int *n);

/* get linecount */
int get_linecount(const ti_index_t *idx);
uint64_t get_linecount(const ti_index_t *idx);

/* get file offset
* returns number of bgzf blocks spanning a sequence (pair) */
Expand Down

0 comments on commit 08dd7fd

Please sign in to comment.