diff --git a/.travis.yml b/.travis.yml index 78dd252a..37c16bc4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,8 +14,10 @@ matrix: - sudo python setup.py install - python test/test.py - python test/test_oldindex.py + - python test/test_oldindex2.py - source test/test_c.sh # valgrind test is applied when VALGRIND_TEST_ON=1 - source test/test_c_oldindex.sh # test for old index + - source test/test_c_oldindex2.sh # test for old index (0.3.4 - 0.3.5 format) - language: c sudo: required compiler: gcc diff --git a/README.md b/README.md index e2891828..be70646c 100644 --- a/README.md +++ b/README.md @@ -639,6 +639,9 @@ ulimit -n 2000 ## Version history +### 0.3.6 +* Line count (`pairix -n`) integer overflow issue has been fixed. The index structure has changed. Indexes generated by previous versions (0.2.5~0.3.3, 0.3.4~0.3.5) are auto-detected and can still be used (backward compatible). + ### 0.3.5 * Backward compatibility is added - The index generated by the previous version (0.2.5 ~ 0.3.3) can now be auto-detected and used by Pairix. 
diff --git a/VERSION.txt b/VERSION.txt index c2c0004f..449d7e73 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -0.3.5 +0.3.6 diff --git a/samples/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz.px2 b/samples/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz.px2 index 6fee4a77..90bae1c4 100644 Binary files a/samples/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz.px2 and b/samples/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz.px2 differ diff --git a/samples/4dn.bsorted.chr21_22_only.pairs.gz.px2 b/samples/4dn.bsorted.chr21_22_only.pairs.gz.px2 index f7269a6f..d30ebdc8 100644 Binary files a/samples/4dn.bsorted.chr21_22_only.pairs.gz.px2 and b/samples/4dn.bsorted.chr21_22_only.pairs.gz.px2 differ diff --git a/samples/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 b/samples/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 index 1ed5f4fc..14f261da 100644 Binary files a/samples/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 and b/samples/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 differ diff --git a/samples/merged_nodup.tab.chrblock_sorted.txt.gz.px2 b/samples/merged_nodup.tab.chrblock_sorted.txt.gz.px2 index e2e26be7..8fb55322 100644 Binary files a/samples/merged_nodup.tab.chrblock_sorted.txt.gz.px2 and b/samples/merged_nodup.tab.chrblock_sorted.txt.gz.px2 differ diff --git a/samples/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 b/samples/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 index 5ef9b921..6f3bd61f 100644 Binary files a/samples/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 and b/samples/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 differ diff --git a/samples/merged_nodups.space.chrblock_sorted.subsample2.txt.gz.px2 b/samples/merged_nodups.space.chrblock_sorted.subsample2.txt.gz.px2 index cdac0e31..bf0af794 100644 Binary files a/samples/merged_nodups.space.chrblock_sorted.subsample2.txt.gz.px2 and b/samples/merged_nodups.space.chrblock_sorted.subsample2.txt.gz.px2 differ diff --git 
a/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.ff.pairs.gz.px2 b/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.ff.pairs.gz.px2 index d2b71950..2ff6e887 100644 Binary files a/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.ff.pairs.gz.px2 and b/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.ff.pairs.gz.px2 differ diff --git a/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.nofrag.pairs.gz.px2 b/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.nofrag.pairs.gz.px2 index cfdf5521..5a1541d6 100644 Binary files a/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.nofrag.pairs.gz.px2 and b/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.nofrag.pairs.gz.px2 differ diff --git a/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.pairs.gz.px2 b/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.pairs.gz.px2 index d9f1f706..9e9df439 100644 Binary files a/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.pairs.gz.px2 and b/samples/merged_nodups.space.chrblock_sorted.subsample3.bsorted.pairs.gz.px2 differ diff --git a/samples/merged_nodups.space.chrblock_sorted.subsample3.txt.gz.px2 b/samples/merged_nodups.space.chrblock_sorted.subsample3.txt.gz.px2 index 869f036d..4945d9bd 100644 Binary files a/samples/merged_nodups.space.chrblock_sorted.subsample3.txt.gz.px2 and b/samples/merged_nodups.space.chrblock_sorted.subsample3.txt.gz.px2 differ diff --git a/samples/mock.largechr.pairs.gz.px2 b/samples/mock.largechr.pairs.gz.px2 index 97657442..12d1d239 100644 Binary files a/samples/mock.largechr.pairs.gz.px2 and b/samples/mock.largechr.pairs.gz.px2 differ diff --git a/samples/old_index/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 b/samples/old_index/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 index 9d6d6754..85dd1621 100644 Binary files a/samples/old_index/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 and 
b/samples/old_index/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 differ diff --git a/samples/old_index/merged_nodup.tab.chrblock_sorted.txt.gz.px2 b/samples/old_index/merged_nodup.tab.chrblock_sorted.txt.gz.px2 index 1c18dbc2..8fb55322 100644 Binary files a/samples/old_index/merged_nodup.tab.chrblock_sorted.txt.gz.px2 and b/samples/old_index/merged_nodup.tab.chrblock_sorted.txt.gz.px2 differ diff --git a/samples/old_index/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 b/samples/old_index/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 index 4009236b..bb3a4c71 100644 Binary files a/samples/old_index/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 and b/samples/old_index/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 differ diff --git a/samples/old_index/merged_nodups.space.chrblock_sorted.subsample2.txt.gz.px2 b/samples/old_index/merged_nodups.space.chrblock_sorted.subsample2.txt.gz.px2 index dbd287c2..bf0af794 100644 Binary files a/samples/old_index/merged_nodups.space.chrblock_sorted.subsample2.txt.gz.px2 and b/samples/old_index/merged_nodups.space.chrblock_sorted.subsample2.txt.gz.px2 differ diff --git a/samples/old_index/test_4dn.pairs.gz.px2 b/samples/old_index/test_4dn.pairs.gz.px2 index 9beb9ae3..0517b004 100644 Binary files a/samples/old_index/test_4dn.pairs.gz.px2 and b/samples/old_index/test_4dn.pairs.gz.px2 differ diff --git a/samples/old_index/test_juicer_shortform.bsorted.pairs.gz.px2 b/samples/old_index/test_juicer_shortform.bsorted.pairs.gz.px2 index f7fc1b98..5dabff0d 100644 Binary files a/samples/old_index/test_juicer_shortform.bsorted.pairs.gz.px2 and b/samples/old_index/test_juicer_shortform.bsorted.pairs.gz.px2 differ diff --git a/samples/old_index/test_merged_nodups.bsorted.pairs.gz.px2 b/samples/old_index/test_merged_nodups.bsorted.pairs.gz.px2 index 784343e4..54393567 100644 Binary files a/samples/old_index/test_merged_nodups.bsorted.pairs.gz.px2 and 
b/samples/old_index/test_merged_nodups.bsorted.pairs.gz.px2 differ diff --git a/samples/old_index/test_merged_nodups.txt.bsorted.gz.px2 b/samples/old_index/test_merged_nodups.txt.bsorted.gz.px2 index dbd287c2..bf0af794 100644 Binary files a/samples/old_index/test_merged_nodups.txt.bsorted.gz.px2 and b/samples/old_index/test_merged_nodups.txt.bsorted.gz.px2 differ diff --git a/samples/old_index/test_old_merged_nodups.bsorted.pairs.gz.px2 b/samples/old_index/test_old_merged_nodups.bsorted.pairs.gz.px2 index f187f8fe..61d0db90 100644 Binary files a/samples/old_index/test_old_merged_nodups.bsorted.pairs.gz.px2 and b/samples/old_index/test_old_merged_nodups.bsorted.pairs.gz.px2 differ diff --git a/samples/old_index/test_old_merged_nodups.txt.bsorted.gz.px2 b/samples/old_index/test_old_merged_nodups.txt.bsorted.gz.px2 index 1c70222a..cfa27a2c 100644 Binary files a/samples/old_index/test_old_merged_nodups.txt.bsorted.gz.px2 and b/samples/old_index/test_old_merged_nodups.txt.bsorted.gz.px2 differ diff --git a/samples/old_index2/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz b/samples/old_index2/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz new file mode 100644 index 00000000..c8fb6b2d Binary files /dev/null and b/samples/old_index2/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz differ diff --git a/samples/old_index2/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz.px2 b/samples/old_index2/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz.px2 new file mode 100644 index 00000000..6fee4a77 Binary files /dev/null and b/samples/old_index2/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz.px2 differ diff --git a/samples/old_index2/4dn.bsorted.chr21_22_only.pairs.gz b/samples/old_index2/4dn.bsorted.chr21_22_only.pairs.gz new file mode 100644 index 00000000..1a836bb5 Binary files /dev/null and b/samples/old_index2/4dn.bsorted.chr21_22_only.pairs.gz differ diff --git a/samples/old_index2/4dn.bsorted.chr21_22_only.pairs.gz.px2 b/samples/old_index2/4dn.bsorted.chr21_22_only.pairs.gz.px2 new 
file mode 100644 index 00000000..f7269a6f Binary files /dev/null and b/samples/old_index2/4dn.bsorted.chr21_22_only.pairs.gz.px2 differ diff --git a/samples/old_index2/SRR1171591.variants.snp.vqsr.p.vcf.gz b/samples/old_index2/SRR1171591.variants.snp.vqsr.p.vcf.gz new file mode 100644 index 00000000..3f8268b7 Binary files /dev/null and b/samples/old_index2/SRR1171591.variants.snp.vqsr.p.vcf.gz differ diff --git a/samples/old_index2/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 b/samples/old_index2/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 new file mode 100644 index 00000000..9810f023 Binary files /dev/null and b/samples/old_index2/SRR1171591.variants.snp.vqsr.p.vcf.gz.px2 differ diff --git a/samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz b/samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz new file mode 100644 index 00000000..bd16f9e0 Binary files /dev/null and b/samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz differ diff --git a/samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz.px2 b/samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz.px2 new file mode 100644 index 00000000..e2e26be7 Binary files /dev/null and b/samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz.px2 differ diff --git a/samples/old_index2/merged_nodups.space.chrblock_sorted.subsample1.txt.gz b/samples/old_index2/merged_nodups.space.chrblock_sorted.subsample1.txt.gz new file mode 100644 index 00000000..adb18c33 Binary files /dev/null and b/samples/old_index2/merged_nodups.space.chrblock_sorted.subsample1.txt.gz differ diff --git a/samples/old_index2/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 b/samples/old_index2/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 new file mode 100644 index 00000000..98220bc6 Binary files /dev/null and b/samples/old_index2/merged_nodups.space.chrblock_sorted.subsample1.txt.gz.px2 differ diff --git a/samples/old_index2/mock.largechr.pairs.gz b/samples/old_index2/mock.largechr.pairs.gz new file 
mode 100644 index 00000000..7e16b5f2 Binary files /dev/null and b/samples/old_index2/mock.largechr.pairs.gz differ diff --git a/samples/old_index2/mock.largechr.pairs.gz.px2 b/samples/old_index2/mock.largechr.pairs.gz.px2 new file mode 100644 index 00000000..97657442 Binary files /dev/null and b/samples/old_index2/mock.largechr.pairs.gz.px2 differ diff --git a/samples/old_index2/test_4dn.pairs.gz b/samples/old_index2/test_4dn.pairs.gz new file mode 100644 index 00000000..1a4bcd75 Binary files /dev/null and b/samples/old_index2/test_4dn.pairs.gz differ diff --git a/samples/old_index2/test_4dn.pairs.gz.px2 b/samples/old_index2/test_4dn.pairs.gz.px2 new file mode 100644 index 00000000..02c35986 Binary files /dev/null and b/samples/old_index2/test_4dn.pairs.gz.px2 differ diff --git a/samples/test_4dn.pairs.gz.px2 b/samples/test_4dn.pairs.gz.px2 index 02c35986..0517b004 100644 Binary files a/samples/test_4dn.pairs.gz.px2 and b/samples/test_4dn.pairs.gz.px2 differ diff --git a/samples/test_4dn_2.bsorted.pairs.gz.px2 b/samples/test_4dn_2.bsorted.pairs.gz.px2 index da40b5ef..320b93b9 100644 Binary files a/samples/test_4dn_2.bsorted.pairs.gz.px2 and b/samples/test_4dn_2.bsorted.pairs.gz.px2 differ diff --git a/samples/test_juicer_shortform.bsorted.pairs.gz.px2 b/samples/test_juicer_shortform.bsorted.pairs.gz.px2 index f621ced1..5dabff0d 100644 Binary files a/samples/test_juicer_shortform.bsorted.pairs.gz.px2 and b/samples/test_juicer_shortform.bsorted.pairs.gz.px2 differ diff --git a/samples/test_merged_nodups.bsorted.pairs.gz.px2 b/samples/test_merged_nodups.bsorted.pairs.gz.px2 index 20475c6e..54393567 100644 Binary files a/samples/test_merged_nodups.bsorted.pairs.gz.px2 and b/samples/test_merged_nodups.bsorted.pairs.gz.px2 differ diff --git a/samples/test_merged_nodups.txt.bsorted.gz.px2 b/samples/test_merged_nodups.txt.bsorted.gz.px2 index cdac0e31..bf0af794 100644 Binary files a/samples/test_merged_nodups.txt.bsorted.gz.px2 and 
b/samples/test_merged_nodups.txt.bsorted.gz.px2 differ diff --git a/samples/test_old_merged_nodups.bsorted.pairs.gz.px2 b/samples/test_old_merged_nodups.bsorted.pairs.gz.px2 index 7eb1619b..61d0db90 100644 Binary files a/samples/test_old_merged_nodups.bsorted.pairs.gz.px2 and b/samples/test_old_merged_nodups.bsorted.pairs.gz.px2 differ diff --git a/samples/test_old_merged_nodups.txt.bsorted.gz.px2 b/samples/test_old_merged_nodups.txt.bsorted.gz.px2 index 3c820f3f..cfa27a2c 100644 Binary files a/samples/test_old_merged_nodups.txt.bsorted.gz.px2 and b/samples/test_old_merged_nodups.txt.bsorted.gz.px2 differ diff --git a/src/index.c b/src/index.c index be6a8d77..c185132c 100644 --- a/src/index.c +++ b/src/index.c @@ -26,7 +26,8 @@ int TAD_LIDX_SHIFT = TAD_LIDX_SHIFT_LARGE_CHR; int MAX_CHR = MAX_CHR_LARGE_CHR; -#define MAGIC_NUMBER "PX2.003\1" +#define MAGIC_NUMBER "PX2.004\1" +#define OLD_MAGIC_NUMBER2 "PX2.003\1" // magic number for older version of pairix (0.3.4 - 0.3.5) #define OLD_MAGIC_NUMBER "PX2.002\1" // magic number for older version of pairix (up to 0.3.3) @@ -56,7 +57,7 @@ struct __ti_index_t { khash_t(s) *tname; khash_t(i) **index; ti_lidx_t *index2; - int linecount; + uint64_t linecount; }; struct __ti_iter_t { @@ -490,9 +491,9 @@ void ti_index_save(const ti_index_t *idx, BGZF *fp) bgzf_write(fp, bam_swap_endian_4p(&x), 4); } else bgzf_write(fp, &idx->n, 4); if (ti_is_be) { - uint32_t x = idx->linecount; - bgzf_write(fp, bam_swap_endian_4p(&x), 4); - } else bgzf_write(fp, &idx->linecount, 4); + uint64_t x = idx->linecount; + bgzf_write(fp, bam_swap_endian_8p(&x), 8); + } else bgzf_write(fp, &idx->linecount, 8); assert(sizeof(ti_conf_t) == 40); if (ti_is_be) { // write ti_conf_t; uint32_t x[6]; @@ -574,19 +575,28 @@ static ti_index_t *ti_index_load_core(BGZF *fp) } bgzf_read(fp, magic, 8); if (strncmp(magic, MAGIC_NUMBER, 8)) { - if (strncmp(magic, OLD_MAGIC_NUMBER, 8)) { - fprintf(stderr, "[ti_index_load] wrong magic number. 
Re-index if your index file was created by an earlier version of pairix.\n"); - return 0; - } else { + if (strncmp(magic, OLD_MAGIC_NUMBER, 8)==0) { TAD_LIDX_SHIFT = TAD_LIDX_SHIFT_ORIGINAL; MAX_CHR = MAX_CHR_ORIGINAL; } + else if(strncmp(magic, OLD_MAGIC_NUMBER2, 8)==0) { + } + else { + fprintf(stderr, "[ti_index_load] wrong magic number. Re-index if your index file was created by an earlier version of pairix.\n"); + return 0; + } } idx = (ti_index_t*)calloc(1, sizeof(ti_index_t)); bgzf_read(fp, &idx->n, 4); if (ti_is_be) bam_swap_endian_4p(&idx->n); - bgzf_read(fp, &idx->linecount, 4); - if (ti_is_be) bam_swap_endian_4p(&idx->linecount); + if(strncmp(magic, MAGIC_NUMBER, 8)==0) { + bgzf_read(fp, &idx->linecount, 8); + if (ti_is_be) bam_swap_endian_8p(&idx->linecount); + } + else if(strncmp(magic, OLD_MAGIC_NUMBER2, 8)==0 || strncmp(magic, OLD_MAGIC_NUMBER, 8)==0) { + bgzf_read(fp, &idx->linecount, 4); + if (ti_is_be) bam_swap_endian_4p(&idx->linecount); + } idx->tname = kh_init(s); idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*)); idx->index2 = (ti_lidx_t*)calloc(idx->n, sizeof(ti_lidx_t)); @@ -845,9 +855,10 @@ int ti_parse_region(const ti_index_t *idx, const char *str, int *tid, int *begin // if 1d, begin2 and end2 will have value -1. 
// query string error: -1 // region_split_character not matching error: -2 +// memory allocation error: -3 int ti_parse_region2d(const ti_index_t *idx, const char *str, int *tid, int *begin, int *end, int *begin2, int *end2) { - char *s, *p, *sname; + char *s, *p, *sname, *tmp_s; int i, l, k, h; int coord1s, coord1e, coord2s, coord2e, pos1s, pos2s; char region_split_character = ti_get_region_split_character(idx); @@ -872,7 +883,9 @@ int ti_parse_region2d(const ti_index_t *idx, const char *str, int *tid, int *beg free(s); return (res); } if(i == k && dim == 2) { //1d query on 2d data : interprete query 'x' as 'x|x' - s = (char*)realloc(s, k*2+2); + tmp_s = (char*)realloc(s, k*2+2); + if(tmp_s) s = tmp_s; + else return(-3); // memory alloc error strcpy(s+i+1, s); s[i] = region_split_character; k = k*2+1; @@ -975,7 +988,7 @@ ti_iter_t ti_iter_first() } -int get_linecount(const ti_index_t *idx) +uint64_t get_linecount(const ti_index_t *idx) { return(idx->linecount); } diff --git a/src/main.c b/src/main.c index 7be3be60..45214188 100644 --- a/src/main.c +++ b/src/main.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "bgzf.h" #include "pairix.h" #include "knetfile.h" @@ -220,8 +221,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "[main] fail to load the index file.\n"); return 1; } - int linecount = get_linecount(idx); - printf("%d\n", linecount); + printf("%"PRIu64"\n", get_linecount(idx)); ti_index_destroy(idx); return 0; } diff --git a/src/pairix.h b/src/pairix.h index 14552b1d..0526b16b 100644 --- a/src/pairix.h +++ b/src/pairix.h @@ -28,7 +28,7 @@ #ifndef __TABIDX_H #define __TABIDX_H -#define PACKAGE_VERSION "0.3.5" +#define PACKAGE_VERSION "0.3.6" #include #include "kstring.h" @@ -151,7 +151,7 @@ extern "C" { const char **ti_seqname(const ti_index_t *idx, int *n); /* get linecount */ - int get_linecount(const ti_index_t *idx); + uint64_t get_linecount(const ti_index_t *idx); /* get file offset * returns number of bgzf blocks spanning a 
sequence (pair) */ diff --git a/test/test.py b/test/test.py index 01021f08..11e81f13 100755 --- a/test/test.py +++ b/test/test.py @@ -253,10 +253,10 @@ def test_querys_2_bad_order(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") # trigger a warning - it = self.pr.querys2D(query) + self.pr.querys2D(query) # verify some things about the warning - assert len(w) == 1 - assert issubclass(w[-1].category, pypairix.PairixWarning) + self.assertEqual(len(w), 1) + self.assertTrue(issubclass(w[-1].category, pypairix.PairixWarning)) def test_build_index_with_force_merged_nodups_tab(self): ## recognizing custom set pypairix.build_index(TEST_FILE_2D, sc=2, bc=3, ec=3, sc2=6, bc2=7, ec2=7, force=1) @@ -300,10 +300,10 @@ def test_query2_rev_fail(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") # trigger a warning - it = self.pr.query2D(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) + self.pr.query2D(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) # verify some things about the warning - assert len(w) == 1 - assert issubclass(w[-1].category, pypairix.PairixWarning) + self.assertEqual(len(w), 1) + self.assertTrue(issubclass(w[-1].category, pypairix.PairixWarning)) ## 2D query on 2D indexed file with chromosomes using a 4DN pairs file diff --git a/test/test_c.sh b/test/test_c.sh index 937177db..3eff7c1b 100755 --- a/test/test_c.sh +++ b/test/test_c.sh @@ -1,3 +1,5 @@ +#!/bin/bash + PATH=./bin:$PATH if [ $VALGRIND_TEST_ON -eq 1 ]; then diff --git a/test/test_c_oldindex.sh b/test/test_c_oldindex.sh index 3121f292..56dea09e 100755 --- a/test/test_c_oldindex.sh +++ b/test/test_c_oldindex.sh @@ -1,3 +1,5 @@ +#!/bin/bash + PATH=./bin:$PATH if [ $VALGRIND_TEST_ON -eq 1 ]; then diff --git a/test/test_c_oldindex2.sh b/test/test_c_oldindex2.sh new file mode 100755 index 00000000..4b0f550d --- /dev/null +++ b/test/test_c_oldindex2.sh @@ -0,0 +1,172 @@ +#!/bin/bash + 
+PATH=./bin:$PATH + +if [ $VALGRIND_TEST_ON -eq 1 ]; then + VALGRIND="valgrind --error-exitcode=42 --leak-check=full" +else + VALGRIND="" +fi + +## 2D +echo "test 1" +$VALGRIND pairix samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz '10:1-1000000|20' > log1 +gunzip -c samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz | awk '$2=="10" && $3>=1 && $3<=1000000 && $6=="20"' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 1 failed" + return 1; +fi + +echo "test 1b" +$VALGRIND pairix -a samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz '10:1-1000000|20' > log1 +gunzip -c samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz | awk '$2=="10" && $3>=1 && $3<=1000000 && $6=="20"' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 1b failed" + return 1; +fi + +echo "test 1c" +$VALGRIND pairix -a samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz '20|10:1-1000000' > log1 +gunzip -c samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz | awk '$2=="10" && $3>=1 && $3<=1000000 && $6=="20"' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 1c failed" + return 1; +fi + +echo "test 1d" +$VALGRIND pairix samples/old_index2/test_4dn.pairs.gz 'chr22:50000000-60000000' > log1 +$VALGRIND pairix samples/old_index2/test_4dn.pairs.gz 'chr22:50000000-60000000|chr22:50000000-60000000' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 1d failed" + return 1; +fi + +echo "test 1e" +$VALGRIND pairix samples/old_index2/test_4dn.pairs.gz 'chrY:1-2000000' > log1 +$VALGRIND pairix samples/old_index2/test_4dn.pairs.gz 'chrY:1-2000000|chrY:1-2000000' > log2 +if [ ! 
-z "$(diff log1 log2)" ]; then + echo "test 1e failed" + return 1; +fi + +echo "test 2" +$VALGRIND pairix samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz '10:1-1000000|20:50000000-60000000' > log1 +gunzip -c samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz | awk '$2=="10" && $3>=1 && $3<=1000000 && $6=="20" && $7>=50000000 && $7<=60000000' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 2 failed" + return 1; +fi + +echo "test 3" +$VALGRIND pairix samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz '1:1-10000000|20:50000000-60000000' '3:5000000-9000000|X:70000000-90000000' > log1 +gunzip -c samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz | awk '$2=="1" && $3>=1 && $3<=10000000 && $6=="20" && $7>=50000000 && $7<=60000000' > log2 +gunzip -c samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz | awk '$2=="3" && $3>=5000000 && $3<=9000000 && $6=="X" && $7>=70000000 && $7<=90000000' >> log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 3 failed" + return 1; +fi + +echo "test 4" +$VALGRIND pairix samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz '*|1:0-100000' > log1 +gunzip -c samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz | awk '$6=="1" && $7>=0 && $7<=100000' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 4 failed" + return 1; +fi + +echo "test 5" +$VALGRIND pairix samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz '1:0-100000|*' > log1 +gunzip -c samples/old_index2/merged_nodup.tab.chrblock_sorted.txt.gz | awk '$2=="1" && $3>=0 && $3<=100000' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 5 failed" + return 1; +fi + + +## 1D +echo "test 6" +$VALGRIND pairix samples/old_index2/SRR1171591.variants.snp.vqsr.p.vcf.gz chr10:1-4000000 > log1 +gunzip -c samples/old_index2/SRR1171591.variants.snp.vqsr.p.vcf.gz | awk '$1=="chr10" && $2>=1 && $2<=4000000' > log2 +if [ ! 
-z "$(diff log1 log2)" ]; then + echo "test 6 failed" + return 1; +fi + + +## 2D, space-delimited +echo "test 7" +$VALGRIND pairix samples/old_index2/merged_nodups.space.chrblock_sorted.subsample1.txt.gz '10:1-1000000|20' > log1 +gunzip -c samples/old_index2/merged_nodups.space.chrblock_sorted.subsample1.txt.gz | awk '$2=="10" && $3>=1 && $3<=1000000 && $6=="20"' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 7 failed" + return 1; +fi + + +## preset for pairs.gz +echo "test 8" +$VALGRIND pairix samples/old_index2/test_4dn.pairs.gz 'chr10|chr20' > log1 +gunzip -c samples/old_index2/test_4dn.pairs.gz | awk '$2=="chr10" && $4=="chr20"' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test 8 failed" + return 1; +fi + +## linecount +echo "test linecount" +$VALGRIND pairix -n samples/old_index2/test_4dn.pairs.gz > log1 +gunzip -c samples/old_index2/test_4dn.pairs.gz |wc -l | sed "s/ //g" > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "linecount test failed" + return 1; +fi + + +## bgzf block count (currently no auto test for the accuracy of the result) +echo "test bgzf block count" +$VALGRIND pairix -B samples/old_index2/test_4dn.pairs.gz + +## check triangle +echo "test check triangle" +$VALGRIND pairix -Y samples/old_index2/4dn.bsorted.chr21_22_only.pairs.gz +$VALGRIND pairix -Y samples/old_index2/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz +res=$(pairix -Y samples/old_index2/4dn.bsorted.chr21_22_only.nontriangle.pairs.gz) +if [ "$res" != "The file is not a triangle." ]; then + echo "test check triangle failed" + return 1; +fi + +echo "test check triangle #2" +res=$(pairix -Y samples/old_index2/4dn.bsorted.chr21_22_only.pairs.gz) +if [ "$res" != "The file is a triangle." 
]; then + echo "test check triangle #2 failed" + return 1; +fi + + +# test large chromosome +echo "test large chr" +$VALGRIND pairix samples/old_index2/mock.largechr.pairs.gz 'chr21:800000000-900000000|chr22' > log1 +gunzip -c samples/old_index2/mock.largechr.pairs.gz | awk '$2=="chr21" && $3>800000000 && $3<900000000 && $4=="chr22"' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test large chromosome failed" + return 1; +fi + +# test large chromosome +echo "test large chr2" +$VALGRIND pairix samples/old_index2/mock.largechr.pairs.gz 'chr22:800000000-997027270|chr22' > log1 +gunzip -c samples/old_index2/mock.largechr.pairs.gz | awk '$2=="chr22" && $3>=800000000 && $3<=997027270 && $4=="chr22"' > log2 +if [ ! -z "$(diff log1 log2)" ]; then + echo "test large chromosome2 failed" + return 1; +fi + +# test large chromosome +echo "test large chr3" +$VALGRIND pairix samples/old_index2/mock.largechr.pairs.gz 'chr22:1073741820-1073741824|chr22' > log1 +gunzip -c samples/old_index2/mock.largechr.pairs.gz | awk '$2=="chr22" && $3>=1073741820 && $3<=1073741824 && $4=="chr22"' > log2 +if [ ! 
-z "$(diff log1 log2)" ]; then + echo "test large chromosome3 failed" + return 1; +fi diff --git a/test/test_oldindex.py b/test/test_oldindex.py index 81a716f7..572727b9 100755 --- a/test/test_oldindex.py +++ b/test/test_oldindex.py @@ -245,10 +245,10 @@ def test_querys_2_bad_order(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") # trigger a warning - it = self.pr.querys2D(query) + self.pr.querys2D(query) # verify some things about the warning - assert len(w) == 1 - assert issubclass(w[-1].category, pypairix.PairixWarning) + self.assertEqual(len(w), 1) + self.assertTrue(issubclass(w[-1].category, pypairix.PairixWarning)) ## 2D query on 2D indexed file with chromosomes input in reverse order @@ -283,10 +283,10 @@ def test_query2_rev_fail(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") # trigger a warning - it = self.pr.query2D(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) + self.pr.query2D(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) # verify some things about the warning - assert len(w) == 1 - assert issubclass(w[-1].category, pypairix.PairixWarning) + self.assertEqual(len(w), 1) + self.assertTrue(issubclass(w[-1].category, pypairix.PairixWarning)) ## 2D query on 2D indexed file with chromosomes using a 4DN pairs file diff --git a/test/test_oldindex2.py b/test/test_oldindex2.py new file mode 100755 index 00000000..6cc45edb --- /dev/null +++ b/test/test_oldindex2.py @@ -0,0 +1,493 @@ +#!/usr/bin/env python +""" +test_oldindex2.py +Used to run tests on the test files found in /samples/old_index2/ +From root, execute using `python test/test_oldindex2.py` +First, ensure you have fully installed the pypairix package: +`pip install pypairix --user` +OR +`sudo python setup.py install` + +If you're having trouble running this file, try installing +python-dev and zlib1g-dev. 
+ +Note: tests are run to anticipate either juicer-formatted pairs files or 4DN- +formatted pairs files. +The columns (given in form =6 and is_str(fields[2]) and is_str(fields[6]): + is_juicer = True + if is_str(fields[2]) and is_str(fields[4]): + is_4DN = True + if not is_juicer and is_4DN: + return '4DN' + elif is_juicer: + return 'juicer' + return 'undetermined' + + +def is_str(s): + """Helper function to see if a string is an int. Return True if so""" + try: + int(s) + return True + except ValueError: + return False + + +def read_pairs(filename, file_type='undetermined', delimiter='\t'): + """Read a pairs file and return a list of [chrom1, start1, end1, chrom2, start2, end2] items.""" + # handle this a different way? + if file_type == 'undetermined': + return [] + retval = [] + for line in gzip.open(filename): + try: + line = line.decode('utf-8') + except AttributeError: + pass + if line.startswith('#'): + continue + fields = line.rstrip().split(delimiter) + if file_type == 'juicer': + chrom1 = fields[1] + start1 = fields[2] + chrom2 = fields[5] + start2 = fields[6] + elif file_type == '4DN': + chrom1 = fields[1] + start1 = fields[2] + chrom2 = fields[3] + start2 = fields[4] + retval.append([chrom1, start1, start1, chrom2, start2, start2]) + return retval + + +def overlap1(a0, a1, b0, b1): + return int(a0) <= int(b1) and int(a1) >= int(b0) + + +def get_result(regions, chrom, start, end): + retval = [] + for r in regions: + if r[0] == chrom and overlap1(r[1], r[2], start, end): + retval.append(r) + return retval + + +def get_result_2D(regions, chrom, start, end, chrom2, start2, end2): + retval = [] + for reg in regions: + if reg[0] == chrom and overlap1(reg[1], reg[2], start, end) and reg[3] == chrom2 and overlap1(reg[4], reg[5], start2, end2): + retval.append(reg) + return retval + + +def get_result_1D_on_2D(regions, chrom, start, end, chrom2, start2, end2): + retval = [] + for reg in regions: + if reg[0] == chrom and overlap1(reg[2], reg[2], start, end) and 
reg[3] == chrom2 and overlap1(reg[4], reg[4], start2, end2): + retval.append(reg) + return retval + + +def build_it_result(it, f_type): + """Build results using the pairix iterator based on the filetype""" + if f_type == 'juicer': + pr_result = [[x[1], x[2], x[2], x[5], x[6], x[6]] for x in it] + elif f_type == '4DN': + pr_result = [[x[1], x[2], x[2], x[3], x[4], x[4]] for x in it] + elif f_type == 'undetermined': + pr_result = [] + return pr_result + + +## 1D query on 1D indexed file +class PairixTest(unittest.TestCase): + regions = read_vcf(TEST_FILE_1D) + chrom = 'chr10' + start = 25944 + end = 27000000 + result = get_result(regions, chrom, start, end) + pr = pypairix.open(TEST_FILE_1D) + + def test_query(self): + it = self.pr.query(self.chrom, self.start, self.end) + pr_result = [[x[0], x[1], x[1]] for x in it] + self.assertEqual(self.result, pr_result) + + def test_querys(self): + query = '{}:{}-{}'.format(self.chrom, self.start, self.end) + it = self.pr.querys(query) + pr_result = [[x[0], x[1], x[1]] for x in it] + self.assertEqual(self.result, pr_result) + + +## semi-2D query on 2D indexed file +class PairixTest_2(unittest.TestCase): + f_type = find_pairs_type(TEST_FILE_2D) + regions = read_pairs(TEST_FILE_2D, f_type) + chrom = '10' + start = 25944 + end = 27000000 + chrom2 = '20' + result = get_result_2D(regions, chrom, start, end, chrom2, 0, sys.maxsize) + pr = pypairix.open(TEST_FILE_2D) + + def test_querys(self): + query = '{}:{}-{}|{}'.format(self.chrom, self.start, self.end, self.chrom2) + it = self.pr.querys2D(query) + pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + +## 2D query on 2D indexed file +class PairixTest2D(unittest.TestCase): + f_type = find_pairs_type(TEST_FILE_2D) + regions = read_pairs(TEST_FILE_2D, f_type) + chrom = '10' + start = 1 + end = 1000000 + chrom2 = '20' + start2 = 50000000 + end2 = 60000000 + result = get_result_2D(regions, chrom, start, end, chrom2, start2, end2) + pr = 
pypairix.open(TEST_FILE_2D) + + def test_query2(self): + it = self.pr.query2D(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) + pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + def test_querys_2(self): + query = '{}:{}-{}|{}:{}-{}'.format(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) + it = self.pr.querys2D(query) + pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + def test_querys_2_bad_order(self): + # build the query with coordinates in the wrong order + query = '{}:{}-{}|{}:{}-{}'.format(self.chrom, self.end, self.start, self.chrom2, self.start2, self.end2) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + # trigger a warning + self.pr.querys2D(query) + # verify some things about the warning + self.assertEqual(len(w), 1) + self.assertTrue(issubclass(w[-1].category, pypairix.PairixWarning)) + + +## 2D query on 2D indexed file with chromosomes input in reverse order +class PairixTest2D_reverse(unittest.TestCase): + f_type = find_pairs_type(TEST_FILE_2D) + regions = read_pairs(TEST_FILE_2D, f_type) + chrom2 = '10' + start2 = 1 + end2 = 1000000 + chrom = '20' + start = 50000000 + end = 60000000 + # reverse reversed results to get them in the required order here + result = get_result_2D(regions, chrom2, start2, end2, chrom, start, end) + pr = pypairix.open(TEST_FILE_2D) + + def test_query2_rev(self): + # 1 is included as last argument to test flipping chromosome order + it = self.pr.query2D(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2, 1) + pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + def test_querys_2_rev(self): + query = '{}:{}-{}|{}:{}-{}'.format(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) + # 1 is included as last argument to test flipping chromosome order + it = self.pr.querys2D(query, 1) + 
pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + def test_query2_rev_fail(self): + # do not include 1 to test flipped order of chrs; expect this to hit a PairixWarning + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + # trigger a warning + self.pr.query2D(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) + # verify some things about the warning + self.assertEqual(len(w), 1) + self.assertTrue(issubclass(w[-1].category, pypairix.PairixWarning)) + + +## 2D query on 2D indexed file with chromosomes using a 4DN pairs file +class PairixTest2D_4DN(unittest.TestCase): + f_type = find_pairs_type(TEST_FILE_2D_4DN) + regions = read_pairs(TEST_FILE_2D_4DN, f_type) + chrom = 'chr21' + start = 1 + end = 48129895 + chrom2 = 'chr22' + start2 = 1 + end2 = 51304566 + # reverse reversed results to get them in the required order here + result = get_result_2D(regions, chrom, start, end, chrom2, start2, end2) + pr = pypairix.open(TEST_FILE_2D_4DN) + + def test_query2_4dn(self): + it = self.pr.query2D(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) + pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + def test_querys_2_4dn(self): + query = '{}:{}-{}|{}:{}-{}'.format(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) + it = self.pr.querys2D(query) + pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + +## 2D query on 2D indexed space-delimited file +class PairixTest2DSpace(unittest.TestCase): + f_type = find_pairs_type(TEST_FILE_2D_SPACE, ' ') + regions = read_pairs(TEST_FILE_2D_SPACE, f_type, ' ') + chrom = '10' + start = 1 + end = 1000000 + chrom2 = '20' + start2 = 50000000 + end2 = 60000000 + result = get_result_2D(regions, chrom, start, end, chrom2, start2, end2) + pr = pypairix.open(TEST_FILE_2D_SPACE) + + def test_query2(self): + it = self.pr.query2D(self.chrom, 
self.start, self.end, self.chrom2, self.start2, self.end2) + pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + def test_querys_2(self): + query = '{}:{}-{}|{}:{}-{}'.format(self.chrom, self.start, self.end, self.chrom2, self.start2, self.end2) + it = self.pr.querys2D(query) + pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + +## 1D query on 2D indexed file +class PairixTest_1_on_2(unittest.TestCase): + f_type='4DN' + regions = read_pairs(TEST_FILE_2D_4DN_2, f_type) + chrom = 'chrY' + start = 1 + end = 2000000 + chrom2 = chrom + start2 = start + end2 = end + result = get_result_1D_on_2D(regions, chrom, start, end, chrom2, start2, end2) + pr = pypairix.open(TEST_FILE_2D_4DN_2) + + def test_querys(self): + query = '{}:{}-{}'.format(self.chrom, self.start, self.end) + it = self.pr.querys2D(query) + pr_result = build_it_result(it, self.f_type) + self.assertEqual(self.result, pr_result) + + +class PairixTestBlocknames(unittest.TestCase): + + def test_blocknames(self): + + # block list obtained from get_blocknames() + pr = pypairix.open(TEST_FILE_2D) + retrieved_blocklist = pr.get_blocknames() + retrieved_blocklist.sort() + + # true block list + blocklist=[] + f_type = find_pairs_type(TEST_FILE_2D) + regions = read_pairs(TEST_FILE_2D, f_type) + for a in regions: + blocklist.append(a[0] + '|' + a[3]) + blocklist_uniq = list(set(blocklist)) + blocklist_uniq.sort() + + self.assertEqual(retrieved_blocklist, blocklist_uniq) + + +class PairixTestGetColumnIndex(unittest.TestCase): + + def test_columnindex(self): + pr = pypairix.open(TEST_FILE_2D) + pr2 = pypairix.open(TEST_FILE_2D_4DN) + + self.assertEqual(pr.get_chr1_col(),1) + self.assertEqual(pr.get_chr2_col(),5) + self.assertEqual(pr.get_startpos1_col(),2) + self.assertEqual(pr.get_startpos2_col(),6) + self.assertEqual(pr.get_endpos1_col(),2) + self.assertEqual(pr.get_endpos2_col(),6) + + self.assertEqual(pr2.get_chr1_col(),1) + 
self.assertEqual(pr2.get_chr2_col(),3) + self.assertEqual(pr2.get_startpos1_col(),2) + self.assertEqual(pr2.get_startpos2_col(),4) + self.assertEqual(pr2.get_endpos1_col(),2) + self.assertEqual(pr2.get_endpos2_col(),4) + + +class PairixTestExists(unittest.TestCase): + + def test_exists(self): + pr = pypairix.open(TEST_FILE_2D_4DN) + self.assertEqual(pr.exists("chr21|chr21"),1) + self.assertEqual(pr.exists("chr21|chr22"),1) + self.assertEqual(pr.exists("chr22|chr22"),1) + self.assertEqual(pr.exists("chr22|chr21"),0) + self.assertEqual(pr.exists("chr1|chr2"),0) + self.assertEqual(pr.exists("chr21"),0) + self.assertEqual(pr.exists("1|2"),0) + + +class PairixTestExists2(unittest.TestCase): + + def test_exists2(self): + pr = pypairix.open(TEST_FILE_2D_4DN) + self.assertEqual(pr.exists2("chr21","chr21"),1) + self.assertEqual(pr.exists2("chr21","chr22"),1) + self.assertEqual(pr.exists2("chr22","chr22"),1) + self.assertEqual(pr.exists2("chr22","chr21"),0) + self.assertEqual(pr.exists2("chr1","chr2"),0) + self.assertEqual(pr.exists2("1","2"),0) + + +class PairixTestBgzfBlockCounts(unittest.TestCase): + + def test_bgzf_block_count(self): + pr = pypairix.open(TEST_FILE_2D_4DN) + self.assertEqual(pr.bgzf_block_count("chr21","chr21"),8) + self.assertEqual(pr.bgzf_block_count("chr21","chr22"),1) + self.assertEqual(pr.bgzf_block_count("chr22","chr22"),12) + self.assertEqual(pr.bgzf_block_count("chr22","chr21"),0) + self.assertEqual(pr.bgzf_block_count("chr21","chrY"),0) + self.assertEqual(pr.bgzf_block_count("chr1","chr2"),0) + self.assertEqual(pr.bgzf_block_count("1","2"),0) + + +class PairixTestGetHeader(unittest.TestCase): + + def test_get_header(self): + pr = pypairix.open(TEST_FILE_2D_4DN) + self.assertEqual(pr.get_header(), get_header(TEST_FILE_2D_4DN)) + pr = pypairix.open(TEST_FILE_2D_4DN_2) + self.assertEqual(pr.get_header(), get_header(TEST_FILE_2D_4DN_2)) + + +class PairixTestGetChromsize(unittest.TestCase): + + def test_get_chromsize(self): + pr = 
pypairix.open(TEST_FILE_2D_4DN) + self.assertEqual(pr.get_chromsize(), get_chromsize(TEST_FILE_2D_4DN)) + pr = pypairix.open(TEST_FILE_2D_4DN_2) + self.assertEqual(pr.get_chromsize(), get_chromsize(TEST_FILE_2D_4DN_2)) + + +class PairixTestGetLineCount(unittest.TestCase): + + def test_linecount(self): + pr= pypairix.open(TEST_FILE_2D_4DN_2) + self.assertEqual(pr.get_linecount(), 60204) + + +class PairixTestCheckTriangle(unittest.TestCase): + + def test_check_triangle(self): + pr= pypairix.open(TEST_FILE_2D_4DN) + self.assertEqual(pr.check_triangle(), 1) + + def test_check_triangle2(self): + pr= pypairix.open(TEST_FILE_2D_4DN_2) + self.assertEqual(pr.check_triangle(), 1) + + def test_check_triangle_false(self): + pr= pypairix.open(TEST_FILE_2D_4DN_NOT_TRIANGLE) + self.assertEqual(pr.check_triangle(), 0) + + +class PairixVersionCheck(unittest.TestCase): + + def test_version(self): + # version defined by PACKAGE_VERSION in src/pairix.h + pkg_version = pypairix.__version__ + # setup.py version is read from the last line of the root VERSION.txt file + py_version = open("VERSION.txt").readlines()[-1].split()[-1].strip("\"'") + self.assertEqual(pkg_version, py_version) + + +if __name__ == '__main__': + unittest.main() diff --git a/util/bam2pairs/bam2pairs b/util/bam2pairs/bam2pairs index e55b4227..407b985c 100755 --- a/util/bam2pairs/bam2pairs +++ b/util/bam2pairs/bam2pairs @@ -5,7 +5,7 @@ my $chrsizefile; &GetOptions( 'l|leftmost' => sub { $pos_is_5end=0 }, 'c|chromsize=s' => \$chrsizefile ); ## This defines ordering between mates -if(@ARGV<1){ &print_usage(); exit(); } +if(@ARGV<2){ &print_usage(); exit(); } my $input = $ARGV[0]; my $prefix = $ARGV[1]; my $outpairs = "$prefix.bsorted.pairs"; diff --git a/util/create_randompairs.pl b/util/create_randompairs.pl new file mode 100644 index 00000000..033c1183 --- /dev/null +++ b/util/create_randompairs.pl @@ -0,0 +1,27 @@ +#!/usr/bin/perl + +$nlines = shift @ARGV; +$i=0; +$chr=1; +$chr2=1; +$pos=int(rand(1000)); +$pos2=int(rand(1000)); +$nlines2 = 
sqrt($nlines); +$MAXCHR=25; +$MAXPOS=536000000; +while($i<$nlines){ + print "lalala\tchr$chr\t$pos\tchr$chr2\t$pos2\t+\t+\n"; + if(rand($nlines/10)<=1 && $chr<$MAXCHR){ + $chr++; + $chr2=$chr; + $pos=int(rand(1000)); + $pos2=int(rand(1000)); + } + if(rand($nlines/50)<=1 && $chr2<$MAXCHR){ + $chr2++; + $pos2=int(rand(1000)); + } + $pos+=int(rand(3)) if $pos<$MAXPOS; + $pos2+=int(rand(3)) if $pos2<$MAXPOS; + $i++; +}