Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev 1 #62

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions pp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
## pairs format v1.0
#sorted: chr1-chr2-pos1-pos2
#shape: upper triangle
#columns: readID chr1 pos1 chr2 pos2 strand1 strand2
#command: bam2pairs -5 /Users/soo/data/hic/bam/_1_out.sorted.bam samples/test_4dn_2
#chromsize: chr1 249250621
#chromsize: chr10 135534747
#chromsize: chr11 135006516
#chromsize: chr11_gl000202_random 40103
#chromsize: chr12 133851895
#chromsize: chr13 115169878
#chromsize: chr14 107349540
#chromsize: chr15 102531392
#chromsize: chr16 90354753
#chromsize: chr17 81195210
#chromsize: chr17_ctg5_hap1 1680828
#chromsize: chr17_gl000203_random 37498
#chromsize: chr17_gl000204_random 81310
#chromsize: chr17_gl000205_random 174588
#chromsize: chr17_gl000206_random 41001
#chromsize: chr18 78077248
#chromsize: chr18_gl000207_random 4262
#chromsize: chr19 59128983
#chromsize: chr19_gl000208_random 92689
#chromsize: chr19_gl000209_random 159169
#chromsize: chr1_gl000191_random 106433
#chromsize: chr1_gl000192_random 547496
#chromsize: chr2 243199373
#chromsize: chr20 63025520
#chromsize: chr21 48129895
#chromsize: chr21_gl000210_random 27682
#chromsize: chr22 51304566
#chromsize: chr3 198022430
#chromsize: chr4 191154276
#chromsize: chr4_ctg9_hap1 590426
#chromsize: chr4_gl000193_random 189789
#chromsize: chr4_gl000194_random 191469
#chromsize: chr5 180915260
#chromsize: chr6 171115067
#chromsize: chr6_apd_hap1 4622290
#chromsize: chr6_cox_hap2 4795371
#chromsize: chr6_dbb_hap3 4610396
#chromsize: chr6_mann_hap4 4683263
#chromsize: chr6_mcf_hap5 4833398
#chromsize: chr6_qbl_hap6 4611984
#chromsize: chr6_ssto_hap7 4928567
#chromsize: chr7 159138663
#chromsize: chr7_gl000195_random 182896
#chromsize: chr8 146364022
#chromsize: chr8_gl000196_random 38914
#chromsize: chr8_gl000197_random 37175
#chromsize: chr9 141213431
#chromsize: chr9_gl000198_random 90085
#chromsize: chr9_gl000199_random 169874
#chromsize: chr9_gl000200_random 187035
#chromsize: chr9_gl000201_random 36148
#chromsize: chrM 16571
#chromsize: chrUn_gl000211 166566
#chromsize: chrUn_gl000212 186858
#chromsize: chrUn_gl000213 164239
#chromsize: chrUn_gl000214 137718
#chromsize: chrUn_gl000215 172545
#chromsize: chrUn_gl000216 172294
#chromsize: chrUn_gl000217 172149
#chromsize: chrUn_gl000218 161147
#chromsize: chrUn_gl000219 179198
#chromsize: chrUn_gl000220 161802
#chromsize: chrUn_gl000221 155397
#chromsize: chrUn_gl000222 186861
#chromsize: chrUn_gl000223 180455
#chromsize: chrUn_gl000224 179693
#chromsize: chrUn_gl000225 211173
#chromsize: chrUn_gl000226 15008
#chromsize: chrUn_gl000227 128374
#chromsize: chrUn_gl000228 129120
#chromsize: chrUn_gl000229 19913
#chromsize: chrUn_gl000230 43691
#chromsize: chrUn_gl000231 27386
#chromsize: chrUn_gl000232 40652
#chromsize: chrUn_gl000233 45941
#chromsize: chrUn_gl000234 40531
#chromsize: chrUn_gl000235 34474
#chromsize: chrUn_gl000236 41934
#chromsize: chrUn_gl000237 45867
#chromsize: chrUn_gl000238 39939
#chromsize: chrUn_gl000239 33824
#chromsize: chrUn_gl000240 41933
#chromsize: chrUn_gl000241 42152
#chromsize: chrUn_gl000242 43523
#chromsize: chrUn_gl000243 43341
#chromsize: chrUn_gl000244 39929
#chromsize: chrUn_gl000245 36651
#chromsize: chrUn_gl000246 38154
#chromsize: chrUn_gl000247 36422
#chromsize: chrUn_gl000248 39786
#chromsize: chrUn_gl000249 38502
#chromsize: chrX 155270560
#chromsize: chrY 59373566
SRR1658581.49762205 chr1 106913 chr1 252815 - +
SRR1658581.42468411 chr1 368525 chr1 11314591 + +
SRR1658581.14061997 chr1 744857 chr1 15742582 - -
SRR1658581.23398543 chr1 1124378 chr1 1124689 + -
SRR1658581.32586461 chr1 1451660 chr1 226738572 + +
SRR1658581.6724634 chr1 1482177 chr1 27883240 - -
SRR1658581.29645549 chr1 1482728 chr1 1586645 + +
SRR1658581.36739926 chr1 1516913 chr1 54777610 + +
SRR1658581.19712598 chr1 1513682 chr2 1893613 - -
SRR1658581.50905637 chr2 748234 chr2 925345 - +
SRR1658581.41602119 chr2 423902 chr3 38346554 + -
SRR1658581.24980574 chr3 235258 chr4 464852 + +
Binary file added samples/tiny_unsorted.pairs.gz
Binary file not shown.
110 changes: 110 additions & 0 deletions test/files/output_sort_tiny_unsorted.pairs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
## pairs format v1.0
#sorted: chr1-chr2-pos1-pos2
#shape: upper triangle
#columns: readID chr1 pos1 chr2 pos2 strand1 strand2
#command: bam2pairs -5 /Users/soo/data/hic/bam/_1_out.sorted.bam samples/test_4dn_2
#chromsize: chr1 249250621
#chromsize: chr10 135534747
#chromsize: chr11 135006516
#chromsize: chr11_gl000202_random 40103
#chromsize: chr12 133851895
#chromsize: chr13 115169878
#chromsize: chr14 107349540
#chromsize: chr15 102531392
#chromsize: chr16 90354753
#chromsize: chr17 81195210
#chromsize: chr17_ctg5_hap1 1680828
#chromsize: chr17_gl000203_random 37498
#chromsize: chr17_gl000204_random 81310
#chromsize: chr17_gl000205_random 174588
#chromsize: chr17_gl000206_random 41001
#chromsize: chr18 78077248
#chromsize: chr18_gl000207_random 4262
#chromsize: chr19 59128983
#chromsize: chr19_gl000208_random 92689
#chromsize: chr19_gl000209_random 159169
#chromsize: chr1_gl000191_random 106433
#chromsize: chr1_gl000192_random 547496
#chromsize: chr2 243199373
#chromsize: chr20 63025520
#chromsize: chr21 48129895
#chromsize: chr21_gl000210_random 27682
#chromsize: chr22 51304566
#chromsize: chr3 198022430
#chromsize: chr4 191154276
#chromsize: chr4_ctg9_hap1 590426
#chromsize: chr4_gl000193_random 189789
#chromsize: chr4_gl000194_random 191469
#chromsize: chr5 180915260
#chromsize: chr6 171115067
#chromsize: chr6_apd_hap1 4622290
#chromsize: chr6_cox_hap2 4795371
#chromsize: chr6_dbb_hap3 4610396
#chromsize: chr6_mann_hap4 4683263
#chromsize: chr6_mcf_hap5 4833398
#chromsize: chr6_qbl_hap6 4611984
#chromsize: chr6_ssto_hap7 4928567
#chromsize: chr7 159138663
#chromsize: chr7_gl000195_random 182896
#chromsize: chr8 146364022
#chromsize: chr8_gl000196_random 38914
#chromsize: chr8_gl000197_random 37175
#chromsize: chr9 141213431
#chromsize: chr9_gl000198_random 90085
#chromsize: chr9_gl000199_random 169874
#chromsize: chr9_gl000200_random 187035
#chromsize: chr9_gl000201_random 36148
#chromsize: chrM 16571
#chromsize: chrUn_gl000211 166566
#chromsize: chrUn_gl000212 186858
#chromsize: chrUn_gl000213 164239
#chromsize: chrUn_gl000214 137718
#chromsize: chrUn_gl000215 172545
#chromsize: chrUn_gl000216 172294
#chromsize: chrUn_gl000217 172149
#chromsize: chrUn_gl000218 161147
#chromsize: chrUn_gl000219 179198
#chromsize: chrUn_gl000220 161802
#chromsize: chrUn_gl000221 155397
#chromsize: chrUn_gl000222 186861
#chromsize: chrUn_gl000223 180455
#chromsize: chrUn_gl000224 179693
#chromsize: chrUn_gl000225 211173
#chromsize: chrUn_gl000226 15008
#chromsize: chrUn_gl000227 128374
#chromsize: chrUn_gl000228 129120
#chromsize: chrUn_gl000229 19913
#chromsize: chrUn_gl000230 43691
#chromsize: chrUn_gl000231 27386
#chromsize: chrUn_gl000232 40652
#chromsize: chrUn_gl000233 45941
#chromsize: chrUn_gl000234 40531
#chromsize: chrUn_gl000235 34474
#chromsize: chrUn_gl000236 41934
#chromsize: chrUn_gl000237 45867
#chromsize: chrUn_gl000238 39939
#chromsize: chrUn_gl000239 33824
#chromsize: chrUn_gl000240 41933
#chromsize: chrUn_gl000241 42152
#chromsize: chrUn_gl000242 43523
#chromsize: chrUn_gl000243 43341
#chromsize: chrUn_gl000244 39929
#chromsize: chrUn_gl000245 36651
#chromsize: chrUn_gl000246 38154
#chromsize: chrUn_gl000247 36422
#chromsize: chrUn_gl000248 39786
#chromsize: chrUn_gl000249 38502
#chromsize: chrX 155270560
#chromsize: chrY 59373566
SRR1658581.49762205 chr1 106913 chr1 252815 - +
SRR1658581.42468411 chr1 368525 chr1 11314591 + +
SRR1658581.14061997 chr1 744857 chr1 15742582 - -
SRR1658581.23398543 chr1 1124378 chr1 1124689 + -
SRR1658581.32586461 chr1 1451660 chr1 226738572 + +
SRR1658581.6724634 chr1 1482177 chr1 27883240 - -
SRR1658581.29645549 chr1 1482728 chr1 1586645 + +
SRR1658581.36739926 chr1 1516913 chr1 54777610 + +
SRR1658581.19712598 chr1 1513682 chr2 1893613 - -
SRR1658581.50905637 chr2 748234 chr2 925345 - +
SRR1658581.41602119 chr2 423902 chr3 38346554 + -
SRR1658581.24980574 chr3 235258 chr4 464852 + +
11 changes: 11 additions & 0 deletions test/test_c.sh
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,14 @@ if [ ! -z "$(diff out.1d.pairs out2.1d.pairs)" ]; then
return 1;
fi
rm -f out.1d.pairs out2.1d.pairs


## sort-pairs test
# Runs util/sort-pairs.sh on the tiny unsorted sample and compares the
# decompressed result against the expected sorted pairs file.
echo "test sort-pairs"
. util/sort-pairs.sh samples/tiny_unsorted.pairs.gz out
# NOTE: the space before the closing ']' is required; without it `[` errors
# out ("missing ]"), the condition is never true, and the test cannot fail.
if [ ! -z "$(gunzip -c out.pairs.gz | diff - test/files/output_sort_tiny_unsorted.pairs)" ]; then
  echo "test sort-pairs failed"
  return 1;
fi
# The glob also removes the index file(s) written next to out.pairs.gz.
rm -f out.pairs.gz*

15 changes: 15 additions & 0 deletions util/sort-pairs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Sort a gzipped pairs file by chr1, chr2, pos1, pos2 (columns 2, 4, 3, 5),
# then compress the result with bgzip and index it with pairix.
#
# Usage:  sort-pairs.sh <unsorted.pairs.gz> <outprefix>
# Output: <outprefix>.pairs.gz and its pairix index.
#
# NOTE: the test suite sources this file (". util/sort-pairs.sh ..."), so it
# must not call a bare `exit` or enable `set -e`; either would affect the
# calling shell. Failures use `return`, falling back to `exit` only when the
# script is executed directly (where top-level `return` is an error).
f=$1 # input unsorted pairs file (gzipped)
outprefix=$2

if [ -z "$f" ] || [ -z "$outprefix" ]; then
  printf '%s\n' "usage: sort-pairs.sh <unsorted.pairs.gz> <outprefix>" >&2
  return 1 2>/dev/null || exit 1
fi

if [ ! -r "$f" ]; then
  printf '%s\n' "sort-pairs.sh: cannot read input file: $f" >&2
  return 1 2>/dev/null || exit 1
fi

# header: '#' lines are copied through in their original order.
# (grep's exit status is deliberately ignored: a file without header lines
# is still sorted.)
gunzip -c "$f" | grep "^#" > "$outprefix.pairs"

# sorting: chromosome names compared as text, positions numerically.
# Compression and indexing only run when the previous step succeeded, so a
# failed sort never produces an index over truncated data.
gunzip -c "$f" | grep -v '^#' | sort -k2,2 -k4,4 -k3,3g -k5,5g >> "$outprefix.pairs" \
  && bgzip -f "$outprefix.pairs" \
  && pairix -f "$outprefix.pairs.gz"