-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmeth_hdf5_part2.jl
70 lines (51 loc) · 1.52 KB
/
meth_hdf5_part2.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
const IN_DIR = "bstmp/" # do not omit trailing slash
const OUT_FILE = "foo.hdf5"
using HDF5
function main()
hf = h5open( OUT_FILE, "w" )
# Write list of cell names
open( IN_DIR * "cells.lst" ) do f
hf["cells"] = readlines(f)
end
# Create groups
grp_val = create_group( hf, "values" )
grp_idx = create_group( hf, "indices" )
# Read chromosome lengths
chroms = []
open( IN_DIR * "chromosomes.csv" ) do f
prev_pos = 0
while !eof(f)
fields = split( readline(f), "," )
pos = parse( Int, fields[2] )
push!( chroms, ( fields[1], pos - prev_pos ) )
prev_pos = pos
end
end
# Write indices
for (chrom, len) in chroms
ds_chrom = create_dataset( grp_idx, chrom, datatype(UInt32), (len,2) )
open( IN_DIR * "values.bin" ) do f
for i in 1:len
ds_chrom[ i, 1 ] = read( f, UInt32 )
ds_chrom[ i, 2 ] = read( f, UInt32 )
end
end
println( "Index for chromosome ", chrom, " written." )
end
# Write values
num_vals = Int( stat( IN_DIR * "values.bin" ).size / 4 )
ds_cell = create_dataset( grp_val, "cell", datatype(UInt16), (num_vals,) )
ds_umeth = create_dataset( grp_val, "count_unmeth", datatype(UInt8), (num_vals,) )
ds_meth = create_dataset( grp_val, "count_meth", datatype(UInt8), (num_vals,) )
open( IN_DIR * "values.bin" ) do f
for i in 1:num_vals
ds_cell[i] = read( f, UInt16 )
ds_umeth[i] = read( f, UInt8 )
ds_meth[i] = read( f, UInt8 )
end
end
# The abvove loop is slow; should be changed to work with chunks
close( hf )
println( "All values written." )
end
main()