-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathh5recompress2.py
122 lines (118 loc) · 4.05 KB
/
h5recompress2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import sys
import numpy
import h5py
import hdf5plugin
import shutil
import time
import os
import subprocess
# ---------------------------------------------------------------------------
# Command line:
#   h5recompress2.py basename compression [clevel [firstimage [numimages]]]
#
#   basename     input file name without the ".h5" extension
#   compression  one of: zstd, bszstd, lz4, bslz4, blosclz4, bloscbslz4
#                (matched case-insensitively)
#   clevel       compression level, kept as a string because it is also
#                embedded in the output file names (default "2")
#   firstimage   index of the first image to keep (default 0)
#   numimages    number of images to keep (default 0)
# ---------------------------------------------------------------------------
if len(sys.argv) < 3:
    # Without this check a missing argument surfaces as a bare IndexError.
    print(" usage: " + sys.argv[0]
          + " basename compression [clevel [firstimage [numimages]]]")
    sys.exit(1)

xbasename = sys.argv[1]  # the basename without the h5 extension
oldname = xbasename + ".h5"
clevel = sys.argv[3] if len(sys.argv) > 3 else "2"
firstimage = int(sys.argv[4]) if len(sys.argv) > 4 else 0
numimages = int(sys.argv[5]) if len(sys.argv) > 5 else 0

# Normalise the requested compression to its canonical lower-case name.
valid_compressions = ("zstd", "bszstd", "lz4", "bslz4", "blosclz4", "bloscbslz4")
compression = sys.argv[2].lower()  # the new compression
if compression not in valid_compressions:
    # Report the choice exactly as the user typed it.
    print(" invalid compression choice: " + sys.argv[2])
    sys.exit(1)

# Both scratch files live in /dev/shm and encode the run parameters in
# their names.
scratch_stem = (xbasename + "_" + compression + "_" + clevel
                + "_" + str(firstimage) + "_" + str(numimages))
newname = os.path.join("/dev/shm", scratch_stem + ".h5")
repackname = os.path.join("/dev/shm", scratch_stem + "_repack.h5")
# ---------------------------------------------------------------------------
# Copy the input file to /dev/shm and replace its entry/data/data dataset by
# a copy of the selected images written through the requested hdf5plugin
# filter, one image per chunk.
# ---------------------------------------------------------------------------

# Build the filter before touching any file, so that a bad compression name
# or level fails without leaving a scratch copy behind in /dev/shm.
# The Blosc variants take the compression level as an integer; clevel is a
# string here (it is also used in the file names), so convert it explicitly.
filter_factories = {
    "zstd": lambda: hdf5plugin.Blosc(
        cname='zstd', clevel=int(clevel), shuffle=hdf5plugin.Blosc.NOSHUFFLE),
    "bszstd": lambda: hdf5plugin.Blosc(
        cname='zstd', clevel=int(clevel), shuffle=hdf5plugin.Blosc.SHUFFLE),
    "lz4": lambda: hdf5plugin.LZ4(nbytes=0),
    "bslz4": lambda: hdf5plugin.Bitshuffle(nelems=0, lz4=True),
    "blosclz4": lambda: hdf5plugin.Blosc(
        cname='lz4', clevel=int(clevel), shuffle=hdf5plugin.Blosc.NOSHUFFLE),
    "bloscbslz4": lambda: hdf5plugin.Blosc(
        cname='lz4', clevel=int(clevel), shuffle=hdf5plugin.Blosc.SHUFFLE),
}
if compression not in filter_factories:
    print(" unrecognized compression on write ")
    sys.exit(1)
try:
    new_filter = filter_factories[compression]()
except (TypeError, ValueError) as err:
    print(" invalid compression level '" + str(clevel) + "': " + str(err))
    sys.exit(1)

print(" converting '"+oldname+"' to '"+newname+"'")
shutil.copyfile(oldname, newname)
with h5py.File(newname, 'r+') as hf:  ## open in read/write mode
    try:
        myentry = hf['entry']
    except KeyError:
        print(" failed to find to level entry group ")
        hf.close()
        os.remove(newname)  # do not leave the scratch copy in /dev/shm
        sys.exit(1)
    try:
        mydatagroup = myentry["data"]
    except KeyError:
        print(" failed to find to entry/data group ")
        hf.close()
        os.remove(newname)  # do not leave the scratch copy in /dev/shm
        sys.exit(1)
    try:
        mydatadataset = mydatagroup["data"]
        print(mydatadataset)
        mydatadataset_shape = mydatadataset.shape
        # numimages is an int; 0 means "every image in the dataset".
        if numimages == 0:
            numimages = mydatadataset_shape[0]
        print("mydatadataset.shape: ", mydatadataset_shape)
        print("mydata images: ", numimages)
        print("mydatadataset.shape_1: ", mydatadataset_shape[1])
        print("mydatadataset.shape_2: ", mydatadataset_shape[2])
        # Slicing an h5py dataset already reads into a fresh in-memory
        # array, so no additional copy is needed.
        mydata = mydatadataset[firstimage:firstimage+numimages]
        print("mydata.shape: ", mydata.shape)
        if mydata.shape[0] == 0:
            # Check before deleting anything: an empty selection cannot be
            # written with one-image chunks.
            raise ValueError("no images selected (firstimage="
                             + str(firstimage) + ", numimages="
                             + str(numimages) + ")")
        # NOTE: the replacement dataset is created without the attributes
        # of the dataset it replaces.
        del mydatagroup["data"]  ## deleting dataset!
        t1 = time.process_time()
        ### write with hdf5plugin, one full image per chunk
        mydatagroup.create_dataset(
            'data',
            data=mydata,
            chunks=(1,) + mydata.shape[1:],
            **new_filter)
        t2 = time.process_time()
        print( "recompression time: ", t2-t1)
    except Exception as err:
        # Keep going so the scratch files are still reported and removed
        # by the code that follows, but say what actually went wrong.
        print(" failed to get the data dataset")
        print(" " + type(err).__name__ + ": " + str(err))
        hf.close()
# ---------------------------------------------------------------------------
# Repack the recompressed copy with h5repack, print the SIZE lines of the
# h5dump header listing and the directory entry of the repacked file, and
# remove both scratch files.
# NOTE(review): repacking is presumably done so that the space of the deleted
# original dataset is not counted in the reported file size -- confirm.
#
# The external tools are run from argument lists rather than shell command
# strings, so file names containing spaces or shell metacharacters are passed
# through unchanged.
# ---------------------------------------------------------------------------
h5repack_exe = "/nsls2/users/hbernstein/bin/h5repack"
h5dump_exe = "/nsls2/users/hbernstein/bin/h5dump"


def _run_tool(argv, **kwargs):
    """Run argv without a shell; return the CompletedProcess, or None if it could not be started."""
    sys.stdout.flush()  # keep our own output ahead of the tool's output
    try:
        return subprocess.run(argv, check=False, **kwargs)
    except OSError as err:
        print(" could not run '" + argv[0] + "': " + str(err))
        return None


def _remove_file(path):
    """Delete path, reporting (not raising) any failure."""
    try:
        os.remove(path)
    except OSError as err:
        print(" could not remove '" + path + "': " + str(err))


_run_tool([h5repack_exe, newname, repackname])
_remove_file(newname)

# Equivalent of piping the header dump through grep ' SIZE'.
header_dump = _run_tool([h5dump_exe, "-pH", repackname],
                        stdout=subprocess.PIPE, universal_newlines=True)
if header_dump is not None:
    for dump_line in header_dump.stdout.splitlines():
        if " SIZE" in dump_line:
            print(dump_line)

_run_tool(["ls", "-alt", repackname])
_remove_file(repackname)