-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcmpr_file_formats.py
87 lines (82 loc) · 3.04 KB
/
cmpr_file_formats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import pyarrow as pa
import pyarrow.parquet as pq
import pandas
import numpy
from davitpy import pydarn
import datetime
# import the csv creation module
import os
import sys
module_path = os.path.abspath(os.path.join('../'))
if module_path not in sys.path:
sys.path.append(module_path)
import dmap_to_csv
import generate_parquet_files
import generate_hdf5_files
class CreateFiles(object):
    """
    In this class we'll generate different
    file formats such as Parquet, HDF5 and csv
    from fitacf data, for one radar and one day.
    """

    def __init__(self, inpTime, inpRad):
        """
        Initialize date, rad and other variables used.
        NOTE : we'll create 1 file/day, so the end time
        is always the start time + 1 day.

        inpTime : datetime.datetime, start of the interval
        inpRad  : radar code string (e.g. "fhe"); it is
                  concatenated into the output file names
        """
        self.startTime = inpTime
        self.inpRad = inpRad
        self.endTime = self.startTime + datetime.timedelta(days=1)

    def create_csv_fitacf_files(self, csvOutDir, fitOutDir):
        """
        Generate csv files from fitacf data
        using Muhammad's code (dmap_to_csv), then move the
        csv file and the actual fitacf file into the given
        directories so sizes can be compared in one location.

        csvOutDir : destination dir for the csv file
        fitOutDir : destination dir for the fitacf file
        Both are joined to the file name by plain string
        concatenation, so they must end with "/".
        """
        # Use the instance's start time and radar here. The bare names
        # sTime/inpRad are not defined in this scope (NameError).
        # assumes dmap_to_csv.main returns the path of the csv it
        # wrote -- that is how the result is used below; TODO confirm
        fOut = dmap_to_csv.main(self.startTime, self.inpRad)
        # we'll need to compare the sizes
        # of different file formats; have them
        # in one location.
        # move the csv file
        csvName = fOut.split("/")[-1]
        os.rename(fOut, csvOutDir + csvName)
        # move the actual fitacf file
        # (its name is the csv name minus the last extension)
        fitFileName = ".".join(csvName.split(".")[:-1])
        # NOTE(review): the fitacf file is looked up relative to
        # csvOutDir (csvOutDir minus its last "/"-separated piece),
        # not relative to the directory of fOut -- confirm this is
        # where dmap_to_csv leaves the fitacf file.
        fitFilePath = "/".join(csvOutDir.split("/")[:-1])
        os.rename(fitFilePath + "/" + fitFileName, fitOutDir + fitFileName)

    def create_parquet_files(self, pqOutDir,
                             compression='brotli', version="2.0"):
        """
        Generate Parquet files from fitacf data

        pqOutDir    : output dir; the file name is appended by
                      plain concatenation, so it must end with "/"
        compression : passed through to create_parquet_file
        version     : passed through to create_parquet_file
        """
        pqObj = generate_parquet_files.ParquetConverter(
            self.startTime, self.endTime, self.inpRad)
        fData = pqObj.get_dmap_dicts()
        paTab = pqObj.json_to_pyarrow_table(fData)
        # NOTE(review): unlike the hdf5 file name there is no "."
        # between the date and the radar code here; kept as is so
        # existing output names don't change.
        outParquetFile = (pqOutDir + self.startTime.strftime("%Y%m%d") +
                          self.inpRad + ".parquet")
        pqObj.create_parquet_file(paTab, outParquetFile,
                                  compression=compression, version=version)

    def create_hdf5_files(self, hdf5OutDir):
        """
        Generate hdf5 files from fitacf data

        hdf5OutDir : output dir; the file name
                     (<YYYYMMDD>.<rad>.hdf5) is appended by plain
                     concatenation, so it must end with "/"
        """
        hdf5Obj = generate_hdf5_files.HDF5Converter(
            self.startTime, self.endTime, self.inpRad)
        fData = hdf5Obj.get_dmap_dicts()
        outFile = (hdf5OutDir + self.startTime.strftime("%Y%m%d") +
                   "." + self.inpRad + ".hdf5")
        hdf5Obj.create_hdf5_file(fData, outFile)
if __name__ == "__main__":
    # Create one Parquet file per radar per day, for every day
    # from sDate through eDate (both inclusive).
    sDate = datetime.datetime(2012,6,1)
    eDate = datetime.datetime(2012,7,1)
    selRadList = ["fhe"]
    pqOutDir = "/home/bharat/Documents/data/fit_cmpr_formats/pq/"
    while sDate <= eDate:
        for sr in selRadList:
            # single-argument print(...) with a pre-formatted string
            # gives the same output under Python 2 and also parses
            # under Python 3
            print("curr date---> %s" % sDate)
            print("%s %s" % (sDate, sr))
            cfo = CreateFiles(sDate, sr)
            cfo.create_parquet_files(pqOutDir)
        # advance only after every radar is done for this day
        sDate += datetime.timedelta(days=1)
        print(" ******* Created Parquet File *******")
# cfo = CreateFiles(currDate, selRad)