-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrb_processing.py
153 lines (119 loc) · 4.45 KB
/
grb_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""Process .grb files and extract data in table format
"""
import numpy as np
import pygrib
import pandas as pd
import grb_utils as utils
class GrbData(object):
    """Tabulate selected variables of a GRIB file over a lat/lon box.

    The constructor opens ``grb_file`` with ``pygrib.open`` and prepares,
    for every requested variable, an empty ``pandas.DataFrame`` with one
    column per grid point inside the region.  ``format()`` then appends
    one row per matching GRIB message, and ``create_df()`` joins the
    per-variable frames column-wise into ``self.df``.

    The instance can be used as a context manager so that the underlying
    file handle is released::

        with GrbData("file.grb", site="some_site") as gd:
            gd.format()
            gd.create_df()

    Attributes
    ----------
    fn
        The ``grb_file`` argument as given.
    site
        The ``site`` argument as given (may be ``None``).
    lat_range, lon_range
        Two-element ``(lower, upper)`` bounds of the region.
    grb
        The open ``pygrib`` file object.
    anal_date
        ``analDate`` of the first message in the file.
    var_names, var_names_short
        Parallel sequences of long variable names and the short names
        used as ``vardict`` keys.
    vardict
        ``{short_name: {"name": long_name, "df": DataFrame,
        "index": list}}``.
    df
        Combined frame; only exists after ``create_df()`` has run.
    """

    def __init__(self, grb_file, lat_range=None, lon_range=None,
                 site=None, var_names=None, var_names_short=None):
        """Open the file and set up one empty frame per variable.

        Parameters
        ----------
        grb_file
            Path handed to ``pygrib.open``.
        lat_range, lon_range
            Two-element bounds of the region.  Only used when ``site``
            is ``None``; in that case both must be provided.
        site
            Key into ``utils.region_box``.  When given, the region is
            taken from that entry's ``'lat'`` and ``'lon'`` items and
            ``lat_range`` / ``lon_range`` are ignored.
        var_names
            Long variable names to select.  Defaults to
            ``utils.var_names``.
        var_names_short
            Short names, paired positionally with ``var_names``.
            Defaults to ``utils.var_names_short``.
        """
        self.fn = grb_file
        self.site = site
        if site is not None:
            box = utils.region_box[site]
            self.lat_range = box['lat']
            self.lon_range = box['lon']
        else:
            self.lat_range = lat_range
            self.lon_range = lon_range

        self.grb = pygrib.open(grb_file)

        # Analysis date of the first message.  Reading moves the file
        # position, so rewind afterwards.
        self.anal_date = self.grb.read(1)[0].analDate
        self.reset_grb()

        # Long variable names
        self.var_names = utils.var_names if var_names is None else var_names
        # Short variable names (the vardict keys)
        self.var_names_short = (
            utils.var_names_short if var_names_short is None
            else var_names_short)

        # Variable dictionary: for each variable, use the grid of its
        # first message to derive one column name per grid point inside
        # the region, and start with an empty frame and an empty list of
        # row labels.
        self.vardict = {}
        for v, vshort in zip(self.var_names, self.var_names_short):
            first_msg = self.grb.select(name=v)[0]
            lat, lon = first_msg.latlons()
            lat_region, lon_region = _slice_latlon(
                lat, lon, self.lat_range, self.lon_range)
            colnames = _var_col_names_latlons(
                vshort, lat_region.flatten(), lon_region.flatten())
            self.vardict[vshort] = {
                "name": v,
                "df": pd.DataFrame(columns=colnames),
                "index": [],
            }

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the GRIB file when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Close the underlying pygrib file object."""
        self.grb.close()

    def format(self):
        """Fill the per-variable frames from the messages in the file.

        The file is rewound and every message is visited.  A message is
        used when its ``shortName`` is a key of ``self.vardict``; its
        values inside the lat/lon region are flattened and appended as a
        new row of that variable's frame, and the message's ``analDate``
        becomes the row label.  Columns are identified by position, in
        the order set up by the constructor.
        """
        self.reset_grb()
        for gm in self.grb:
            sname = gm.shortName
            if sname not in self.vardict:
                continue
            entry = self.vardict[sname]
            var_data = _region_data(gm, self.lat_range, self.lon_range)
            df = entry["df"]
            # Append at the next positional label, then relabel all rows
            # with the accumulated analysis dates.
            df.loc[len(df)] = var_data.flatten()
            entry["index"].append(gm.analDate)
            df.index = entry["index"]

    def reset_grb(self):
        """Rewind the pygrib file object to its first message."""
        self.grb.seek(0)

    def create_df(self):
        """Join the per-variable frames column-wise into ``self.df``.

        Frames are taken in ``self.var_names_short`` order.
        """
        df_list = [self.vardict[v]["df"] for v in self.var_names_short]
        self.df = pd.concat(df_list, axis=1, sort=False)
def _slice_latlon(lat, lon, lat_range, lon_range):
    """Return the lat/lon grid points that fall inside a bounding box.

    ``lat`` and ``lon`` are 2-D coordinate arrays of equal shape.  A
    point is kept when its latitude lies in
    ``[lat_range[0], lat_range[1]]`` and its longitude lies in
    ``[lon_range[0], lon_range[1]]`` (both ends inclusive).

    The kept points are returned as two 2-D arrays whose shape is
    (number of rows containing a kept point, number of columns
    containing a kept point); the reshape requires the kept points to
    fill that rectangle exactly.
    """
    inside_lat = (lat >= lat_range[0]) & (lat <= lat_range[1])
    inside_lon = (lon >= lon_range[0]) & (lon <= lon_range[1])
    inside = inside_lat & inside_lon

    # Count the rows / columns that contain at least one selected point.
    n_rows = inside.any(axis=1).sum()
    n_cols = inside.any(axis=0).sum()
    shape = (n_rows, n_cols)

    return lat[inside].reshape(shape), lon[inside].reshape(shape)
def _region_data(gribmessage, lat_range, lon_range):
    """Return a message's values restricted to a lat/lon box.

    Calls ``gribmessage.data`` with ``lat1``/``lat2`` taken from
    ``lat_range[0]``/``lat_range[1]`` and ``lon1``/``lon2`` taken from
    ``lon_range[0]``/``lon_range[1]``, and returns the first element of
    what that call yields.
    """
    bounds = {
        "lat1": lat_range[0],
        "lat2": lat_range[1],
        "lon1": lon_range[0],
        "lon2": lon_range[1],
    }
    result = gribmessage.data(**bounds)
    return result[0]
def _var_col_names_latlons(var_name, in_lat, in_lon):
    """Build one column name per (lat, lon) pair for a variable.

    Each name starts as ``<var_name>_lat_<s><lat>_lon_<s><lon>`` where
    ``<s>`` is ``n`` for a value below zero and ``p`` otherwise.  Every
    ``-`` is then removed from the whole string and every ``.`` is
    replaced by ``_``.  Pairs are taken positionally from ``in_lat`` and
    ``in_lon``; the returned list follows that order.
    """
    def _sign_tag(value):
        # "n" marks a negative coordinate, "p" anything else.
        if value < 0:
            return "n"
        return "p"

    template = "{}_lat_{}{}_lon_{}{}"
    return [
        template.format(var_name, _sign_tag(lat), lat, _sign_tag(lon), lon)
        .replace("-", "")
        .replace(".", "_")
        for lat, lon in zip(in_lat, in_lon)
    ]