-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathGC-Net-level-1-data-processing.py
230 lines (199 loc) · 9.42 KB
/
GC-Net-level-1-data-processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 20:00:14 2020
tip list:
for plots in spyder command prompte
%matplotlib inline
for plots in a new window
%matplotlib qt
@author: bav
"""
import os, sys
import PROMICE_toolbox as ptb
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import os.path
import numpy as np
import jaws_tools
import tocgen
from os import path
import nead
class CustomError(Exception):
    """Script-specific error, raised when a required dependency is missing."""
# Guard against an outdated pyNEAD install: nead.build_header_obj is used
# further down to write the output files, so fail early with instructions.
required_attr = 'build_header_obj'
if not hasattr(nead, required_attr):
    raise CustomError('Please install the latest version of pyNEAD. \nVisit https://github.com/GEUS-Glaciology-and-Climate/pyNEAD/')
# Create the output folders. The original code wrapped three os.mkdir calls
# in a single try with a bare except, so if "figures" already existed the
# remaining mkdir calls (including "out") were silently skipped and the
# open() below crashed. os.makedirs(exist_ok=True) creates each folder
# independently and tolerates pre-existing ones.
_out_dirs = ("figures", "figures/L1_data_treatment", "out")
if all(os.path.isdir(d) for d in _out_dirs):
    print("figures and output folders already exist")
for d in _out_dirs:
    os.makedirs(d, exist_ok=True)

# Start a fresh markdown report; Msg() appends to it and it is closed at the
# very end of the script.
f = open("out/Report.md", "w")
def Msg(txt):
    """Print *txt* to stdout and append it as one line to out/Report.md.

    The original opened a new append handle on every call and never closed
    it (leaking one file descriptor per message, and shadowing the global
    ``f``); a ``with`` block closes the handle immediately.
    """
    print(txt)
    with open("out/Report.md", "a") as report:
        report.write(txt + "\n")
# Folder holding the level-0 monthly input files (NEAD format).
path_to_L0 = "L0M/"

# Station metadata: one row per station with ID, name, coordinates, elevation.
site_list = pd.read_csv(
    "L1/GC-Net_location.csv",
    header=0,
    skipinitialspace=True,
)

# To process only specific stations, filter site_list here.
# All station names: 'Swiss Camp 10m', 'Swiss Camp', 'Crawford Point 1', 'NASA-U',
# 'GITS', 'Humboldt', 'Summit', 'Tunu-N', 'DYE-2', 'JAR1', 'Saddle',
# 'South Dome', 'NASA-E', 'CP2', 'NGRIP', 'NASA-SE', 'KAR', 'JAR2',
# 'KULU', 'Petermann ELA', 'NEEM', 'EastGRIP'
# site_list = site_list.loc[site_list.Name.values == 'Saddle',:]
def _format_values(df_in):
    """Return a copy of *df_in* with numeric columns rendered as strings for
    the NEAD writer: NaN -> '', values within 0.005 of 0 or 1 -> '0'/'1',
    otherwise 2 decimals; elevation keeps 2 decimals, lat/lon keep 6."""
    out = df_in.copy()
    for col in [c for c in out.columns
                if c not in ['latitude', 'longitude', 'elevation', 'timestamp']]:
        out[col] = out[col].map(
            lambda x: '' if np.isnan(x)
            else '0' if abs(x) < 0.005
            else '1' if abs(x - 1) < 0.005
            else '%0.2f' % x)
    out['elevation'] = out['elevation'].map(
        lambda x: '' if np.isnan(x) else '%0.2f' % x)
    for col in ['latitude', 'longitude']:
        out[col] = out[col].map(
            lambda x: '' if np.isnan(x) else '%0.6f' % x)
    return out


# Process every station listed in the metadata table.
for site, ID in zip(site_list.Name, site_list.ID):
    plt.close("all")
    Msg("# " + str(ID) + " " + site)

    # Level-0 input file: "<2-digit ID>-<site name>.csv" in NEAD format.
    filename = path_to_L0 + str(ID).zfill(2) + "-" + site + ".csv"
    if not path.exists(filename):
        Msg("Warning: No file for station " + str(ID) + " " + site)
        continue

    ds = nead.read(filename)
    df = ds.to_dataframe()
    df = df.reset_index(drop=True)
    df.timestamp = pd.to_datetime(df.timestamp, utc=True)
    df = df.set_index("timestamp")

    # Removing unneeded aggregate fields (min/max/std companions).
    col_drop = [f for f in df.columns if '_min' in f] + \
               [f for f in df.columns if '_max' in f] + \
               [f for f in df.columns if '_stdev' in f] + \
               [f for f in df.columns if '_std' in f]
    df = df.drop(columns=col_drop)

    if site == "Swiss Camp 10m":
        # Air temperature sensors 2 and 4 are blanked at this site.
        df["TA2"] = np.nan
        df["TA4"] = np.nan

    # Derive instrument height (HW) from surface height (HS) when HW was not
    # recorded; HS values above 900 are treated as sensor error codes.
    # NOTE(review): the 2 m / 3.4 m offsets look like nominal mast heights —
    # confirm against station installation records.
    if "HW1" not in df.columns:
        df.loc[df.HS1 > 900, "HS1"] = np.nan
        df["HW1"] = 2 + df["HS1"].max() - df["HS1"]
    if ("HW2" not in df.columns) and ('HS2' in df.columns):
        df.loc[df.HS2 > 900, "HS2"] = np.nan
        df["HW2"] = 3.4 + df["HS2"].max() - df["HS2"]

    df = df.resample("H").mean()

    Msg("## Manual flagging of data at " + site)
    df_out = ptb.flag_data(df,
                           site,
                           var_list=['HW1', 'HW2'],
                           )

    Msg("## Adjusting data at " + site)
    # we start by adjusting and filtering all variables except surface height
    df_v4 = ptb.adjust_data(df_out, site,
                            skip_var=["HS1", "HS2"])
    # Applying standard filters again
    df_v4 = df_v4.resample("H").asfreq()
    df_v5 = ptb.filter_data(df_v4, site)

    ptb.plot_flagged_data(df_v5, df_out, site,
                          )
    df_v5 = ptb.remove_flagged_data(df_v5)

    # correction of the net radiometer for windspeed
    if 'NR' in df_v5.columns:
        df_v5 = ptb.correct_net_rad(df_v5, site)

    # interpolating short gaps and calculating added variables
    df_v5b = ptb.augment_data(
        df_v5,
        site_list.loc[site_list.Name == site, "Latitude (°N)"].values[0],
        site_list.loc[site_list.Name == site, "Longitude (°E)"].values[0],
        site_list.loc[site_list.Name == site, "Elevation (wgs84 m)"].values[0],
        site,
    )

    # Derive thermistor depths only when any TS column has data.
    if df_v5b[[v for v in df_v5b.columns if 'TS' in v]].notnull().any().any():
        df_v6 = ptb.therm_depth(df_v5b, site)
    else:
        df_v6 = df_v5b.copy()

    # removing empty rows: drop everything before the first record that has at
    # least one scientifically useful variable.
    # BUG FIX: the original list contained '"RH1" "RH2"' (missing comma);
    # implicit string concatenation made that the single non-existent column
    # name "RH1RH2", silently excluding both humidity sensors from this check.
    useful_var_list = [
        "ISWR", "OSWR", "NR", "TA1", "TA2", "TA3", "TA4", "RH1", "RH2", "P",
    ] + ["TS" + str(i) for i in range(1, 11)]
    ind_first = df_v6[
        [v for v in useful_var_list if v in df_v6.columns]
    ].first_valid_index()
    df_v6 = df_v6.loc[ind_first:, :]

    if len(df_v6) > 0:
        # get info related to the new fields
        (
            units,
            display_description,
            database_fields,
            database_fields_data_types,
        ) = ptb.field_info(df_v6.reset_index().columns)

        # Metadata for the hourly file. Copy ds.attrs explicitly: the original
        # aliased hourly_attrs/daily_attrs/ds.attrs to the same dict and only
        # worked through in-place mutation.
        hourly_attrs = dict(ds.attrs)
        hourly_attrs['station_name'] = site
        hourly_attrs['geometry'] = 'POINTZ (%0.4f %0.4f %0.0f)' % (
            df_v6.longitude.mean(),
            df_v6.latitude.mean(),
            df_v6.elevation.mean())
        hourly_attrs['reference'] = 'Steffen, K.; Vandecrux, B.; Houtz, D.; Abdalati, W.; Bayou, N.; Box, J.E.; Colgan, W.T.; Espona Pernas, L.; Griessinger, N.; Haas-Artho, D.; Heilig, A.; Hubert, A.; Iosifescu Enescu, I.; Johnson-Amin, N.; Karlsson, N.B.; Kurup Buchholz, R.; McGrath, D.; Cullen, N.J.; Naderpour, R.; Molotch, N.P.; Pedersen, A.Ø.; Perren, B.; Philipps, T.; Plattner, G.-K.; Proksch, M.; Revheim, M.K.; Særrelse, M.; Schneebli, M.; Sampson, K.; Starkweather, S.; Steffen, S.; Stroeve, J.; Watler, B.; Winton, Ø.A.; Zwally, J.; Ahlstrøm, A., 2022, "GC-Net Level 1 historical automated weather station data", https://doi.org/10.22008/FK2/VVXGUT, GEUS Dataverse'
        hourly_attrs['doi'] = '10.22008/FK2/VVXGUT'
        hourly_attrs['dataset_description'] = 'Vandecrux, B., Box, J. E., Ahlstrøm, A. P., Andersen, S. B., Bayou, N., Colgan, W. T., Cullen, N. J., Fausto, R. S., Haas-Artho, D., Heilig, A., Houtz, D. A., How, P., Iosifescu Enescu, I., Karlsson, N. B., Kurup Buchholz, R., Mankoff, K. D., McGrath, D., Molotch, N. P., Perren, B., Revheim, M. K., Rutishauser, A., Sampson, K., Schneebeli, M., Starkweather, S., Steffen, S., Weber, J., Wright, P. J., Zwally, H. J., and Steffen, K.: The historical Greenland Climate Network (GC-Net) curated and augmented level-1 dataset, Earth Syst. Sci. Data, 15, 5467–5489, https://doi.org/10.5194/essd-15-5467-2023, 2023. '
        hourly_attrs['averaging'] = 'hourly'

        # build header object and write the hourly NEAD file
        header_obj = nead.build_header_obj(
            df_v6.reset_index(),
            metadata=hourly_attrs,
            units=units,
            display_description=display_description,
            database_fields=database_fields,
            database_fields_data_types=database_fields_data_types,
        )
        df_v6_formatted = _format_values(df_v6)
        print('writing hourly values')
        nead.write(
            df_v6_formatted.reset_index(),
            header_obj,
            "L1/hourly/" + site.replace(" ", "") + ".csv",
        )

        # daily average
        print('calculating daily averages')
        df_v7 = ptb.daily_average(df_v6)
        # BUG FIX: the original passed metadata=ds.attrs to the daily header,
        # which only carried averaging='daily' because every attrs dict was
        # the same aliased object; build an explicit daily copy instead.
        daily_attrs = dict(hourly_attrs)
        daily_attrs['averaging'] = 'daily'
        header_obj = nead.build_header_obj(
            df_v7.reset_index(),
            metadata=daily_attrs,
            units=units,
            display_description=display_description,
            database_fields=database_fields,
            database_fields_data_types=database_fields_data_types,
        )
        # formatting and saving to file
        df_v7 = _format_values(df_v7)
        print('writing daily averages')
        nead.write(
            df_v7.reset_index(),
            header_obj,
            "L1/daily/" + site.replace(" ", "") + "_daily.csv",
        )
# Generate a copy of the report with a table of contents prepended, then
# close the report file handle opened at the top of the script.
tocgen.processFile("out/Report.md", "out/report_with_toc.md")
f.close()