-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathbackfill.py
189 lines (159 loc) · 7.16 KB
/
backfill.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# REDPy - Repeating Earthquake Detector in Python
# Copyright (C) 2016-2020 Alicia Hotovec-Ellis (ahotovec-ellis@usgs.gov)
# Licensed under GNU GPLv3 (see LICENSE.txt)
import argparse
import redpy
import numpy as np
import obspy
from obspy import UTCDateTime
import time
"""
Run this script to fill the table with data from the past. If a start time is not
specified, it will check the attributes of the repeater table to pick up where it left
off. Additionally, if this is the first run and a start time is not specified, it will
assume one time chunk prior to the end time. If an end time is not specified, "now" is
assumed. The end time updates at the end of each time chunk processed (default: by hour,
set in configuration). This script can be run as a cron job that will pick up where it
left off if a chunk is missed. Use -n if you are backfilling a large span of time: the
script spends less time downloading data when NSEC is an hour or a day instead of a few
minutes, but at the cost of keeping orphans for longer.
usage: backfill.py [-h] [-v] [-t] [-s STARTTIME] [-e ENDTIME] [-c CONFIGFILE] [-n NSEC]
optional arguments:
-h, --help show this help message and exit
-v, --verbose increase written print statements
-t, --troubleshoot run in troubleshoot mode (without try/except)
-s STARTTIME, --starttime STARTTIME
optional start time to begin filling (YYYY-MM-DDTHH:MM:SS)
-e ENDTIME, --endtime ENDTIME
optional end time to end filling (YYYY-MM-DDTHH:MM:SS)
-c CONFIGFILE, --configfile CONFIGFILE
use configuration file named CONFIGFILE instead of
default settings.cfg
-n NSEC, --nsec NSEC overwrite opt.nsec from configuration file with NSEC this run only
"""
# Wall-clock reference used for the "Total time spent" report at the end of the run
t = time.time()

# Command-line interface; every option is optional
parser = argparse.ArgumentParser(
    description="Backfills table with data from the past")

# Counted flags: repeating -v or -t raises the count, and any nonzero count enables it
parser.add_argument(
    "-v", "--verbose",
    action="count",
    default=0,
    help="increase written print statements")
parser.add_argument(
    "-t", "--troubleshoot",
    action="count",
    default=0,
    help="run in troubleshoot mode (without try/except)")

# Time window to fill; both bounds are parsed later with UTCDateTime
parser.add_argument(
    "-s", "--starttime",
    help="optional start time to begin filling (YYYY-MM-DDTHH:MM:SS)")
parser.add_argument(
    "-e", "--endtime",
    help="optional end time to end filling (YYYY-MM-DDTHH:MM:SS)")

# Configuration overrides
parser.add_argument(
    "-c", "--configfile",
    help="use configuration file named CONFIGFILE instead of default settings.cfg")
parser.add_argument(
    "-n", "--nsec",
    type=int,
    help="overwrite opt.nsec from configuration file with NSEC this run only")

args = parser.parse_args()
# Fall back to settings.cfg when no configuration file is given with -c
configfile = args.configfile if args.configfile else "settings.cfg"
opt = redpy.config.Options(configfile)
if args.verbose:
    print("Using config file: {0}".format(configfile))

# -n replaces the configured chunk length for this run only
if args.nsec:
    opt.nsec = args.nsec

if args.verbose:
    print("Opening hdf5 table: {0}".format(opt.filename))
(h5file, rtable, otable, ttable,
    ctable, jtable, dtable, ftable) = redpy.table.openTable(opt)

# Check for MPL version mismatch
redpy.table.checkMPL(rtable, ftable, ttable, otable, dtable, opt)
# End of the window: the requested end time, or "now" when none is given
tend = UTCDateTime(args.endtime) if args.endtime else UTCDateTime()

# Start of the window, in order of preference:
#   1. the requested start time (which also resets the stored ptime, if one is set)
#   2. the ptime stored on the repeater table, to pick up where the last run left off
#   3. one time chunk (opt.nsec) before the end time
if args.starttime:
    tstart = UTCDateTime(args.starttime)
    if rtable.attrs.ptime:
        rtable.attrs.ptime = UTCDateTime(tstart)
elif rtable.attrs.ptime:
    tstart = UTCDateTime(rtable.attrs.ptime)
else:
    tstart = tend-opt.nsec

# Start times already in the trigger table; 0 stands in when the table is empty
ttimes = ttable.cols.startTimeMPL[:] if len(ttable) > 0 else 0

# n counts the time chunks processed so far; rlen is the repeater count at startup
n = 0
rlen = len(rtable)
# Padded end of the most recently requested window. It stays None when the window is
# empty (tstart >= tend) so the summary after the loop is skipped instead of raising
# NameError on an unassigned name.
endtime = None

# Process the window [tstart, tend) one chunk of opt.nsec seconds at a time
while tstart+n*opt.nsec < tend:

    ti = time.time()
    chunk_start = tstart+n*opt.nsec
    chunk_end = tstart+(n+1)*opt.nsec
    print(chunk_start)

    # Pad the request by opt.atrig on both sides, but never ask for data past tend
    endtime = chunk_end+opt.atrig
    if endtime > tend:
        endtime = tend

    # Download and trigger
    if args.troubleshoot:
        # Troubleshoot mode runs without try/except so errors surface with a traceback
        st, stC = redpy.trigger.getData(chunk_start-opt.atrig, endtime, opt)
        alltrigs = redpy.trigger.trigger(st, stC, rtable, opt)
    else:
        try:
            st, stC = redpy.trigger.getData(chunk_start-opt.atrig, endtime, opt)
            alltrigs = redpy.trigger.trigger(st, stC, rtable, opt)
        except Exception as err:
            # Best effort: a failed chunk is skipped rather than aborting the backfill.
            # Exception already covers TypeError and FDSN errors, so no narrower types
            # need to be listed (or looked up while an error is being handled).
            print('Could not download or trigger data... moving on')
            if args.verbose:
                print("  ({0}: {1})".format(type(err).__name__, err))
            alltrigs = []

    # Clean out data spikes etc.
    trigs, junk, junkFI, junkKurt = redpy.trigger.dataClean(alltrigs, opt, flag=1)

    # Save junk triggers in separate table for quality checking purposes
    for junk_trig in junk:
        redpy.table.populateJunk(jtable, junk_trig, 2, opt) # Both types of junk
    for junk_trig in junkKurt:
        redpy.table.populateJunk(jtable, junk_trig, 1, opt) # Just kurtosis junk
    for junk_trig in junkFI:
        redpy.table.populateJunk(jtable, junk_trig, 0, opt) # Just 'teleseisms'

    # Append times of triggers to ttable to compare total seismicity later
    redpy.table.populateTriggers(ttable, trigs, ttimes, opt)

    # Check triggers against deleted events
    if len(dtable) > 0:
        trigs = redpy.correlation.compareDeleted(trigs, dtable, opt)

    if len(trigs) > 0:
        event_id = rtable.attrs.previd
        first = 0
        if len(otable) == 0:
            # First trigger goes to orphans table
            redpy.table.populateOrphan(otable, 0, trigs[0], opt)
            first = 1
        # Correlate every remaining trigger, giving each the next sequential id
        for i in range(first, len(trigs)):
            event_id = event_id + 1
            redpy.correlation.runCorrelation(rtable, otable, ctable, ftable, ttimes,
                trigs[i], event_id, opt)
        # Persist the last id handed out so the next chunk (or run) continues from it
        rtable.attrs.previd = event_id

    redpy.table.clearExpiredOrphans(otable, opt, chunk_end)

    # Print some stats
    if args.verbose:
        print("Length of Orphan table: {}".format(len(otable)))
        if len(rtable) > 1:
            print("Number of repeaters: {}".format(len(rtable)))
            print("Number of clusters: {}".format(ftable.attrs.nClust))

    # Update tend if an end date is not specified so this will run until it is fully
    # caught up, instead of running to when the script was originally run.
    if not args.endtime:
        tend = UTCDateTime()

    n = n+1

    if args.verbose:
        print("Time spent this iteration: {} minutes".format((time.time()-ti)/60))

# Only report progress when at least one chunk was actually processed
if endtime is not None:
    print("Caught up to: {}".format(endtime-opt.atrig))
# Regenerate the plots from the tables
if args.verbose:
    print("Updating plots...")
redpy.plotting.createPlots(rtable, ftable, ttable, ctable, otable, opt)

# Release the hdf5 file now that all table access is finished
if args.verbose:
    print("Closing table...")
h5file.close()

# t is the time.time() stamp taken before argument parsing, so this covers the whole run
elapsed_minutes = (time.time()-t)/60
print("Total time spent: {} minutes".format(elapsed_minutes))
if args.verbose:
    print("Done")