fdsnws-download.py
#!/usr/bin/env python3
import sys
import re
from pathlib import Path
from urllib.parse import urlparse
import pandas as pd
import obspy as ob
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
from obspy.core.event import Catalog
from obspy.core.stream import Stream
from obspy.clients.fdsn.header import FDSNTimeoutException
from obspy.clients.fdsn.header import FDSNRequestTooLargeException
from obspy.clients.fdsn.header import FDSNNoDataException
#
# Class to parse and verify a station filter. A station filter is a
# comma-separated list of patterns, each in one of the forms:
#   net, net.sta, net.sta.loc, net.sta.loc.cha
# Each of net, sta, loc, cha supports wildcards such as *, ?, (, ), |
#
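# A minimal usage sketch (the network/station codes below are made up for
# illustration only):
#
#   flt = StationNameFilter()
#   flt.set_rules("XX.*,YY.STA01..HH?")  # two comma-separated rules
#   flt.match("XX.ABC12..HHZ")           # True: matches "XX.*"
#   flt.match("ZZ.ABC12..HHZ")           # False: no rule matches
#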
class StationNameFilter:

    def __init__(self):
        self.rules = {}

    def set_rules(self, rules):
        self.rules = {}
        return self.add_rules(rules)

    def add_rules(self, rules):
        #
        # split multiple comma separated rules
        #
        for singleRule in rules.split(","):

            # split the rule in NET STA LOC CHA
            tokens = singleRule.split(".")
            if len(tokens) == 1:    # only network
                tokens.append("*")
                tokens.append("*")
                tokens.append("*")
            elif len(tokens) == 2:  # only network.station
                tokens.append("*")
                tokens.append("*")
            elif len(tokens) == 3:  # only network.station.location
                tokens.append("*")
            elif len(tokens) == 4:  # all network.station.location.channel
                pass
            else:
                print(f"Error: check station filter syntax ({rules})",
                      file=sys.stderr)
                return False

            #
            # Check for valid characters
            #
            valid_str = re.compile(r"[A-Z|a-z|0-9|\?|\*|\||\(|\)]*")
            for tok in tokens:
                if valid_str.fullmatch(tok) is None:
                    print(f"Error: check station filter syntax ({rules})",
                          file=sys.stderr)
                    return False

            fullRule = f"{tokens[0]}.{tokens[1]}.{tokens[2]}.{tokens[3]}"

            #
            # convert user special characters (* ? .) to regex equivalent
            #
            reRule = re.sub(r"\.", r"\.", fullRule)  # . becomes \.
            reRule = re.sub(r"\?", ".", reRule)      # ? becomes .
            reRule = re.sub(r"\*", ".*", reRule)     # * becomes .*
            reRule = re.compile(reRule)

            self.rules[fullRule] = reRule

        return True

    def match(self, waveform_id):
        for (fullRule, reRule) in self.rules.items():
            if reRule.fullmatch(waveform_id) is not None:
                return True
        return False

    def print(self):
        for (fullRule, reRule) in self.rules.items():
            print(f"{fullRule} (regular expression: {reRule.pattern})")

#
# Utility function that returns the stations active at a specific point in time
#
def get_stations(inventory, ref_time, sta_filters):
    stations = []
    for net in inventory.networks:
        if ref_time < net.start_date or ref_time > net.end_date:
            continue
        for sta in net.stations:
            if ref_time < sta.start_date or ref_time > sta.end_date:
                continue
            for cha in sta.channels:
                if ref_time < cha.start_date or ref_time > cha.end_date:
                    continue
                wfid = f"{net.code}.{sta.code}.{cha.location_code}.{cha.code}"
                if sta_filters.match(wfid):
                    stations.append((net.code, sta.code, cha.location_code, cha.code))
    return stations
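
# For example (hypothetical inventory and codes), with a filter built as
#   flt = StationNameFilter(); flt.set_rules("XX.*")
# a call like
#   get_stations(inv, UTCDateTime("2023-04-19T12:00:00"), flt)
# returns one (net, sta, loc, cha) tuple, e.g. ("XX", "STA01", "", "HHZ"),
# for every channel epoch that is active at that time and matches the filter.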

#
# Define a client that connects to a FDSN Web Service
# https://docs.obspy.org/packages/autogen/obspy.clients.fdsn.client.Client.html
#
def create_client(url, timeout):
    parsed = urlparse(url)
    base_url = parsed._replace(netloc=f"{parsed.hostname}:{parsed.port}").geturl()
    if parsed.username and parsed.password:
        print(f"Connecting to {base_url} with credentials", file=sys.stderr)
        return Client(base_url=base_url, user=parsed.username,
                      password=parsed.password, timeout=timeout)
    else:
        print(f"Connecting to {base_url}", file=sys.stderr)
        return Client(base_url=base_url, timeout=timeout)
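
# Illustrative (placeholder) service URLs accepted by create_client(); note
# that credentials embedded in the URL are passed to the obspy Client and
# removed from the base URL:
#   create_client("http://fdsnws.example.org:8080", 300)
#   create_client("http://user:password@fdsnws.example.org:8080", 300)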

def usage():
    print("Usage:")
    print(f" {sys.argv[0]} fdsnws start-date end-date [--timeout sec]")
    print(f" {sys.argv[0]} fdsnws start-date end-date output-catalog-dir [--timeout sec]")
    print(f" {sys.argv[0]} fdsnws --waveforms catalog-dir catalog.csv [length-before-event:length-after-event] [station-filter] [--timeout sec]")
    print("\nNotes:")
    print(" Dates should be given in any format supported by obspy.UTCDateTime, e.g. 2023-04-19T12:00:00 (UTC)")
    print(" --timeout is optional and specifies the fdsnws connection timeout in seconds. Useful for large requests that take a long time to be fulfilled")

def main():
    if len(sys.argv) < 4:
        usage()
        sys.exit(0)

    timeout = 300
    if sys.argv[-2] == "--timeout":
        timeout = float(sys.argv[-1])
        sys.argv = sys.argv[:-2]

    if sys.argv[2] == "--waveforms" and len(sys.argv) >= 5:
        client = create_client(sys.argv[1], timeout)
        catdir = sys.argv[3]
        catfile = sys.argv[4]
        length_before = None
        length_after = None
        if len(sys.argv) >= 6:
            tokens = sys.argv[5].split(":")
            if len(tokens) != 2:
                print(f"Wrong format for waveform length option: {sys.argv[5]}", file=sys.stderr)
                return
            length_before = float(tokens[0])  # [sec]
            length_after = float(tokens[1])   # [sec]
        sta_filters = None
        if len(sys.argv) >= 7:
            sta_filters = StationNameFilter()
            if not sta_filters.set_rules(sys.argv[6]):
                return
        download_waveform(client, catdir, catfile, length_before, length_after, sta_filters)

    elif len(sys.argv) == 4 or len(sys.argv) == 5:
        client = create_client(sys.argv[1], timeout)
        starttime = UTCDateTime(sys.argv[2])
        endtime = UTCDateTime(sys.argv[3])
        catdir = None
        if len(sys.argv) == 5:
            catdir = sys.argv[4]
        download_catalog(client, catdir, starttime, endtime)

    else:
        print("Wrong syntax", file=sys.stderr)
        usage()

def download_catalog(client, catdir, starttime, endtime):

    if catdir:
        # create folder if it doesn't exist
        Path(catdir).mkdir(parents=True, exist_ok=True)

    #
    # Download the inventory
    # https://docs.obspy.org/packages/autogen/obspy.core.inventory.inventory.Inventory.html
    #
    if catdir:
        inv_file = Path(catdir, "inventory.xml")
        if not inv_file.is_file():
            print(f"Downloading inventory {inv_file}...", file=sys.stderr)
            try:
                inventory = client.get_stations(network="*", station="*", location="*", channel="*",
                                                starttime=starttime, endtime=endtime,
                                                level="response")
                inventory.write(inv_file, format="STATIONXML")
            except Exception as e:
                print(f"Cannot download inventory: skip it ({e})", file=sys.stderr)
        else:
            print(f"Inventory file {inv_file} exists: do not download it again", file=sys.stderr)

    # csv file header
    print("id,event,origin,isotime,latitude,longitude,depth,event_type,mag_type,magnitude,method_id,evaluation_mode,author,rms,az_gap,num_phase")

    chunkstart = starttime
    chunkend = endtime
    id = 0
    while chunkstart < endtime:

        if chunkend > endtime:
            chunkend = endtime

        print(f"Downloading {chunkstart} ~ {chunkend}...", file=sys.stderr)

        #
        # Load catalog:
        # includeallorigins=False: we only want preferred origins, too heavy to load all origins
        # includearrivals=False: too slow to load, we can load the picks later in the loop
        # includeallmagnitudes=False: optional, it depends on what magnitude we want to work with
        cat = None
        try:
            cat = client.get_events(starttime=chunkstart, endtime=chunkend, orderby="time-asc",
                                    includeallorigins=False,
                                    includearrivals=False,
                                    includeallmagnitudes=False)
        except (FDSNRequestTooLargeException, FDSNTimeoutException) as e:
            if isinstance(e, FDSNTimeoutException):
                print("FDSNTimeoutException: splitting request in smaller ones (consider --timeout option)...", file=sys.stderr)
            else:
                print("FDSNRequestTooLargeException: splitting request in smaller ones...", file=sys.stderr)
            chunklen = (chunkend - chunkstart) / 2.
            chunkend = chunkstart + chunklen
            continue
        except FDSNNoDataException as e:
            print(f"FDSNNoDataException. No data between {chunkstart} ~ {chunkend}...", file=sys.stderr)

        #
        # Prepare next chunk to load
        #
        chunklen = (chunkend - chunkstart)
        chunkstart = chunkend
        chunkend += chunklen
        if (endtime - chunkend) < chunklen:  # join leftover
            chunkend = endtime

        if cat is None:
            continue

        print(f"{len(cat.events)} events found", file=sys.stderr)

        #
        # Loop through the catalog and extract the information we need
        #
        for ev in cat.events:
            # ev: https://docs.obspy.org/packages/autogen/obspy.core.event.event.Event.html
            id += 1

            #
            # get preferred origin from event (an event might contain multiple origins, e.g. different locators or
            # multiple updates for the same origin; we only care about the preferred one)
            # https://docs.obspy.org/packages/autogen/obspy.core.event.origin.Origin.html
            #
            o = ev.preferred_origin()
            if o is None:
                print(f"No preferred origin for event {ev.resource_id}: skip it", file=sys.stderr)
                continue

            #
            # get preferred magnitude from event (multiple magnitudes might be present, we only care about the preferred one)
            # https://docs.obspy.org/packages/autogen/obspy.core.event.magnitude.Magnitude.html
            #
            m = ev.preferred_magnitude()
            mag = None
            mag_type = "?"
            if m is not None:
                mag = m.mag
                mag_type = m.magnitude_type

            #
            # Write csv entry for this event
            #
            print(f"{id},{ev.resource_id},{o.resource_id},{o.time},{o.latitude},{o.longitude},{o.depth},"
                  f"{ev.event_type if ev.event_type else ''},"
                  f"{mag_type},{mag},"
                  f"{o.method_id},{o.evaluation_mode},"
                  f"{o.creation_info.author if o.creation_info else ''},"
                  f"{o.quality.standard_error if o.quality else ''},"
                  f"{o.quality.azimuthal_gap if o.quality else ''},"
                  f"{o.quality.used_phase_count if o.quality else ''}")

            if catdir is None:
                continue

            ev_file = Path(catdir, f"ev{id}.xml")
            if ev_file.is_file():
                print(f"File {ev_file} exists: skip event", file=sys.stderr)
                continue

            #
            # Since `cat` doesn't contain arrivals (for performance reasons), we need to load them now
            #
            ev_id = str(ev.resource_id)
            origin_cat = None
            while origin_cat is None:
                try:
                    origin_cat = client.get_events(eventid=ev_id, includeallorigins=False, includearrivals=True)
                except FDSNTimeoutException as e:
                    print(f"FDSNTimeoutException while retrieving event {ev_id} (consider --timeout option). Trying again...", file=sys.stderr)
                except Exception as e:
                    print(f"While retrieving event {ev_id} an exception occurred: {e}", file=sys.stderr)
                    break

            if origin_cat is None or len(origin_cat.events) != 1:
                print(f"Something went wrong with event {ev_id}: skip it", file=sys.stderr)
                continue

            ev_with_picks = origin_cat.events[0]
            if ev.resource_id != ev_with_picks.resource_id:
                print(f"Something went wrong with event {ev_id}: skip it", file=sys.stderr)
                continue

            #
            # Write extended event information (with picks and arrivals) as QuakeML
            #
            cat_out = Catalog()
            cat_out.append(ev_with_picks)
            cat_out.write(ev_file, format="QUAKEML")

def download_waveform(client, catdir, catfile, length_before=None, length_after=None, sta_filters=None):
    #
    # Load the csv catalog
    #
    print(f"Loading catalog {catfile}...", file=sys.stderr)
    cat = pd.read_csv(catfile, dtype=str, na_filter=False)
    print(f"Found {len(cat)} events", file=sys.stderr)

    #
    # Load the inventory
    #
    print(f"Loading inventory {Path(catdir, 'inventory.xml')}...", file=sys.stderr)
    inv = ob.core.inventory.inventory.read_inventory(Path(catdir, "inventory.xml"))

    if sta_filters:
        print("Using the following station filters...", file=sys.stderr)
        sta_filters.print()

    #
    # Loop through the catalog by event
    #
    for row in cat.itertuples():

        ev_id = row.id
        evtime = UTCDateTime(row.isotime)
        #
        # you can fetch any csv column with 'row.column'
        #
        print(f"Processing event {ev_id} {evtime}", file=sys.stderr)

        wf_file = Path(catdir, f"ev{ev_id}.mseed")
        if wf_file.is_file():
            print(f"File {wf_file} exists: skip event", file=sys.stderr)
            continue

        #
        # loop through origin arrivals to find the used stations and the
        # latest pick time
        #
        last_pick_time = None
        auto_sta_filters = StationNameFilter()
        if sta_filters is None or length_before is None or length_after is None:
            #
            # We want to access all event information, so we load its XML file
            # with obspy
            #
            evfile = Path(catdir, f"ev{ev_id}.xml")
            print(f"Reading {evfile}", file=sys.stderr)
            tmpcat = ob.core.event.read_events(evfile)
            ev = tmpcat[0]  # we know we stored only one event in this catalog

            #
            # get preferred origin from event
            #
            o = ev.preferred_origin()
            if o is None:
                print(f"No preferred origin for event {ev_id}: skip", file=sys.stderr)
                continue
            if o.time != evtime:  # this should never happen
                print(f"Event {ev_id} preferred origin time is not the same as event time: skip event", file=sys.stderr)
                continue

            for a in o.arrivals:
                #
                # find the pick associated with the current arrival
                #
                for p in ev.picks:
                    if p.resource_id == a.pick_id:
                        #
                        # Keep track of the last pick time
                        #
                        if last_pick_time is None or p.time > last_pick_time:
                            last_pick_time = p.time
                        #
                        # Keep track of the stations used by the picks
                        #
                        auto_sta_filters.add_rules(p.waveform_id.id)
                        break

        #
        # Find the stations that were active at this event time
        #
        stations = get_stations(
            inventory=inv, ref_time=evtime,
            sta_filters=(auto_sta_filters if sta_filters is None else sta_filters)
        )

        #
        # Fix the time window for all waveforms
        #
        if length_before is None:
            starttime = evtime
        else:
            starttime = evtime - length_before

        if length_after is None:
            endtime = last_pick_time
            endtime += (endtime - starttime) * 0.3  # add extra 30%
        else:
            endtime = evtime + length_after

        bulk = []
        for (network_code, station_code, location_code, channel_code) in stations:
            bulk.append((network_code, station_code, location_code, channel_code, starttime, endtime))

        #
        # Download the waveforms
        #
        waveforms = None
        if bulk:
            print(f"Downloading {len(bulk)} waveforms between {starttime} ~ {endtime} ", file=sys.stderr)
            try:
                # https://docs.obspy.org/packages/autogen/obspy.clients.fdsn.client.Client.get_waveforms_bulk.html
                # https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.html
                waveforms = client.get_waveforms_bulk(bulk)
            except FDSNTimeoutException as e:
                print(f"FDSNTimeoutException: Cannot fetch waveforms for event {ev_id} (consider --timeout option)", file=sys.stderr)
            except Exception as e:
                print(f"Cannot fetch waveforms for event {ev_id}: {e}", file=sys.stderr)

        if waveforms:
            waveforms.trim(starttime=starttime, endtime=endtime)
            print(f"Saving {wf_file}", file=sys.stderr)
            waveforms.write(wf_file, format="MSEED")


if __name__ == '__main__':
    main()