Skip to content

Commit 2a7b181

Browse files
committed
Adding new response test.
1 parent 5dffde1 commit 2a7b181

File tree

3 files changed

+264
-0
lines changed

3 files changed

+264
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import obspy
from obspy.clients.fdsn import Client
from obspy.clients.fdsn.header import URL_MAPPINGS

import pathlib

# Folder all downloaded StationXML files are written to.
DATA_PATH = pathlib.Path("./data")
# All FDSN web service providers known to ObsPy, in deterministic order.
PROVIDERS = sorted(URL_MAPPINGS.keys())
# Optional network/station filters passed to get_stations();
# None means "no restriction".
NETWORK = None
STATION = None

# XXX: Overwritten here to only download a subset. For the full
# scale test just comment these lines.
PROVIDERS = ["IRIS"]
NETWORK = "IU"
STATION = "A*"
17+
18+
19+
def download_stationxml_files_for_provider(
    provider: str, output_folder: pathlib.Path
) -> None:
    """Download one response-level StationXML file per station of a provider.

    Files are written to ``output_folder`` as ``{network}_{station}.xml``.
    Files that already exist are skipped, so the function can safely be
    rerun to resume an interrupted download.

    :param provider: Key of the FDSN provider (see ``URL_MAPPINGS``).
    :param output_folder: Folder the StationXML files are saved to.
    """

    def _p(msg):
        # Prefix all log output with the provider for easier reading when
        # looping over many providers.
        print(f"Provider '{provider}': {msg}")

    # parents=True so a nested output path does not abort the run.
    output_folder.mkdir(parents=True, exist_ok=True)

    # Get inventory for provider.
    client = Client(provider)
    _p("Retrieving inventory ...")
    try:
        inv = client.get_stations(
            level="station", format="text", network=NETWORK, station=STATION
        )
    except Exception as e:
        print(f"Failed to initialize client '{provider}' due to: {str(e)}")
        return
    _p("Done retrieving inventory ...")

    # Loop over all stations and retrieve the full response level dictionary.
    net_sta = []
    for network in inv:
        for station in network:
            net_sta.append((network.code, station.code))

    # Unique list to get rid of station epochs.
    net_sta = sorted(set(net_sta))

    for _i, (network, station) in enumerate(net_sta):
        filename = output_folder / f"{network}_{station}.xml"
        if filename.exists():
            # Fixed: the message previously printed a literal placeholder
            # instead of the actual file name.
            _p(f"File '{filename}' already exists.")
            continue
        _p(f"Downloading file {_i + 1} of {len(net_sta)}: {filename}")
        try:
            client.get_stations(
                network=network,
                station=station,
                level="response",
                filename=str(filename),
            )
        except Exception as e:
            _p(f"Failed to download '{filename}' due to: {str(e)}")
            continue
64+
65+
66+
def main():
    """Download StationXML files for every configured provider."""
    # All providers share the same output folder; the download helper
    # skips files that are already present.
    for prov in PROVIDERS:
        download_stationxml_files_for_provider(
            provider=prov, output_folder=DATA_PATH
        )


if __name__ == "__main__":
    main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import obspy
from obspy.core.inventory.response import Response

# Folder of empty marker files for StationXML files that already passed.
CACHE_PATH = pathlib.Path("./cache")
# Folder containing the downloaded StationXML files to test.
DATA_PATH = pathlib.Path("./data")

# Number of frequencies to test at.
N_FREQUENCIES = 100

# TOLERANCES
RTOL = 1e-4
ATOL_AS_FRAC_OF_ABS_MAX = 5e-4

# Collect phase responses here that don't compare favourably to evalresp
# but which have been manually verified to be at least as correct in the
# new scipy implementation.
# This will have to be expanded in the course of this test.
# NOTE(review): "REPONSE" is a typo ("RESPONSE") but the name is
# referenced below - rename both sites together if cleaning up.
SKIP_VALIDATING_PHASE_REPONSE = ["IU.AFI..UHE", "IU.AFI..UHN", "IU.AFI..UHZ"]
23+
24+
25+
def compare_single_response(channel_id: str, response: Response):
    """Compare the evalresp and scipy responses for a single channel.

    Amplitude and phase are compared separately from 0.01 Hz up to the
    Nyquist frequency. On mismatch a four-panel diagnostic plot is shown
    and the underlying assertion error is re-raised.

    :param channel_id: SEED id of the channel, e.g. ``"IU.AFI..UHZ"``.
    :param response: The response to compare with both implementations.
    :raises Exception: If the sampling rate cannot be auto-detected.
    :raises AssertionError: If the two responses differ beyond tolerance.
    """
    # Detect the sampling rate from the response stages - walk backwards
    # and use the last stage that carries decimation information.
    for stage in response.response_stages[::-1]:
        if (
            stage.decimation_input_sample_rate is not None
            and stage.decimation_factor is not None
        ):
            sampling_rate = stage.decimation_input_sample_rate / stage.decimation_factor
            break
    else:
        # XXX: Something has to be done here.
        msg = "Failed to autodetect sampling rate of channel from " "response stages."
        raise Exception(msg)

    # Compute up to the Nyquist frequency - evalresp's phase usually goes crazy
    # afterwards.
    frequencies = np.logspace(-2, np.log10(0.5 * sampling_rate), N_FREQUENCIES)

    # Compute for evalresp as well as the scipy response.
    try:
        eval_resp = response.get_evalresp_response_for_frequencies(
            frequencies, output="VEL"
        )
    except Exception as e:
        # Fixed: the two message parts previously concatenated to
        # "can beperformed" and misspelled "comparison".
        print(
            " evalresp failed to compute response. Thus no comparison can be "
            f"performed. Reason for evalresp failure: {str(e)}"
        )
        return

    scipy_resp = response.get_response(frequencies, output="VEL")

    # Use amplitude and phase for the comparison just because it is more
    # intuitive.
    eval_resp_amplitude = np.abs(eval_resp)
    eval_resp_phase = np.angle(eval_resp)

    scipy_resp_amplitude = np.abs(scipy_resp)
    scipy_resp_phase = np.angle(scipy_resp)

    # Absolute tolerances scale with the magnitude of each curve.
    atol_amplitude = scipy_resp_amplitude.max() * ATOL_AS_FRAC_OF_ABS_MAX
    atol_phase = np.abs(scipy_resp_phase).max() * ATOL_AS_FRAC_OF_ABS_MAX

    try:
        np.testing.assert_allclose(
            eval_resp_amplitude,
            scipy_resp_amplitude,
            rtol=RTOL,
            atol=atol_amplitude,
            err_msg="amplitude mismatch",
        )
        # Skip if manually verified.
        if channel_id not in SKIP_VALIDATING_PHASE_REPONSE:
            np.testing.assert_allclose(
                eval_resp_phase,
                scipy_resp_phase,
                rtol=RTOL,
                atol=atol_phase,
                # Fixed: was mislabelled "amplitude mismatch".
                err_msg="phase mismatch",
            )
    except Exception as e:
        print(f"Failed comparison due to: {str(e)}")
        print("Will now produce a plot to help diagnose the issue.")

        plt.subplot(411)
        plt.title("Amplitude response")
        plt.loglog(frequencies, eval_resp_amplitude, label="Evalresp")
        plt.loglog(frequencies, scipy_resp_amplitude, label="scipy")
        plt.legend()

        plt.subplot(412)
        plt.title("Amplitude response difference")
        plt.loglog(frequencies, eval_resp_amplitude - scipy_resp_amplitude)

        plt.subplot(413)
        plt.title("Phase response")
        plt.semilogx(frequencies, eval_resp_phase, label="Evalresp")
        plt.semilogx(frequencies, scipy_resp_phase, label="scipy")
        plt.legend()

        # Fixed: the subplot must be selected BEFORE setting its title,
        # otherwise the title ends up on subplot 413.
        plt.subplot(414)
        plt.title("Phase response difference")
        plt.semilogx(frequencies, eval_resp_phase - scipy_resp_phase)

        plt.show()
        raise e
111+
112+
113+
def test_single_stationxml_file(filename: pathlib.Path):
    """Run the response comparison for every channel in one StationXML file.

    A simplistic on-disk cache (an empty marker file in ``CACHE_PATH``)
    ensures files that already passed are skipped on subsequent runs.

    :param filename: Path of the StationXML file to test.
    :raises Exception: Re-raises parse failures and comparison mismatches.
    """

    def _p(msg, indent: int = 0):
        # Fixed: the message previously printed a literal placeholder
        # instead of the actual file name.
        print(f"{' ' * indent}File '{filename}': {msg}")

    # Simplistic cache to be able to rerun this a lot and fix
    # bugs as they appear.
    cache_file = CACHE_PATH / filename.name
    if cache_file.exists():
        _p("Already has been tested. Skipping ...", indent=2)
        return

    try:
        inv = obspy.read_inventory(str(filename))
    except Exception as e:
        _p(f"Failed to parse due to: {str(e)}")
        raise e

    # Flatten the inventory into per-channel records; the response object
    # goes last so c[:-1] is printable metadata and c[:4] is the SEED id.
    all_responses = []
    for net in inv:
        for sta in net:
            for cha in sta:
                all_responses.append(
                    [
                        net.code,
                        sta.code,
                        cha.location_code,
                        cha.code,
                        cha.start_date,
                        cha.end_date,
                        cha.response,
                    ]
                )
    for _i, c in enumerate(all_responses):
        _p(f"Comparing responses for {c[:-1]} ...", indent=2)
        compare_single_response(channel_id=".".join(c[:4]), response=c[-1])

    # Finally just touch the cache file so it will be skipped the next run.
    cache_file.touch()
151+
152+
153+
def main():
    """Run the response test on every downloaded StationXML file."""
    CACHE_PATH.mkdir(exist_ok=True)
    all_files = list(DATA_PATH.glob("*.xml"))
    for _i, filename in enumerate(all_files):
        # Fixed: the progress message previously printed a literal
        # placeholder instead of the actual file name.
        print(f"Reading StationXML file {_i + 1} of {len(all_files)}: {filename}")
        test_single_stationxml_file(filename)


if __name__ == "__main__":
    main()
+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Brute-Force Response Test
2+
3+
This folder contains a collection of scripts to
4+
5+
(1) Download a large number of StationXML files.
6+
(2) Compare the response calculation in evalresp to a new one in scipy.
7+
8+
The goal is to run this on a significant fraction of the data out there and
document any differences.
10+
11+
## Usage
12+
13+
First edit the script, then run it. By default it only downloads a fairly
small number of files; this should be increased down the line.
15+
16+
```bash
17+
python 00_download_data.py
18+
```
19+
20+
Then run the tests on the downloaded files.
21+
22+
```bash
23+
python 01_run_test.py
24+
```
25+
26+
Getting this to run fully will require a fair bit of manual work. The scripts
are designed so that they can be rerun; work that has already been performed
will be skipped.

0 commit comments

Comments
 (0)