From 7aa09e9d125402025e2a544eb7ddbd499280c247 Mon Sep 17 00:00:00 2001 From: Leonardo Miquelutti Date: Tue, 17 Aug 2021 09:03:06 -0300 Subject: [PATCH] Criado usando o Colaboratory --- .../survey_study_colab_version.ipynb | 679 ++++++++++++++++++ 1 file changed, 679 insertions(+) create mode 100644 docs/source/tutorials/survey_study_colab_version.ipynb diff --git a/docs/source/tutorials/survey_study_colab_version.ipynb b/docs/source/tutorials/survey_study_colab_version.ipynb new file mode 100644 index 0000000..6cea493 --- /dev/null +++ b/docs/source/tutorials/survey_study_colab_version.ipynb @@ -0,0 +1,679 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "survey-study-colab-version.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true, + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iburFhms7uqT" + }, + "source": [ + "# Survey Study: a tutorial\n", + "\n", + "This Jupyter notebook aims to help razorback users to compute impedance estimates from the data set shown in the paper in different ways for a two stage remote reference configuration:\n", + "\n", + "1- Ordinary Least Squares\n", + "\n", + "2- M-Estimator \n", + "\n", + "3- Bounded Influence \n", + "\n", + "This tutorial is designed for Metronix data format (.ats files).\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fHUczNHoQgvz" + }, + "source": [ + "## Preparing the environment\n", + "\n", + "After you run the cell below, click on `RESTART RUNTIME` before moving in within the notebook. When you restart the runtime, you allow for the changes on the upgraded `matplotlib` package to take effect. \n", + "\n", + "**MAKE SURE YOU RESTART THE RUNTIME BEFORE MOVING ON**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jiZZkfgodONN" + }, + "source": [ + "!pip install razorback\n", + "!pip install gitpython\n", + "!pip install --upgrade matplotlib" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6VnDfTcHWQI5" + }, + "source": [ + "Now that you restarted the runtime, go to the next section to download the data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G1LS6jOrQq-m" + }, + "source": [ + "## Downloading the data\n", + "\n", + "The cells below both:\n", + "* download the data to your environment\n", + "* put the calibration files in the proper place" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uiUub745FuGi" + }, + "source": [ + "# download the data\n", + "import git\n", + "git.Git(\"/content\").clone(\"git://github.com/BRGM/razorback-tutorial-data.git\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8GUj2GiVciMQ" + }, + "source": [ + "# put a folder of the calibration files in the right place\n", + "!mkdir /usr/local/lib/python3.7/dist-packages/razorback/data/\n", + "!cp -r /content/razorback-tutorial-data/metronix_calibration /usr/local/lib/python3.7/dist-packages/razorback/data/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AwHgwBGJRo_w" + }, + "source": [ + "## Data processing" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_oxiFgg37uqa" + }, + "source": [ + "import razorback as rb # importing the razorback library\n", + "import numpy as np # importing the numpy library as np\n", + "import matplotlib.pyplot as plt # importing the matplotlib.pyplot library as plt\n", + "import urllib.request\n", + "import glob\n", + "%matplotlib inline" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "xF_z8b1a7uqd" + }, + "source": [ + "# function for getting sensor information from .ats file header\n", + "def sensor(ats_file):\n", + " header = rb.io.ats.read_ats_header(ats_file) #razorback function for ats file data importation\n", + " chan = header['channel_type'].decode()\n", + " stype = ''.join(c for c in header['sensor_type'].decode() if c.isprintable())\n", + " snum = header['sensor_serial_number']\n", + " sampling_rate = header['sampling_rate']\n", + " x1, y1, z1 = header['x1'], header['y1'], header['z1']\n", + " x2, y2, z2 = header['x2'], header['y2'], header['z2']\n", + " L = ((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2)**.5\n", + " return chan, L, stype, snum, sampling_rate" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "dYxTjn0c7uqf" + }, + "source": [ + "# function for getting calibration function from sensor information\n", + "def calibration(ats_file, name_converter=None):\n", + " chan, L, stype, snum, sampling_rate = sensor(ats_file)\n", + " if chan in ('Ex', 'Ey'):\n", + " return L\n", + " elif chan in ('Hx', 'Hy', 'Hz'):\n", + " calib_name = f\"{stype}{snum:03d}.txt\"\n", + " if name_converter:\n", + " calib_name = name_converter.get(calib_name, calib_name)\n", + " return rb.calibrations.metronix(calib_name, sampling_rate)\n", + " raise Exception(f\"Unknown channel name: {chan}\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H7XKo_947uqi" + }, + "source": [ + "The functions `sensor()` and `calibration()` will help to load the metadata of the data set (electric dipoles length, orientation, calibration files...).\n", + "\n", + "Other strategies for handling these metadata could be used, it's up to you to design your own.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "MtH3SstgFH-T" + }, + "source": [ + "## Create the inventory containing all the time series\n", + "\n", + "# - files is the list of data files to load\n", + "# - pattern is applied to each data file to extract the strings {site} and {channel}\n", + "# - tag_template create the tag for the data file using {site} and {channel}\n", + "files = glob.glob(\"*/site*/*/*.ats\")\n", + "pattern = \"**/site{site}/*/*_T{channel}_*.ats\"\n", + "tag_template = \"site{site}_{channel}\"\n", + "\n", + "# correcting incorrect information about calibration files in file headers\n", + "name_converter = {\n", + " 'UNKN_H104.txt': 'MFS07104.txt',\n", + " 'UNKN_H105.txt': 'MFS07105.txt',\n", + "}\n", + "files" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "2CfFH1Ln_tNK" + }, + "source": [ + "# creating and filling the inventory\n", + "inv = rb.Inventory()\n", + "for fname, [tag] in rb.utils.tags_from_path(files, pattern, tag_template):\n", + " calib = calibration(fname, name_converter) # getting calibration for data file\n", + " signal = rb.io.ats.load_ats([fname], [calib], lazy=True) # loading data file\n", + " inv.append(rb.SignalSet({tag:0}, signal)) # tagging and storing the signal" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oWP1arjG7uqp" + }, + "source": [ + "All the data are now loaded in `inventory`.\n", + "We can use `inventory` to explore and handle the data set." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RtQEhLby7uqq" + }, + "source": [ + "You can check the number of files in your inventory `inv`:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kurXP0hE7uqs" + }, + "source": [ + "len(inv)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i45EgB6r7uqv" + }, + "source": [ + "You can display the tags/labels included in the inventory `inv`" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "bgP-qA4L7uqw" + }, + "source": [ + "inv.tags" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vpYPoWlR7uqx" + }, + "source": [ + "Creating (using `pack()`) and showing (using `print()`) the SignalSet object for site004 only:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "l0uBsHSS7uqx", + "scrolled": true + }, + "source": [ + "print(inv.filter('site004*').pack())" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q8Vb2xQ37uqy" + }, + "source": [ + "Same operation for site099 (magnetic remote reference only):" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XfX8GvuH7uqz" + }, + "source": [ + "print(inv.filter('site099*').pack())" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S1DOozs37uq4" + }, + "source": [ + "Creating and showing the SignalSet object content for the full inventory (including sites 002, 004, 006, 009, 100, 099).\n", + "The full data set is reduced to maximal synchronous time section. The `pack()` function is narrowing the time range to the window of common synchronousness of the whole inventory." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WXh0vP867uq6", + "scrolled": true + }, + "source": [ + "print(inv.pack())" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "lFQDa_hT7uq7" + }, + "source": [ + "# Function to prepare signal set from inventory to get it ready for TF estimation procedure\n", + "from itertools import chain\n", + "\n", + "def prepare_signalset(inventory, local_site, remote_sites):\n", + " patterns = (f\"{e}*\" for e in [local_site, *remote_sites])\n", + " signalset = inventory.filter(*patterns).pack()\n", + " tags = signalset.tags\n", + " tags[\"E\"] = tags[f\"{local_site}_Ex\"] + tags[f\"{local_site}_Ey\"]\n", + " tags[\"B\"] = tags[f\"{local_site}_Hx\"] + tags[f\"{local_site}_Hy\"]\n", + " if remote_sites:\n", + " remote_names = tags.filter(*chain(*(\n", + " (f\"{e}_Hx\", f\"{e}_Hy\") for e in remote_sites\n", + " )))\n", + " tags[\"Bremote\"] = sum((tags[n] for n in remote_names), ())\n", + " return signalset" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nP1dcZ7q7uq9" + }, + "source": [ + "The function `prepare_signalset()` build the SignalSet needed for processing a given `local_site` along with some `remote_sites`.\n", + "\n", + "The function starts by extracting the channels of interest and `pack` them in a SignalSet.\n", + "Then that signaset is enriched with specific tags (`'E'`, `'B'` and `'Bremote'`) that will be used later in the TF estimate function." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X6VD8pIF7uq_" + }, + "source": [ + "Showing the SignalSet object with `'E'`, `'B'` and `'Bremote'` tags for processing site004 using sites 100 and 099 as remote references" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Mc0Kz8Ik7urA", + "scrolled": true + }, + "source": [ + "print(prepare_signalset(inv, 'site004', ['site100', 'site099']))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mu_pIqoC7urC" + }, + "source": [ + "Defining a frequency array in logscale for TF computation" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7USAhiAQ7urD" + }, + "source": [ + "# Definining your output frequency in logscale / you can reduce nb_freq if you want to make a quick test\n", + "# as sampling frequency is 128, we go up to half a nyquist frequency which is 32 Hz\n", + "# recordings are long enough to try to reach 1 mHz\n", + "nb_freq=32\n", + "freq = np.logspace(-3, np.log10(32), nb_freq)\n", + "print(freq)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VpgAVfJi7urF" + }, + "source": [ + "### Computing two-stage OLS Impedance estimate for site004 with sites 100 and 99 as remote references " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8a5oT5rn7urG" + }, + "source": [ + "First stage: a regression is performed to estimate the TF between magnetic field at site 4 and magnetic field at (sites 99 + 100) . Second Stage: the first stage TF is used to produce a synthetic magnetic field and a second regression is operated between the latter and site 4 electric field. " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YkJUjaON7urH" + }, + "source": [ + "sig = prepare_signalset(inv, 'site004', ['site100', 'site099'])\n", + "print(sig)\n", + "ImpOLS = rb.utils.impedance(sig, freq ,remote='Bremote' )\n", + "print(ImpOLS.impedance.shape)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BN1KNosE7urI" + }, + "source": [ + "Showing typical apparent resistivity and phase results using matplotlib" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "d0EkUf6_7urI" + }, + "source": [ + "res = ImpOLS\n", + "rho = 1e12 * np.abs(res.impedance)**2 / freq[:, None, None]\n", + "rho_err = 1e12 * np.abs(res.error)**2 / freq[:, None, None]\n", + "phi = np.angle(res.impedance, deg=True)\n", + "rad_err = np.arcsin(res.error/abs(res.impedance))\n", + "rad_err[np.isnan(rad_err)] = np.pi\n", + "phi_err = np.rad2deg(rad_err)\n", + "\n", + "fig = plt.figure()\n", + "ax = plt.subplot(2, 1, 1)\n", + "ax.set_xscale(\"log\", nonpositive='clip')\n", + "ax.set_yscale(\"log\", nonpositive='clip')\n", + "ax.errorbar(freq, rho[:,0,0], yerr=rho_err[:,0,0], fmt='k.', label=r'$\\rho_{xx}$')\n", + "ax.errorbar(freq, rho[:,1,1], yerr=rho_err[:,1,1], fmt='g.', label=r'$\\rho_{yy}$')\n", + "ax.errorbar(freq, rho[:,0,1], yerr=rho_err[:,0,1], fmt='r.', label=r'$\\rho_{xy}$')\n", + "ax.errorbar(freq, rho[:,1,0], yerr=rho_err[:,1,0], fmt='b.', label=r'$\\rho_{yx}$')\n", + "plt.xlabel('freq')\n", + "plt.ylabel(r'apparent resistivity $\\rho$ ($\\Omega.m$)');\n", + "plt.legend()\n", + "\n", + "plt.title('Site 002 results in 2-stage RR OLS\\n configuration with sites 99 and 100 as remote references')\n", + "ax = plt.subplot(2, 1, 2)\n", + "ax.set_xscale(\"log\", nonpositive='clip')\n", + "ax.errorbar(freq, phi[:,0,0], yerr=phi_err[:,0,0], fmt='k.', label=r'$\\phi_{xx}$')\n", + "ax.errorbar(freq, phi[:,1,1], yerr=phi_err[:,1,1], fmt='g.', label=r'$\\phi_{yy}$')\n", + "ax.errorbar(freq, phi[:,0,1], yerr=phi_err[:,0,1], fmt='r.', label=r'$\\phi_{xy}$')\n", + "ax.errorbar(freq, phi[:,1,0], yerr=phi_err[:,1,0], fmt='b.', label=r'$\\phi_{yx}$')\n", + "plt.xlabel('freq')\n", + "plt.ylabel(r'phase $\\phi$ (degrees)');\n", + "plt.legend()\n", + "plt.ylim(-180, 180)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dbFPFxRo7urL" + }, + "source": [ + "### Now computing 2-stage M-Estimator Transfer Function for site004 with sites 100 and 99 as remote references " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iUHRzVfa7urM" + }, + "source": [ + "from razorback.weights import mest_weights\n", + "from razorback.prefilters import cod_filter\n", + "\n", + "sig = prepare_signalset(inv, 'site004', ['site100', 'site099'])\n", + "print(sig)\n", + "ImpME = rb.utils.impedance(\n", + " sig, freq,\n", + " weights= mest_weights,\n", + " remote='Bremote', # including the remotes references in the computation,\n", + " prefilter=cod_filter(0.0), # no coherency prefilter...\n", + " fourier_opts=dict( Nper= 8, overlap= 0.71) # fourier options with 8 periods by window, and 71% of overlap\n", + ")\n", + "print(ImpME.impedance.shape)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "w4C4PwK07urP" + }, + "source": [ + "res = ImpME\n", + "rho = 1e12 * np.abs(res.impedance)**2 / freq[:, None, None]\n", + "rho_err = 1e12 * np.abs(res.error)**2 / freq[:, None, None]\n", + "phi = np.angle(res.impedance, deg=True)\n", + "rad_err = np.arcsin(res.error/abs(res.impedance))\n", + "rad_err[np.isnan(rad_err)] = np.pi\n", + "phi_err = np.rad2deg(rad_err)\n", + "\n", + "fig = plt.figure()\n", + "ax = plt.subplot(2, 1, 1)\n", + "ax.set_xscale(\"log\", nonpositive='clip')\n", + "ax.set_yscale(\"log\", nonpositive='clip')\n", + "ax.errorbar(freq, rho[:,0,0], yerr=rho_err[:,0,0], fmt='k.', label=r'$\\rho_{xx}$')\n", + "ax.errorbar(freq, rho[:,1,1], yerr=rho_err[:,1,1], fmt='g.', label=r'$\\rho_{yy}$')\n", + "ax.errorbar(freq, rho[:,0,1], yerr=rho_err[:,0,1], fmt='r.', label=r'$\\rho_{xy}$')\n", + "ax.errorbar(freq, rho[:,1,0], yerr=rho_err[:,1,0], fmt='b.', label=r'$\\rho_{yx}$')\n", + "plt.xlabel('freq')\n", + "plt.ylabel(r'apparent resistivity $\\rho$ ($\\Omega.m$)');\n", + "plt.legend()\n", + "\n", + "plt.title('Site 002 results in 2-stage RR ME\\n configuration with sites 99 and 100 as remote references')\n", + "ax = plt.subplot(2, 1, 2)\n", + "ax.set_xscale(\"log\", nonpositive='clip')\n", + "ax.errorbar(freq, phi[:,0,0], yerr=phi_err[:,0,0], fmt='k.', label=r'$\\phi_{xx}$')\n", + "ax.errorbar(freq, phi[:,1,1], yerr=phi_err[:,1,1], fmt='g.', label=r'$\\phi_{yy}$')\n", + "ax.errorbar(freq, phi[:,0,1], yerr=phi_err[:,0,1], fmt='r.', label=r'$\\phi_{xy}$')\n", + "ax.errorbar(freq, phi[:,1,0], yerr=phi_err[:,1,0], fmt='b.', label=r'$\\phi_{yx}$')\n", + "plt.xlabel('freq')\n", + "plt.ylabel(r'phase $\\phi$ (degrees)');\n", + "plt.legend()\n", + "plt.ylim(-180, 180)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZpDxmrLd7urS" + }, + "source": [ + "### Now computing 2-stage Bounded Influence Transfer Function for site004 with sites 100 and 99 as remote references for a rejection percentage of 1% and 3 bounded influence steps" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5KHwqLPp7urS" + }, + "source": [ + "from razorback.weights import bi_weights\n", + "from razorback.prefilters import cod_filter\n", + "\n", + "sig = prepare_signalset(inv, 'site004', ['site100', 'site099'])\n", + "print(sig)\n", + "ImpBI = rb.utils.impedance(\n", + " sig, freq,\n", + " weights= bi_weights(0.01, 3), # bounded influence with reject probability of 1% and 3 steps\n", + " remote='Bremote', # including the remotes references in the computation,\n", + " prefilter=cod_filter(0.0), # prefilter: cod_filter(0.0)\n", + " fourier_opts=dict( Nper= 8, overlap= 0.71) # fourier options with 8 periods by window, and 71% of overlap\n", + ")\n", + "print(ImpBI.impedance.shape)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "9-KGOX7s7urU" + }, + "source": [ + "res = ImpBI\n", + "rho = 1e12 * np.abs(res.impedance)**2 / freq[:, None, None]\n", + "rho_err = 1e12 * np.abs(res.error)**2 / freq[:, None, None]\n", + "phi = np.angle(res.impedance, deg=True)\n", + "rad_err = np.arcsin(res.error/abs(res.impedance))\n", + "rad_err[np.isnan(rad_err)] = np.pi\n", + "phi_err = np.rad2deg(rad_err)\n", + "\n", + "fig = plt.figure()\n", + "ax = plt.subplot(2, 1, 1)\n", + "ax.set_xscale(\"log\", nonpositive='clip')\n", + "ax.set_yscale(\"log\", nonpositive='clip')\n", + "ax.errorbar(freq, rho[:,0,0], yerr=rho_err[:,0,0], fmt='k.', label=r'$\\rho_{xx}$')\n", + "ax.errorbar(freq, rho[:,1,1], yerr=rho_err[:,1,1], fmt='g.', label=r'$\\rho_{yy}$')\n", + "ax.errorbar(freq, rho[:,0,1], yerr=rho_err[:,0,1], fmt='r.', label=r'$\\rho_{xy}$')\n", + "ax.errorbar(freq, rho[:,1,0], yerr=rho_err[:,1,0], fmt='b.', label=r'$\\rho_{yx}$')\n", + "plt.xlabel('freq')\n", + "plt.ylabel(r'apparent resistivity $\\rho$ ($\\Omega.m$)');\n", + "plt.legend()\n", + "\n", + "plt.title('Site 002 results in 2-stage RR BOUNDED INFLUENCE \\n configuration with sites 99 and 100 as remote references')\n", + "ax = plt.subplot(2, 1, 2)\n", + "ax.set_xscale(\"log\", nonpositive='clip')\n", + "ax.errorbar(freq, phi[:,0,0], yerr=phi_err[:,0,0], fmt='k.', label=r'$\\phi_{xx}$')\n", + "ax.errorbar(freq, phi[:,1,1], yerr=phi_err[:,1,1], fmt='g.', label=r'$\\phi_{yy}$')\n", + "ax.errorbar(freq, phi[:,0,1], yerr=phi_err[:,0,1], fmt='r.', label=r'$\\phi_{xy}$')\n", + "ax.errorbar(freq, phi[:,1,0], yerr=phi_err[:,1,0], fmt='b.', label=r'$\\phi_{yx}$')\n", + "plt.xlabel('freq')\n", + "plt.ylabel(r'phase $\\phi$ (degrees)');\n", + "plt.legend()\n", + "plt.ylim(-180, 180)" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file