From 0a56dac44ab4314987fb89eca47d78eb88c1848e Mon Sep 17 00:00:00 2001 From: enarjord Date: Sat, 4 Jun 2022 20:24:30 +0200 Subject: [PATCH] refresh correlation.ipynb --- correlation.ipynb | 61 +++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 42 deletions(-) diff --git a/correlation.ipynb b/correlation.ipynb index d4998daf1..d6f26c1a5 100644 --- a/correlation.ipynb +++ b/correlation.ipynb @@ -22,6 +22,7 @@ "from time import time, sleep\n", "import os\n", "import pandas as pd\n", + "import asyncio\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ] @@ -64,35 +65,19 @@ "outputs": [], "source": [ "info = await bot.public_get(bot.endpoints[\"exchange_info\"])\n", - "symbols = [e[\"symbol\"] for e in info[\"symbols\"] if e[\"symbol\"].endswith(\"USDT\")]\n", - "\n", - "interval = \"5m\"\n", - "\n", - "fpath = f\"logs/binance/ohlcvs_{interval}/\"\n", - "if os.path.exists(fpath):\n", - " print(\"loading cache\")\n", - " csvs = {}\n", - " for f in [x for x in os.listdir(fpath) if x.endswith(\".csv\")]:\n", - " csvs[f[:-4]] = pd.read_csv(fpath + f)\n", - "else:\n", - " fpath = make_get_filepath(f\"logs/binance/ohlcvs_{interval}/\")\n", - "\n", - " data = {}\n", - " for s in sorted(symbols):\n", - " print(s, end=\" \")\n", - " ohlcvs = await bot.fetch_ohlcvs(symbol=s, interval=interval)\n", - " data[s] = ohlcvs\n", - " sleep(0.5)\n", - " # cache as csv for later use\n", - " csvs = {}\n", - " fpath = make_get_filepath(f\"logs/binance/ohlcvs_{interval}/\")\n", - " for s in data:\n", - " csvs[s] = pd.DataFrame(data[s])\n", - " csvs[s].to_csv(f\"{fpath}{s}.csv\")\n", - "n_days = (\n", - " (csvs[symbols[0]].timestamp.iloc[-1] - csvs[symbols[0]].timestamp.iloc[0]) / 1000 / 60 / 60 / 24\n", - ")\n", - "print(\"n days\", n_days)" + "csvs = {}\n", + "syms = [x['symbol'] for x in info['symbols'] if x['symbol'].endswith('USDT')]\n", + "n = 7\n", + "interval = '5m'\n", + "while True:\n", + " if not syms:\n", + " break\n", + " subset = syms[:n]\n", + " syms = syms[n:]\n", + " res = await asyncio.gather(*[bot.fetch_ohlcvs(symbol=s, interval=interval) for s in subset])\n", + " for s, r in zip(subset, res):\n", + " csvs[s] = pd.DataFrame(r)\n", + " print(subset, len(syms))" ] }, { @@ -163,17 +148,8 @@ "outputs": [], "source": [ "# sum correlation of each symbol to every other symbol\n", - "sorted([(sum(corrs_dict[s].values()), s) for s in corrs_dict])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# sum abs correlation of each symbol to every other symbol\n", - "sorted([(sum(corrs_dict_abs[s].values()), s) for s in corrs_dict])" + "corr_sorted = sorted([(sum(corrs_dict[s].values()), s) for s in corrs_dict])\n", + "corr_sorted" ] }, { @@ -186,7 +162,8 @@ "mean_prices = pd.concat([e.close / e.close.iloc[0] for e in csvs.values()], axis=1).mean(axis=1)\n", "\n", "# compare individual symbol to mean\n", - "symbol = \"SOLUSDT\"\n", + "symbol = \"SXPUSDT\"\n", + "# symbol = corr_sorted[0][1] # least correlated symbol\n", "mean_prices.rename(\"mean_prices\").plot()\n", "(csvs[symbol].close / csvs[symbol].close.iloc[0]).rename(symbol).plot()\n", "plt.legend()" @@ -230,7 +207,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.4" } }, "nbformat": 4,