def gsheet_to_df(sheet_id, *, timeout=30):
    """Download a Google spreadsheet as CSV and load it into a DataFrame.

    Args:
        sheet_id: The key of the spreadsheet, i.e. the value passed as the
            ``key`` query parameter of the Google Docs export URL.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the caller indefinitely.

    Returns:
        A ``pandas.DataFrame`` parsed from the CSV export of the sheet.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    base_url = "https://docs.google.com/spreadsheet/ccc"
    # Let requests assemble the query string so that sheet_id is URL-encoded;
    # for a plain key this yields the same "?key=<id>&output=csv" URL.
    response = requests.get(
        base_url,
        params={"key": sheet_id, "output": "csv"},
        timeout=timeout,
    )
    # Fail loudly instead of handing an error page to the CSV parser.
    # NOTE(review): a non-public sheet may still answer 200 with an HTML
    # login page -- confirm whether callers need an extra content check.
    response.raise_for_status()
    return pd.read_csv(BytesIO(response.content))
"765494d8", + "metadata": {}, + "outputs": [], + "source": [ + "places = Place.objects.filter(uri__uri__icontains=\"schnitzler-tagebuch\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f47bcd68", + "metadata": {}, + "outputs": [], + "source": [ + "uris = []\n", + "for x in tqdm(places):\n", + " for y in x.uri_set.all():\n", + " if \"schnitzler-tagebuch\" in y.uri:\n", + " uris.append(y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f820cd7f", + "metadata": {}, + "outputs": [], + "source": [ + "len(uris)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe06fca6", + "metadata": {}, + "outputs": [], + "source": [ + "for x in tqdm(uris):\n", + " x.delete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8fc5b7d", + "metadata": {}, + "outputs": [], + "source": [ + "for i, row in tqdm(df.iterrows(), total=len(df)):\n", + " ent_uri = pmb_uri.format(row[\"ID\"])\n", + " uri = Uri.objects.get(uri=ent_uri)\n", + " temp_ent = uri.entity\n", + " ent = temp_ent.get_child_entity()\n", + " new_uri, _ = Uri.objects.get_or_create(\n", + " uri=row[\"URL\"],\n", + " domain=\"schnitzler-tagebuch\",\n", + " entity=ent\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04f28151", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/issue__87.ipynb b/issue__87.ipynb new file mode 100644 index 0000000..348d08f --- /dev/null +++ b/issue__87.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": 1, + "id": "73b5462d", + "metadata": {}, + "outputs": [], + "source": [ + "uris = Uri.objects.filter(domain__icontains=\"default\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6fe59152", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ", ]>" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uris" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3046b3cd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/set_env_variables.sh b/set_env_variables.sh index e0a3156..77758e8 100644 --- a/set_env_variables.sh +++ b/set_env_variables.sh @@ -1 +1 @@ -export $(grep -v '^#' .env | xargs) \ No newline at end of file +export $(grep -v '^#' .secret | xargs) \ No newline at end of file