Skip to content

Commit

Permalink
closes #8
Browse files Browse the repository at this point in the history
  • Loading branch information
csae8092 committed Jan 12, 2024
1 parent b1e7d7f commit ba1c5f2
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 1 deletion.
12 changes: 12 additions & 0 deletions dumper/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os

import owncloud
import pandas as pd
import requests
from io import BytesIO
from AcdhArcheAssets.uri_norm_rules import get_normalized_uri
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
Expand Down Expand Up @@ -80,3 +82,13 @@ def process_beacon(beacon_url, domain):
new_uri.save()
created += 1
return created


def gsheet_to_df(sheet_id, timeout=30):
    """Download a Google Sheet as CSV and load it into a DataFrame.

    Args:
        sheet_id: Key of the Google spreadsheet; it is inserted into the
            CSV export URL as the ``key`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.
            Passed straight to ``requests.get``.

    Returns:
        A ``pandas.DataFrame`` parsed from the CSV body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    GDRIVE_BASE_URL = "https://docs.google.com/spreadsheet/ccc?key="
    url = f"{GDRIVE_BASE_URL}{sheet_id}&output=csv"
    # Without a timeout, requests can block forever on an unresponsive host.
    r = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of feeding an error page to read_csv.
    r.raise_for_status()
    return pd.read_csv(BytesIO(r.content))
128 changes: 128 additions & 0 deletions issue__8.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2fe08400",
"metadata": {},
"outputs": [],
"source": [
"from tqdm import tqdm\n",
"from dumper.utils import gsheet_to_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a98754b2",
"metadata": {},
"outputs": [],
"source": [
"pmb_uri = \"https://pmb.acdh.oeaw.ac.at/entity/{}/\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08522f35",
"metadata": {},
"outputs": [],
"source": [
"df = gsheet_to_df(\"14pqKPvNUFn-U2TBMAP1PpToGOSgI6_fwlhqDu-o4YtQ\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "765494d8",
"metadata": {},
"outputs": [],
"source": [
"places = Place.objects.filter(uri__uri__icontains=\"schnitzler-tagebuch\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f47bcd68",
"metadata": {},
"outputs": [],
"source": [
"uris = []\n",
"for x in tqdm(places):\n",
" for y in x.uri_set.all():\n",
" if \"schnitzler-tagebuch\" in y.uri:\n",
" uris.append(y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f820cd7f",
"metadata": {},
"outputs": [],
"source": [
"len(uris)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe06fca6",
"metadata": {},
"outputs": [],
"source": [
"for x in tqdm(uris):\n",
" x.delete()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8fc5b7d",
"metadata": {},
"outputs": [],
"source": [
"for i, row in tqdm(df.iterrows(), total=len(df)):\n",
" ent_uri = pmb_uri.format(row[\"ID\"])\n",
" uri = Uri.objects.get(uri=ent_uri)\n",
" temp_ent = uri.entity\n",
" ent = temp_ent.get_child_entity()\n",
" new_uri, _ = Uri.objects.get_or_create(\n",
" uri=row[\"URL\"],\n",
" domain=\"schnitzler-tagebuch\",\n",
" entity=ent\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04f28151",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
64 changes: 64 additions & 0 deletions issue__87.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "73b5462d",
"metadata": {},
"outputs": [],
"source": [
"uris = Uri.objects.filter(domain__icontains=\"default\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6fe59152",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<QuerySet [<Uri: https://pmb.acdh.oeaw.ac.at/entity/148271/>, <Uri: https://pmb.acdh.oeaw.ac.at/entity/148275/>]>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"uris"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3046b3cd",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion set_env_variables.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export $(grep -v '^#' .env | xargs)
export $(grep -v '^#' .secret | xargs)

0 comments on commit ba1c5f2

Please sign in to comment.