Skip to content

Commit

Permalink
notebook which closes #58 added [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
csae8092 committed Jan 12, 2024
1 parent aca4ca4 commit b1e7d7f
Showing 1 changed file with 76 additions and 0 deletions.
76 changes: 76 additions & 0 deletions wikidata_wiki__to_wikidata_entity.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from apis_core.apis_metainfo.models import Uri"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cols = [\"id\", \"uri\", \"domain\", \"entity__id\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = Uri.objects.filter(uri__icontains=\"/Q\").exclude(uri__icontains=\"pedia\").values_list(*cols)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(data, columns=cols)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for ent, ndf in df.groupby(\"entity__id\"):\n",
" if len(ndf) > 1:\n",
" for i, row in ndf.iterrows():\n",
" if \"org/wiki\" in row[\"uri\"]:\n",
" uri_obj = Uri.objects.get(id=row[\"id\"])\n",
" uri_obj.delete()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit b1e7d7f

Please sign in to comment.