Skip to content

Commit

Permalink
changed paths
Browse files Browse the repository at this point in the history
  • Loading branch information
dswigh committed Jan 17, 2024
1 parent a880d9c commit 7b4b0f5
Showing 1 changed file with 121 additions and 19 deletions.
140 changes: 121 additions & 19 deletions notebooks/inspect_orderly_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand All @@ -32,7 +32,7 @@
"'[S:1](=[O:4])(=[O:3])=[O:2].[S:5](=[O:9])(=[O:8])([OH:7])[OH:6]>>[OH:8][S:5]([OH:9])(=[O:7])=[O:6].[O:2]=[S:1](=[O:4])=[O:3] |f:2.3|'"
]
},
"execution_count": 10,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -53,7 +53,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -62,7 +62,7 @@
"'uspto-grants-1993_09'"
]
},
"execution_count": 12,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -73,13 +73,13 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Can also find ORD file just given the hash:\n",
"def open_ord(ord_hash):\n",
" folder_path = '/Users/dsw46/Projects_local/ORDerly/data/ord'\n",
" folder_path = '/Users/danielwigh/projects_local/ORDerly_project/ORDerly/data/ord'\n",
" # look for files within that folder path or deeper for a file that contains the ord_hash\n",
" for root, dirs, files in os.walk(folder_path):\n",
" for file in files:\n",
Expand All @@ -93,25 +93,45 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'uspto-grants-2001_07'"
"'uspto-grants-1986_09'"
]
},
"execution_count": 39,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = open_ord('ord_dataset-85c00026681b46f89ef8634d2b8618c3')\n",
"data = open_ord('ord_dataset-0b70410902ae4139bd5d334881938f69')\n",
"data.name"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/danielwigh/projects_local/ORDerly_project/ORDerly/notebooks'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pwd"
]
},
{
"attachments": {},
"cell_type": "markdown",
Expand All @@ -122,7 +142,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -131,34 +151,116 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"path = \"../data/orderly/uspto_no_trust/extracted_ords/uspto-grants-1993_09.parquet\"\n",
"path = \"../data/orderly/uspto_no_trust/extracted_ords/uspto-grants-1986_09.parquet\"\n",
"df = pd.read_parquet(path)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'[S:1](=[O:4])(=[O:3])=[O:2].[S:5](=[O:9])(=[O:8])([OH:7])[OH:6]>>[OH:8][S:5]([OH:9])(=[O:7])=[O:6].[O:2]=[S:1](=[O:4])=[O:3]'"
"262 C([O:8][C:9](=[O:32])[C@@H:10]1[CH2:14][CH2:13...\n",
"265 C(OC([N:11]1[CH2:15][CH2:14][CH2:13][C@H:12]1[...\n",
"273 C([O:8][C:9](=[O:35])[C@@H:10]1[CH2:14][CH2:13...\n",
"275 C([O:8][C:9](=[O:25])[C@H:10]([CH2:19][C:20]1[...\n",
"276 C(OC([N:11]1[CH2:15][CH2:14][CH2:13][C@H:12]1[...\n",
"277 C(OC([N:11]1[CH2:23][CH2:22][CH2:21][C@H:12]1[...\n",
"279 C(OC([N:11]1[CH2:15][CH2:14][CH2:13][C@H:12]1[...\n",
"281 C(OC([N:11]1[CH2:15][CH2:14][CH2:13][C@H:12]1[...\n",
"285 C(OC([N:11]1[CH2:15][CH2:14][CH2:13][C@H:12]1[...\n",
"368 [ClH:1].C(OC([NH:12][CH:13]([CH2:23][CH2:24][C...\n",
"377 [C:1]([O:4][CH2:5][CH2:6][CH2:7][NH:8]C(OCC1C=...\n",
"379 [ClH:1].[C:2]([O:5][CH2:6][CH2:7][CH2:8][NH:9]...\n",
"582 [CH3:1][C:2]1[O:3][C:4](=[O:14])[C:5](=[CH:7][...\n",
"Name: rxn_str, dtype: object"
]
},
"execution_count": 31,
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['rxn_str'][0]"
"# find df['rxn_str'] which contains [Pd] and [C]\n",
"\n",
"filtered_df = df[df['rxn_str'].str.contains('\\[Pd\\]') & df['rxn_str'].str.contains('\\[C\\]')]\n",
"filtered_df['rxn_str']\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'C([O:8][C:9](=[O:32])[C@@H:10]1[CH2:14][CH2:13][CH2:12][N:11]1[C:15](=[O:31])[C@H:16]([CH2:25][C:26]1[N:30]=[CH:29][NH:28][CH:27]=1)[NH:17][C:18]([C@H:20]1[NH:23][C:22](=[O:24])[CH2:21]1)=[O:19])C1C=CC=CC=1>CO.[C].[Pd]>[NH:23]1[C@H:20]([C:18]([NH:17][C@H:16]([C:15]([N:11]2[CH2:12][CH2:13][CH2:14][C@H:10]2[C:9]([OH:32])=[O:8])=[O:31])[CH2:25][C:26]2[N:30]=[CH:29][NH:28][CH:27]=2)=[O:19])[CH2:21][C:22]1=[O:24]'"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[262]['rxn_str']"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"rxn = 'C([O:8][C:9](=[O:32])[C@@H:10]1[CH2:14][CH2:13][CH2:12][N:11]1[C:15](=[O:31])[C@H:16]([CH2:25][C:26]1[N:30]=[CH:29][NH:28][CH:27]=1)[NH:17][C:18]([C@H:20]1[NH:23][C:22](=[O:24])[CH2:21]1)=[O:19])C1C=CC=CC=1>CO.[C].[Pd]>[NH:23]1[C@H:20]([C:18]([NH:17][C@H:16]([C:15]([N:11]2[CH2:12][CH2:13][CH2:14][C@H:10]2[C:9]([OH:32])=[O:8])=[O:31])[CH2:25][C:26]2[N:30]=[CH:29][NH:28][CH:27]=2)=[O:19])[CH2:21][C:22]1=[O:24]'\n",
"react,ag,prod=rxn.split('>')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'O=C1C[C@@H](C(=O)N[C@@H](Cc2c[nH]cn2)C(=O)N2CCC[C@H]2C(=O)O)N1'"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[262]['product_000']\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"O=C1C[C@@H](C(=O)N[C@@H](Cc2c[nH]cn2)C(=O)N2CCC[C@H]2C(=O)O)N1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"attachments": {},
"cell_type": "markdown",
Expand Down Expand Up @@ -1248,7 +1350,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.10.13"
},
"orig_nbformat": 4
},
Expand Down

0 comments on commit 7b4b0f5

Please sign in to comment.