Skip to content

Commit

Permalink
feat: implement benchmark field 2 (#128)
Browse files Browse the repository at this point in the history
Co-authored-by: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Mar 23, 2023
1 parent ff03074 commit b2ae8dc
Show file tree
Hide file tree
Showing 27 changed files with 158 additions and 24 deletions.
5 changes: 4 additions & 1 deletion data/caco2_wang/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ targets:
uris:
- http://www.bioassayontology.org/bao#BAO_0010008
- http://purl.obolibrary.org/obo/MI_2162
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
identifiers:
- id: SMILES
type: SMILES
Expand Down
8 changes: 7 additions & 1 deletion data/caco2_wang/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,13 @@ def get_and_transform_data():
],
},
],
"split_col": "split", # name of the column that contains the split information
"benchmarks": [
{
"name": "TDC", # unique benchmark name
"link": "https://tdcommons.ai/", # benchmark URL
"split_column": "split", # name of the column that contains the split information
},
],
"identifiers": [
{
"id": "SMILES", # column name
Expand Down
5 changes: 4 additions & 1 deletion data/cav3_t-type_calcium_channels_butkiewicz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ links:
description: corresponding publication
- url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
description: corresponding publication
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
num_points: 100875
url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
bibtex:
Expand Down
8 changes: 7 additions & 1 deletion data/cav3_t-type_calcium_channels_butkiewicz/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,13 @@ def get_and_transform_data():
"description": "corresponding publication",
},
],
"split_col": "split", # name of the column that contains the split information
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
},
],
"num_points": len(df), # number of datapoints in this dataset
"url": "https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al",
"bibtex": [
Expand Down
5 changes: 4 additions & 1 deletion data/choline_transporter_butkiewicz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ targets:
- 588401
- 493222
- 602208
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
identifiers:
- id: SMILES
type: SMILES
Expand Down
8 changes: 7 additions & 1 deletion data/choline_transporter_butkiewicz/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,13 @@ def get_and_transform_data():
"pubchem_aids": [488975, 493221, 504840, 588401, 493222, 602208],
},
],
"split_col": "split",
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"identifiers": [
{
"id": "SMILES", # column name
Expand Down
5 changes: 4 additions & 1 deletion data/clintox/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ targets:
- http://purl.bioontology.org/ontology/MESH/Q000633
- https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27990
- https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27955
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
identifiers:
- id: SMILES
type: SMILES
Expand Down
8 changes: 7 additions & 1 deletion data/clintox/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,13 @@ def get_and_transform_data():
],
},
],
"split_col": "split", # name of the column that contains the split information
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"identifiers": [
{
"id": "SMILES", # column name
Expand Down
5 changes: 4 additions & 1 deletion data/kcnq2_potassium_channel_butkiewicz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ links:
description: corresponding publication
- url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
description: corresponding publication
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
num_points: 302405
url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
bibtex:
Expand Down
8 changes: 7 additions & 1 deletion data/kcnq2_potassium_channel_butkiewicz/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,13 @@ def get_and_transform_data():
"description": "corresponding publication",
},
],
"split_col": "split",
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"num_points": len(df), # number of datapoints in this dataset
"url": "https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al",
"bibtex": [
Expand Down
5 changes: 4 additions & 1 deletion data/ld50_zhu/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ license: CC BY 4.0
links:
- url: https://doi.org/10.1021/tx900189p
description: corresponding publication
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
num_points: 7385
url: https://tdcommons.ai/single_pred_tasks/tox/#acute-toxicity-ld50
bibtex:
Expand Down
8 changes: 7 additions & 1 deletion data/ld50_zhu/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,13 @@ def get_and_transform_data():
"description": "corresponding publication",
},
],
"split_col": "split",
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"num_points": len(df), # number of datapoints in this dataset
"url": "https://tdcommons.ai/single_pred_tasks/tox/#acute-toxicity-ld50",
"bibtex": [
Expand Down
5 changes: 4 additions & 1 deletion data/m1_muscarinic_receptor_agonists_butkiewicz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ links:
description: corresponding publication
- url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
description: corresponding publication
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
num_points: 61833
url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
bibtex:
Expand Down
8 changes: 7 additions & 1 deletion data/m1_muscarinic_receptor_agonists_butkiewicz/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,13 @@ def get_and_transform_data():
"description": "corresponding publication",
},
],
"split_col": "split",
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
},
],
"num_points": len(df), # number of datapoints in this dataset
"url": "https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al",
"bibtex": [
Expand Down
5 changes: 4 additions & 1 deletion data/m1_muscarinic_receptor_antagonists_butkiewicz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ links:
description: corresponding publication
- url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
description: corresponding publication
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
num_points: 61756
url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
bibtex:
Expand Down
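8 changes: 7 additions & 1 deletion data/m1_muscarinic_receptor_antagonists_butkiewicz/transform.py
Original file line number Diff line number Diff line change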
Expand Up @@ -75,7 +75,13 @@ def get_and_transform_data():
"description": "corresponding publication",
},
],
"split_col": "split", # name of the column that contains the split information
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"num_points": len(df), # number of datapoints in this dataset
"url": "https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al",
"bibtex": [
Expand Down
5 changes: 4 additions & 1 deletion data/orexin1_receptor_butkiewicz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ identifiers:
- id: SMILES
type: SMILES
description: SMILES
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
license: CC BY 4.0
links:
- url: https://doi.org/10.3390/molecules18010735
Expand Down
8 changes: 7 additions & 1 deletion data/orexin1_receptor_butkiewicz/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,13 @@ def get_and_transform_data():
"description": "SMILES", # description (optional, except for "Other")
},
],
"split_col": "split",
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"license": "CC BY 4.0", # license under which the original dataset was published
"links": [ # list of relevant links (original dataset, other uses, etc.)
{
Expand Down
5 changes: 4 additions & 1 deletion data/pampa_ncats/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ links:
description: original dataset link
- url: https://journals.sagepub.com/doi/full/10.1177/24725552211017520
description: corresponding publication
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
num_points: 2034
bibtex:
- |-
Expand Down
8 changes: 7 additions & 1 deletion data/pampa_ncats/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,13 @@ def get_and_transform_data():
"description": "corresponding publication",
},
],
"split_col": "split",
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"num_points": len(df), # number of datapoints in this dataset
"bibtex": [
"""@article{siramshetty2021validating,
Expand Down
5 changes: 4 additions & 1 deletion data/potassium_ion_channel_kir2_1_butkiewicz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ identifiers:
- id: SMILES
type: SMILES
description: SMILES
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
license: CC BY 4.0
links:
- url: https://doi.org/10.3390/molecules18010735
Expand Down
8 changes: 7 additions & 1 deletion data/potassium_ion_channel_kir2_1_butkiewicz/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,13 @@ def get_and_transform_data():
"description": "SMILES", # description (optional, except for "Other")
},
],
"split_col": "split",
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"license": "CC BY 4.0", # license under which the original dataset was published
"links": [ # list of relevant links (original dataset, other uses, etc.)
{
Expand Down
5 changes: 4 additions & 1 deletion data/serine_threonine_kinase_33_butkiewicz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ identifiers:
- id: SMILES
type: SMILES
description: SMILES
split_col: split
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
license: CC BY 4.0
links:
- url: https://doi.org/10.3390/molecules18010735
Expand Down
8 changes: 7 additions & 1 deletion data/serine_threonine_kinase_33_butkiewicz/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,13 @@ def get_and_transform_data():
"description": "SMILES", # description (optional, except for "Other")
},
],
"split_col": "split",
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"license": "CC BY 4.0", # license under which the original dataset was published
"links": [ # list of relevant links (original dataset, other uses, etc.)
{
Expand Down
4 changes: 4 additions & 0 deletions data/tyrosyl-dna_phosphodiesterase_butkiewicz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ links:
description: corresponding publication
- url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
description: corresponding publication
benchmarks:
- name: TDC
link: https://tdcommons.ai/
split_column: split
num_points: 341365
url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
bibtex:
Expand Down
7 changes: 7 additions & 0 deletions data/tyrosyl-dna_phosphodiesterase_butkiewicz/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ def get_and_transform_data():
"description": "corresponding publication",
},
],
"benchmarks": [
{
"name": "TDC",
"link": "https://tdcommons.ai/",
"split_column": "split",
}
],
"num_points": len(df), # number of datapoints in this dataset
"url": "https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al",
"bibtex": [
Expand Down
15 changes: 15 additions & 0 deletions src/chemnlp/data_val/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,19 @@ class Link(YamlModel):
description: str


class Benchmark(YamlModel):
    """Benchmark information."""

    # NOTE: fields are documented with comments rather than bare string
    # literals. A string literal placed *before* an attribute is attached by
    # documentation tools to the previous attribute (or to nothing), which
    # would mislabel every field below.

    # The name of the benchmark, e.g. MoleculeNet.
    name: str

    # The link to the benchmark.
    link: str

    # The name of the column in the dataset that indicates the fold of the
    # data point.
    split_column: str


class Dataset(YamlModel):
name: str
description: str
Expand All @@ -163,6 +176,8 @@ class Dataset(YamlModel):
fields: Optional[Dict[str, TemplateField]]
links: List[Link]

benchmarks: Optional[List[Benchmark]]

@validator("num_points")
def num_points_must_be_positive(cls, v):
if v < 0:
Expand Down

0 comments on commit b2ae8dc

Please sign in to comment.