-
Notifications
You must be signed in to change notification settings - Fork 0
/
CITATION.bib
10 lines (10 loc) · 1.64 KB
/
CITATION.bib
1
2
3
4
5
6
7
8
9
10
% Preferred citation for this repository: the Digital Discovery article
% describing the SMILES-to-properties-transformer (SPT).
% The publisher export carried a placeholder page range ("-"); it is omitted
% here. Add volume and pages from the publisher record once verified.
% NOTE(review): "UNIFACDortmund" in the abstract looks like a flattened
% subscript from the export -- confirm the intended spelling.
@article{D2DD00058J,
  author    = {Winter, Benedikt and Winter, Clemens and Schilling, Johannes and Bardow, Andr{\'e}},
  title     = {A smile is all you need: predicting limiting activity coefficients from {SMILES} with natural language processing},
  journal   = {Digital Discovery},
  year      = {2022},
  publisher = {Royal Society of Chemistry},
  doi       = {10.1039/D2DD00058J},
  url       = {https://doi.org/10.1039/D2DD00058J},
  abstract  = {The knowledge of mixtures' phase equilibria is crucial in nature and technical chemistry. Phase equilibria calculations of mixtures require activity coefficients. However, experimental data on activity coefficients are often limited due to the high cost of experiments. For an accurate and efficient prediction of activity coefficients, machine learning approaches have been recently developed. However, current machine learning approaches still extrapolate poorly for activity coefficients of unknown molecules. In this work, we introduce a SMILES-to-properties-transformer (SPT), a natural language processing network, to predict binary limiting activity coefficients from SMILES codes. To overcome the limitations of available experimental data, we initially train our network on a large dataset of synthetic data sampled from COSMO-RS (10 million data points) and then fine-tune the model on experimental data (20 870 data points). This training strategy enables the SPT to accurately predict limiting activity coefficients even for unknown molecules, cutting the mean prediction error in half compared to state-of-the-art models for activity coefficient predictions such as COSMO-RS and UNIFACDortmund, and improving on recent machine learning approaches.},
}