From 1cb14cbd13bd94f6e9f40f2e9313bf585569e84c Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Thu, 19 Dec 2019 18:50:47 -0500 Subject: [PATCH 01/22] add discussion in interpretability section and update section on molecular design --- build/ci/cache/requests-cache.sqlite | Bin 0 -> 20480 bytes build/output/citations.tsv | 1 + build/output/manuscript.md | 38 +++++++++++++++++++++++++++ build/output/references.json | 1 + build/output/variables.json | 17 ++++++++++++ build/webpage/v/freeze/index.html | 19 ++++++++++++++ build/webpage/v/latest | 1 + content/05.treat.md | 36 +++++++++++-------------- content/06.discussion.md | 34 +++++++++++++----------- content/citation-tags.tsv | 7 ++++- 10 files changed, 117 insertions(+), 37 deletions(-) create mode 100644 build/ci/cache/requests-cache.sqlite create mode 100644 build/output/citations.tsv create mode 100644 build/output/manuscript.md create mode 100644 build/output/references.json create mode 100644 build/output/variables.json create mode 100644 build/webpage/v/freeze/index.html create mode 120000 build/webpage/v/latest diff --git a/build/ci/cache/requests-cache.sqlite b/build/ci/cache/requests-cache.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..5f5d7c8e968a7214f2eb1395eb7369bddc7dc5be GIT binary patch literal 20480 zcmeI%K}*9h7=YoV>xvUbcPTwCHx)!E9(UQ49%K`B?y^%?F~YE}y0)S}a6ibKN1MWm z9(wdr-ayhXA?-(=^b&G^H(TdgJ*C^1I9KOlAfyyyrGyYxS-WN37t^=*o$@Jv#Z93l z&PHE-_gmO*Pq?|eb9?TzX)@yoAb1$Bir#uo)qiNqyEU=lJZ;~ z#YLW0{m8r<1*YxZNXH)zWm=`$>g+Y$WIC$@>u=I}yg74VIes{7%3OaS3hJ;^dL9_l zzG()gp&)<&0tg_000IagfB*srAb>zy1rE)^IRCeGds!C(2q1s}0tg_000IagfB*uO z0Qdj=2nZm600IagfB*srAb0R#|0009ILKmY**5J2Du^0!$7 literal 0 HcmV?d00001 diff --git a/build/output/citations.tsv b/build/output/citations.tsv new file mode 100644 index 00000000..e4349893 --- /dev/null +++ b/build/output/citations.tsv @@ -0,0 +1 @@ +manuscript_citekey detagged_citekey standard_citekey short_citekey diff --git a/build/output/manuscript.md b/build/output/manuscript.md new file mode 100644 index 00000000..df0d610a --- /dev/null +++ b/build/output/manuscript.md @@ -0,0 +1,38 @@ +--- +author-meta: [] +date-meta: '2019-12-19' +header-includes: ' + + + + + + + + + + + + + + + + + + + + + + + + + + ' +... + diff --git a/build/output/references.json b/build/output/references.json new file mode 100644 index 00000000..fe51488c --- /dev/null +++ b/build/output/references.json @@ -0,0 +1 @@ +[] diff --git a/build/output/variables.json b/build/output/variables.json new file mode 100644 index 00000000..88840dd9 --- /dev/null +++ b/build/output/variables.json @@ -0,0 +1,17 @@ +{ + "pandoc": { + "date-meta": "2019-12-19", + "author-meta": [], + "header-includes": "\n\n\n\n\n\n\n\n\n\n\n\n\n" + }, + "manubot": { + "date": "December 19, 2019", + "authors": [], + "manuscript_stats": { + "reference_counts": { + "total": 0 + }, + "word_count": 0 + } + } +} diff --git a/build/webpage/v/freeze/index.html b/build/webpage/v/freeze/index.html new file mode 100644 index 00000000..bff3da63 --- /dev/null +++ b/build/webpage/v/freeze/index.html @@ -0,0 +1,19 @@ + + + + + + + + Page Redirection + + + If you are not redirected automatically, follow this link. 
+ + diff --git a/build/webpage/v/latest b/build/webpage/v/latest new file mode 120000 index 00000000..c2c027fe --- /dev/null +++ b/build/webpage/v/latest @@ -0,0 +1 @@ +local \ No newline at end of file diff --git a/content/05.treat.md b/content/05.treat.md index 96db7d25..3c71ba95 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -180,28 +180,24 @@ However, in the long term, atomic convolutions may ultimately overtake grid-base #### *De novo* drug design -*De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. +*De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery in-silico [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. It explores an estimated 1060 synthesizable organic molecules with drug-like properties without explicit enumeration [@doi:10.1002/wcms.1104]. -To test or score structures, algorithms like those discussed earlier are used. +To test or score structures, physics-based simulation could be used, or machine learning models based on techniques discussed may be used, as they are much more computationally efficient. To "design" and "synthesize", traditional *de novo* design software relied on classical optimizers such as genetic algorithms. -Unfortunately, this often leads to overfit, "weird" molecules, which are difficult to synthesize in the lab. -Current programs have settled on rule-based virtual chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. -Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. -In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. -The challenge of generating molecules has parallels to the generation of syntactically and semantically correct text [@arxiv:1308.0850]. - -As deep learning models that directly output (molecular) graphs remain under-explored, generative neural networks for drug design typically represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. -This allows treating molecules as sequences and leveraging recent progress in recurrent neural networks. -Gómez-Bombarelli et al. designed a SMILES-to-SMILES autoencoder to learn a continuous latent feature space for chemicals [@tag:Gomezb2016_automatic]. -In this learned continuous space it was possible to interpolate between continuous representations of chemicals in a manner that is not possible with discrete -(e.g. bit vector or string) features or in symbolic, molecular graph space. -Even more interesting is the prospect of performing gradient-based or Bayesian optimization of molecules within this latent space. -The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. 
+ +In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] + +Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. + +The first successful demonstration of a deep learning based approach for molecular optimization occured in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic]. +In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete +(e.g. bit vector or string) features or in symbolic, molecular graph space. Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. A drawback of the SMILES-to-SMILES autoencoder is that not all SMILES strings produced by the autoencoder's decoder correspond to valid chemical structures. -Recently, the Grammar Variational Autoencoder, which takes the SMILES grammar into account and is guaranteed to produce syntactically valid SMILES, has been proposed to alleviate this issue [@arxiv:1703.01925]. +The Grammar Variational Autoencoder, which takes the SMILES grammar into account and is guaranteed to produce syntactically valid SMILES, helps alleviate this issue to some extent [@arxiv:1703.01925]. Another approach to *de novo* design is to train character-based RNNs on large collections of molecules, for example, ChEMBL [@doi:10.1093/nar/gkr777], to first obtain a generic generative model for drug-like compounds [@tag:Segler2017_drug_design]. -These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. -The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796]. -Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules. -The great flexibility of neural networks, and progress in generative models offers many opportunities for deep architectures in *de novo* design (e.g. the adaptation of GANs for molecules). +These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. 
The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796]. Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules. + +Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules.[@tag:Elton_molecular_design_review] A reinforcement learning agent developed by Zhou et al. demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network.[@doi:10.1038/s41598-019-47148-x] As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1).[@tag:Zhavoronkov2019_drugs] Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1.[@tag:Zhavoronkov2019_drugs] + +It is worth pointing out that it has been shown that classical genetic algorithms can compete with many of the most advanced deep learning methods for molecular optimization.[@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C] Such genetic algorithms use hard coded rules based possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. Still, there are many avenues for improving current deep learning systems and the future of the field looks bright. diff --git a/content/06.discussion.md b/content/06.discussion.md index 364ff199..910a02c7 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -6,14 +6,12 @@ Here we examine these factors that may impede further progress, ask what steps h ### Customizing deep learning models reflects a tradeoff between bias and variance Some of the challenges in applying deep learning are shared with other machine learning methods. -In particular, many problem-specific optimizations described in this review reflect a recurring universal tradeoff---controlling the flexibility of a model in order to maximize predictivity. -Methods for adjusting the flexibility of deep learning models include dropout, reduced data projections, and transfer learning (described below). -One way of understanding such model optimizations is that they incorporate external information to limit model flexibility and thereby improve predictions. -This balance is formally described as a tradeoff between "bias and variance" +In particular, many problem-specific optimizations described in this review reflect a recurring universal tradeoff---controlling the flexibility of a model in order to maximize generalizability and prevent overfitting. +Methods for for preventing overfitting in deep learning models include adding regularization terms to the loss, dropout, using reduced data projections, and early stopping. 
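To make the preceding list concrete, the sketch below is an illustration added alongside this review, not taken from any cited work: it shows how an L2 penalty (weight decay), dropout, and early stopping typically enter a training loop, assuming PyTorch is available; the synthetic data and every hyperparameter value are arbitrary placeholders.

```python
# Hedged sketch (assumes PyTorch): weight decay, dropout, and early stopping
# in a minimal training loop on synthetic data.
import torch
from torch import nn

torch.manual_seed(0)
X = torch.randn(512, 20)
y = (X[:, 0] + 0.5 * X[:, 1] > 0).long()
X_train, y_train, X_val, y_val = X[:400], y[:400], X[400:], y[400:]

model = nn.Sequential(
    nn.Linear(20, 64), nn.ReLU(),
    nn.Dropout(p=0.5),          # dropout rate is a hyperparameter
    nn.Linear(64, 2),
)
opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)  # L2 penalty
loss_fn = nn.CrossEntropyLoss()

best_val, patience, bad_epochs = float("inf"), 10, 0
for epoch in range(200):
    model.train()
    opt.zero_grad()
    loss = loss_fn(model(X_train), y_train)
    loss.backward()
    opt.step()

    model.eval()
    with torch.no_grad():
        val_loss = loss_fn(model(X_val), y_val).item()
    if val_loss < best_val:          # early stopping tracks validation loss
        best_val, bad_epochs = val_loss, 0
    else:
        bad_epochs += 1
        if bad_epochs >= patience:   # patience is another hyperparameter
            break                    # a fuller version would also restore the best weights
print(f"stopped at epoch {epoch}, best validation loss {best_val:.3f}")
```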
+The need for balance between model expressiveness and overfitting is formally described as a tradeoff between "bias and variance" [@url:http://www.deeplearningbook.org/]. -Although the bias-variance tradeoff is common to all machine learning applications, recent empirical and theoretical observations suggest that deep learning models may have uniquely advantageous generalization properties [@tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. -Nevertheless, additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles. +Although the bias-variance tradeoff is is important to take into account in many machine learning tasks, recent empirical and theoretical observations suggest that deep neural networks have uniquely advantageous generalization properties and do not obey the tradeoff as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. According to the bias-variance theory, many of the most successful deep neural networks have so many free parameters they should overfit.[@tag:Belkin2019_PNAS] It has been shown that deep neural networks operate in a regime where they can exactly interpolate their training data yet are still able to generalize.[@tag:Belkin2019_PNAS] Thus, poor generalizability can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory. Additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles. #### Evaluation metrics for imbalanced classification @@ -106,18 +104,22 @@ As a result, several opportunities for innovation arise: understanding the cause Unfortunately, uncertainty quantification techniques are underutilized in the computational biology communities and largely ignored in the current deep learning for biomedicine literature. Thus, the practical value of uncertainty quantification in biomedical domains is yet to be appreciated. -### Interpretation +### Interpretability -As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. -Interpretability matters for two main reasons. -First, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. -However, this would not be possible if the model is a black box. -Second, interpretability is important for trust. -If a model is making medical diagnoses, it is important to ensure the model is making decisions for reliable reasons and is not focusing on an artifact of the data. -A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. -In the context of deep learning, understanding the basis of a model's output is particularly important as deep learning models are unusually susceptible to adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. +As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. 
There are several important reasons to care about interpretability. -As the concept of interpretability is quite broad, many methods described as improving the interpretability of deep learning models take disparate and often complementary approaches. +Firstly, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. +For instance, interpreting a model for predicting chemical properties from molecular graphs may illuminate previously unknown structure-property relations. +It is also useful to see if a model is using known relationships - if not, this may suggest a way to improve the model. +Finally, there is a chance that the model may have learned relationships that are known to be wrong. This can be due to improper training data or due to overfitting on spurious correlations in the training data. + +This is particularly important if a model is making medical diagnoses. A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. + +It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. While this is largely still an unsolved problem, the interpretation of deep learning models can help understand these failure modes and how to prevent them. + +Several different levels of interpretability can be distinguished. Consider a prototypical CNN used for image classification. At a high level, one can perform an occulusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap. Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing]. Finally, if one wishes to analyze the flow of information through a deep neural network layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization] + +A starting point for many discussions of interpretability is the interpretability-accuracy trade-off. The trade-off assumes that only simple models are interpretable and often a delineation is made between “white box" models (linear regression, decision trees) that are assumed to be not very accurate and “black box" models (neural networks, kernel SVMs) which are assumed to be more accurate. This view is becoming outmoded, however with the development of sophisticated tools for interrogating and understanding deep neural networks.[@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] Still, this trade-off motivates a common practice whereby a easy to interpret model is trained next to a hard to interpret one. For instance, in the example discussed by Caruana et al. mentioned earlier, a rule-based model was trained next to a neural network using the same training data to understand the types of relations were learned by the neural network. 
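A minimal sketch of this surrogate-model idea follows; it is an illustration only, assuming scikit-learn is available, and it is not the procedure used by Caruana et al. (a generic decision tree stands in for their rule-based learner). The surrogate is fit to the neural network's predictions rather than the true labels, so its rules approximate what the network has learned.

```python
# Hedged sketch of post-hoc surrogate modelling (assumes scikit-learn): a shallow
# decision tree is fit to a neural network's predictions to expose learned relations.
from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier, export_text

X, y = make_classification(n_samples=2000, n_features=8, n_informative=4, random_state=0)

black_box = MLPClassifier(hidden_layer_sizes=(64, 64), max_iter=500, random_state=0).fit(X, y)
surrogate = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, black_box.predict(X))

# "Fidelity": how often the tree reproduces the network's own predictions.
print("fidelity to the network:", surrogate.score(X, black_box.predict(X)))
print(export_text(surrogate, feature_names=[f"x{i}" for i in range(8)]))
```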
More recently, a method for "distilling" a neural network into a decision tree has been developed.[@tag:Frosst2017_distilling] #### Assigning example-specific importance scores diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv index b4eb9efe..ee40470f 100644 --- a/content/citation-tags.tsv +++ b/content/citation-tags.tsv @@ -20,6 +20,7 @@ Bar2015_nonmed_tl doi:10.1117/12.2083124 Barash2010_splicing_code doi:10.1038/nature09000 Baxt1991_myocardial doi:10.7326/0003-4819-115-11-843 BeaulieuJones2016_ehr_encode doi:10.1016/j.jbi.2016.10.007 +Belkin2019_PNAS doi:10.1073/pnas.1903070116 Bengio2015_prec arxiv:1412.7024 Berezikov2011_mirna doi:10.1038/nrg3079 Bergstra2011_hyper url:https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf @@ -66,6 +67,7 @@ Duvenaud2015_graph_conv url:http://papers.nips.cc/paper/5954-convolutional-netwo Edwards2015_growing_pains doi:10.1145/2771283 Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 Elephas url:https://github.com/maxpumperla/elephas +Elton_molecular_design_review doi:10.1039/C9ME00039A Errington2014_reproducibility doi:10.7554/eLife.04333 Eser2016_fiddle doi:10.1101/081380 Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 @@ -76,6 +78,7 @@ Feinberg2018 doi:10.1056/NEJMra1402513 Finnegan2017_maximum doi:10.1101/105957 Fong2017_perturb doi:10.1109/ICCV.2017.371 Fraga2005 doi:10.1073/pnas.0500398102 +Frosst2017_distilling arxiv:1711.09784 Fu2019 doi:10.1109/TCBB.2019.2909237 Gal2015_dropout arxiv:1506.02142 Gaublomme2015_th17 doi:10.1016/j.cell.2015.11.009 @@ -184,6 +187,7 @@ Meissner2008 doi:10.1038/nature07107 Metaphlan doi:10.1038/nmeth.2066 Meng2016_mllib arxiv:1505.06807 Min2016_deepenhancer doi:10.1109/BIBM.2016.7822593 +Montavon2018_visualization doi:10.1016/j.dsp.2017.10.011 Momeni2018 doi:10.1101/438341 Moritz2015_sparknet arxiv:1511.06051 Mordvintsev2015_inceptionism url:http://googleresearch.blogspot.co.uk/2015/06/inceptionism-going-deeper-into-neural.html @@ -310,7 +314,8 @@ Yoon2016_cancer_reports doi:10.1007/978-3-319-47898-2_21 Yosinski2014 url:https://papers.nips.cc/paper/5347-how-transferable-are-features-in-deep-neural-networks Yosinksi2015_understanding arxiv:1506.06579 Yu2016_melanoma_resnet doi:10.1109/TMI.2016.2642839 -Zeiler2013_visualizing arxiv:1311.2901 +Zhavoronkov2019_drugs doi:10.1038/s41587-019-0224-x +Zeiler2013_visualizing doi:10.1007/978-3-319-10590-1_53 Zeng2015 doi:10.1186/s12859-015-0553-9 Zeng2016_convolutional doi:10.1093/bioinformatics/btw255 Zhang2015_multitask_tl doi:10.1145/2783258.2783304 From e7f6ca6ee794d700f8cdc8d1bdca2f8c3982583b Mon Sep 17 00:00:00 2001 From: Casey Greene Date: Mon, 10 Feb 2020 15:00:15 -0500 Subject: [PATCH 02/22] remove build files --- build/ci/cache/requests-cache.sqlite | Bin 20480 -> 0 bytes build/output/citations.tsv | 1 - build/output/manuscript.md | 38 --------------------------- build/output/references.json | 1 - build/output/variables.json | 17 ------------ build/webpage/v/freeze/index.html | 19 -------------- build/webpage/v/latest | 1 - 7 files changed, 77 deletions(-) delete mode 100644 build/ci/cache/requests-cache.sqlite delete mode 100644 build/output/citations.tsv delete mode 100644 build/output/manuscript.md delete mode 100644 build/output/references.json delete mode 100644 build/output/variables.json delete mode 100644 build/webpage/v/freeze/index.html delete mode 120000 build/webpage/v/latest diff --git a/build/ci/cache/requests-cache.sqlite 
b/build/ci/cache/requests-cache.sqlite deleted file mode 100644 index 5f5d7c8e968a7214f2eb1395eb7369bddc7dc5be..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20480 zcmeI%K}*9h7=YoV>xvUbcPTwCHx)!E9(UQ49%K`B?y^%?F~YE}y0)S}a6ibKN1MWm z9(wdr-ayhXA?-(=^b&G^H(TdgJ*C^1I9KOlAfyyyrGyYxS-WN37t^=*o$@Jv#Z93l z&PHE-_gmO*Pq?|eb9?TzX)@yoAb1$Bir#uo)qiNqyEU=lJZ;~ z#YLW0{m8r<1*YxZNXH)zWm=`$>g+Y$WIC$@>u=I}yg74VIes{7%3OaS3hJ;^dL9_l zzG()gp&)<&0tg_000IagfB*srAb>zy1rE)^IRCeGds!C(2q1s}0tg_000IagfB*uO z0Qdj=2nZm600IagfB*srAb0R#|0009ILKmY**5J2Du^0!$7 diff --git a/build/output/citations.tsv b/build/output/citations.tsv deleted file mode 100644 index e4349893..00000000 --- a/build/output/citations.tsv +++ /dev/null @@ -1 +0,0 @@ -manuscript_citekey detagged_citekey standard_citekey short_citekey diff --git a/build/output/manuscript.md b/build/output/manuscript.md deleted file mode 100644 index df0d610a..00000000 --- a/build/output/manuscript.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -author-meta: [] -date-meta: '2019-12-19' -header-includes: ' - - - - - - - - - - - - - - - - - - - - - - - - - - ' -... - diff --git a/build/output/references.json b/build/output/references.json deleted file mode 100644 index fe51488c..00000000 --- a/build/output/references.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/build/output/variables.json b/build/output/variables.json deleted file mode 100644 index 88840dd9..00000000 --- a/build/output/variables.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "pandoc": { - "date-meta": "2019-12-19", - "author-meta": [], - "header-includes": "\n\n\n\n\n\n\n\n\n\n\n\n\n" - }, - "manubot": { - "date": "December 19, 2019", - "authors": [], - "manuscript_stats": { - "reference_counts": { - "total": 0 - }, - "word_count": 0 - } - } -} diff --git a/build/webpage/v/freeze/index.html b/build/webpage/v/freeze/index.html deleted file mode 100644 index bff3da63..00000000 --- a/build/webpage/v/freeze/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - Page Redirection - - - If you are not redirected automatically, follow this link. - - diff --git a/build/webpage/v/latest b/build/webpage/v/latest deleted file mode 120000 index c2c027fe..00000000 --- a/build/webpage/v/latest +++ /dev/null @@ -1 +0,0 @@ -local \ No newline at end of file From 4e53c22c4566fb4dd8160a80cedea8806aa35450 Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Fri, 14 Feb 2020 18:11:32 -0500 Subject: [PATCH 03/22] rehash/update my previous commit - single lines and other fixes --- content/05.treat.md | 34 +++++++++++++++++++-------- content/06.discussion.md | 48 +++++++++++++++++++++++++++------------ content/citation-tags.tsv | 7 ++++++ 3 files changed, 65 insertions(+), 24 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index 3c71ba95..217b6c71 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -182,22 +182,36 @@ However, in the long term, atomic convolutions may ultimately overtake grid-base *De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery in-silico [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. It explores an estimated 1060 synthesizable organic molecules with drug-like properties without explicit enumeration [@doi:10.1002/wcms.1104]. -To test or score structures, physics-based simulation could be used, or machine learning models based on techniques discussed may be used, as they are much more computationally efficient. 
+To score molecules after generation or during optimization, physics-based simulation could be used [@tag:Sumita2018], but machine learning models based on techniques discussed earlier may be preferable [@tag:Gomezb2016_automatic], as they are much more computationally expedient. Computational efficiency is particularly important during optimization as the "scoring function" may need to be called thousands of times.
+
 To "design" and "synthesize", traditional *de novo* design software relied on classical optimizers such as genetic algorithms.
+These approaches can lead to overfit, "weird" molecules, which are difficult to synthesize in the lab.
+A popular approach which may help ensure synthesizability is to use rule-based virtual chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849].
+Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative.
+In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge.
 
-In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review]
+In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tagVamathevan2019].
 
 Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design].
 
-The first successful demonstration of a deep learning based approach for molecular optimization occured in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic].
-In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete
-(e.g. bit vector or string) features or in symbolic, molecular graph space. Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode].
+The first successful demonstration of a deep learning based approach for molecular optimization occurred in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic].
+In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete (e.g. bit vector or string) features or in symbolic, molecular graph space.
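The toy sketch below illustrates what interpolation in a learned continuous space means mechanically; it is an illustration only and is not drawn from the cited work. The `encode` and `decode` functions are stand-ins (a random linear map and its pseudo-inverse) for a trained autoencoder's encoder and decoder networks, and only NumPy is assumed.

```python
# Hedged sketch of latent-space interpolation; encode/decode are hypothetical
# stand-ins for a trained molecular autoencoder.
import numpy as np

rng = np.random.default_rng(0)
W = rng.normal(size=(16, 64))        # stand-in "encoder" weights
W_pinv = np.linalg.pinv(W)           # stand-in "decoder" (pseudo-inverse)

def encode(x):                        # molecule features -> latent vector
    return W @ x

def decode(z):                        # latent vector -> reconstructed features
    return W_pinv @ z

x_a, x_b = rng.normal(size=64), rng.normal(size=64)   # two "molecules"
z_a, z_b = encode(x_a), encode(x_b)

for t in np.linspace(0.0, 1.0, 5):
    z = (1 - t) * z_a + t * z_b      # points on the line between the two latent codes
    print(round(float(t), 2), np.round(decode(z)[:3], 2))
```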
+Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. +The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. A drawback of the SMILES-to-SMILES autoencoder is that not all SMILES strings produced by the autoencoder's decoder correspond to valid chemical structures. The Grammar Variational Autoencoder, which takes the SMILES grammar into account and is guaranteed to produce syntactically valid SMILES, helps alleviate this issue to some extent [@arxiv:1703.01925]. Another approach to *de novo* design is to train character-based RNNs on large collections of molecules, for example, ChEMBL [@doi:10.1093/nar/gkr777], to first obtain a generic generative model for drug-like compounds [@tag:Segler2017_drug_design]. -These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796]. Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules. - -Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules.[@tag:Elton_molecular_design_review] A reinforcement learning agent developed by Zhou et al. demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network.[@doi:10.1038/s41598-019-47148-x] As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1).[@tag:Zhavoronkov2019_drugs] Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1.[@tag:Zhavoronkov2019_drugs] - -It is worth pointing out that it has been shown that classical genetic algorithms can compete with many of the most advanced deep learning methods for molecular optimization.[@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C] Such genetic algorithms use hard coded rules based possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. Still, there are many avenues for improving current deep learning systems and the future of the field looks bright. +These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. 
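As an aside, validity percentages such as these are typically estimated by attempting to parse each generated string with a cheminformatics toolkit. The sketch below is an illustration added here, not taken from the cited studies, and it assumes the RDKit library is available.

```python
# Hedged sketch (assumes RDKit): estimating the fraction of generated SMILES
# strings that correspond to valid molecular structures.
from rdkit import Chem

def fraction_valid(smiles_list):
    """Return the fraction of strings that parse into a valid molecule."""
    n_valid = 0
    for smi in smiles_list:
        mol = Chem.MolFromSmiles(smi)  # returns None for invalid strings
        if mol is not None:
            n_valid += 1
    return n_valid / len(smiles_list)

# Example with a mix of valid and invalid strings.
samples = ["CCO", "c1ccccc1", "C(C(", "CC(=O)Oc1ccccc1C(=O)O"]
print(fraction_valid(samples))  # 0.75 for this toy list
```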
+The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796].
+Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules.
+
+Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry.
+Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valence rules.[@tag:Elton_molecular_design_review]
+A reinforcement learning agent developed by Zhou et al. demonstrated superior molecular optimization performance on certain easy-to-compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network [@doi:10.1038/s41598-019-47148-x].
+As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1) [@tag:Zhavoronkov2019_drugs].
+Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1 [@tag:Zhavoronkov2019_drugs].
+
+In concluding this section, it is worth pointing out that classical genetic algorithms have been shown to compete with some of the most advanced deep learning methods for molecular optimization [@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C].
+Such genetic algorithms use hard-coded rules based on possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849].
+Still, there are many avenues for improving current deep learning systems and the future of the field looks bright.
 diff --git a/content/06.discussion.md b/content/06.discussion.md
index 910a02c7..65159052 100644
--- a/content/06.discussion.md
+++ b/content/06.discussion.md
@@ -3,15 +3,21 @@
 Despite the disparate types of data and scientific goals in the learning tasks covered above, several challenges are broadly important for deep learning in the biomedical domain.
 Here we examine these factors that may impede further progress, ask what steps have already been taken to overcome them, and suggest future research directions.
 
-### Customizing deep learning models reflects a tradeoff between bias and variance
+### Preventing overfitting via hyperparameter tuning
 
-Some of the challenges in applying deep learning are shared with other machine learning methods.
-In particular, many problem-specific optimizations described in this review reflect a recurring universal tradeoff---controlling the flexibility of a model in order to maximize generalizability and prevent overfitting.
-Methods for for preventing overfitting in deep learning models include adding regularization terms to the loss, dropout, using reduced data projections, and early stopping.
-The need for balance between model expressiveness and overfitting is formally described as a tradeoff between "bias and variance"
-[@url:http://www.deeplearningbook.org/].
+Overfitting is one of the most common problems in machine learning which all practitioners must learn to grapple with.
+In the classical theory of statistical learning and model fitting, there is a trade-off between "bias and variance" [@url:http://www.deeplearningbook.org/].
+Increasing the capacity of the model (by adding more layers) reduces bias but can increase variance, indicating overfitting.
+According to this theory, one way of reducing overfitting and increasing generalization performance is to reduce the capacity of the model.
+Other methods for preventing overfitting include adding regularization terms to the loss, using dropout, using reduced data projections, and early stopping. Each of these methods involves hyperparameters which must be tuned.
 
-Although the bias-variance tradeoff is is important to take into account in many machine learning tasks, recent empirical and theoretical observations suggest that deep neural networks have uniquely advantageous generalization properties and do not obey the tradeoff as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. According to the bias-variance theory, many of the most successful deep neural networks have so many free parameters they should overfit.[@tag:Belkin2019_PNAS] It has been shown that deep neural networks operate in a regime where they can exactly interpolate their training data yet are still able to generalize.[@tag:Belkin2019_PNAS] Thus, poor generalizability can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory. Additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles.
+Although the bias-variance trade-off is important to take into account with many classical machine learning models, recent empirical and theoretical observations suggest that deep neural networks in particular do not obey the trade-off as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works].
+It has been demonstrated that poor generalizability (test error) can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory.
+This phenomenon, known as "double descent", indicates that deep neural networks achieve their best performance when they smoothly interpolate training data, resulting in near zero training error [@tag:Belkin2019_PNAS].
+
+To optimize neural networks, hyperparameters must be tuned to yield the network with the best test error.
+This is computationally expensive and often not done; however, it is important to do when making claims about the superiority of one machine learning method vs. another.
+Several examples have now been uncovered where a new method said to be superior to a baseline method (like an LSTM) after sufficient hyperparameter tuning [@tag:Sculley2018].
 
 #### Evaluation metrics for imbalanced classification
 
@@ -106,18 +112,33 @@ Thus, the practical value of uncertainty quantification in biomedical domains is
 ### Interpretability
 
-As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. There are several important reasons to care about interpretability.
+As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to develop methods for interpreting how they function. +There are several important reasons one might be interested in interpretability, which is also called "explainability". Firstly, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. For instance, interpreting a model for predicting chemical properties from molecular graphs may illuminate previously unknown structure-property relations. It is also useful to see if a model is using known relationships - if not, this may suggest a way to improve the model. -Finally, there is a chance that the model may have learned relationships that are known to be wrong. This can be due to improper training data or due to overfitting on spurious correlations in the training data. +Finally, there is a chance that the model may have learned relationships that are known to be wrong. +This can be due to improper training data or due to overfitting on spurious correlations in the training data. -This is particularly important if a model is making medical diagnoses. A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. +This is particularly important if a model is making medical diagnoses. +A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. -It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. While this is largely still an unsolved problem, the interpretation of deep learning models can help understand these failure modes and how to prevent them. +It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. +While this is largely still an unsolved problem, the interpretation of deep learning models may help understand these failure modes and how to prevent them. -Several different levels of interpretability can be distinguished. Consider a prototypical CNN used for image classification. At a high level, one can perform an occulusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap. Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing]. Finally, if one wishes to analyze the flow of information through a deep neural network layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization] +Several different levels of interpretability can be distinguished. +Consider a prototypical CNN used for image classification. 
+At a high level, one can perform an occlusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap.
+Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing].
+Finally, if one wishes to analyze the flow of information through a deep neural network, layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization]
 
-A starting point for many discussions of interpretability is the interpretability-accuracy trade-off. The trade-off assumes that only simple models are interpretable and often a delineation is made between “white box" models (linear regression, decision trees) that are assumed to be not very accurate and “black box" models (neural networks, kernel SVMs) which are assumed to be more accurate. This view is becoming outmoded, however with the development of sophisticated tools for interrogating and understanding deep neural networks.[@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] Still, this trade-off motivates a common practice whereby a easy to interpret model is trained next to a hard to interpret one. For instance, in the example discussed by Caruana et al. mentioned earlier, a rule-based model was trained next to a neural network using the same training data to understand the types of relations were learned by the neural network. More recently, a method for "distilling" a neural network into a decision tree has been developed.[@tag:Frosst2017_distilling]
+A starting point for many discussions of interpretability is the interpretability-accuracy trade-off.
+The trade-off assumes that only simple models are interpretable and often a delineation is made between “white box" models (linear regression, decision trees) that are assumed to be not very accurate and “black box" models (neural networks, kernel SVMs) which are assumed to be more accurate.
+This view is becoming outmoded, however, with the development of sophisticated tools for interrogating and understanding deep neural networks [@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] and new methods for creating highly accurate interpretable models [@tag:Rudin2019].
+Still, this trade-off motivates a common practice whereby an easy-to-interpret model is trained next to a hard-to-interpret one, which is sometimes called "post-hoc interpretation".
+For instance, in the example discussed by Caruana et al. mentioned earlier, a rule-based model was trained next to a neural network using the same training data to understand the types of relations which may have been learned by the neural network.
+Along similar lines, a method for "distilling" a neural network into a decision tree has been developed.[@tag:Frosst2017_distilling]
 
 #### Assigning example-specific importance scores
 
@@ -221,7 +240,8 @@ Towards this end, Che et al. [@tag:Che2015_distill] used gradient boosted trees
 Finally, it is sometimes possible to train the model to provide justifications for its predictions.
 Lei et al. [@tag:Lei2016_rationalizing] used a generator to identify "rationales", which are short and coherent pieces of the input text that produce similar results to the whole input when passed through an encoder.
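To illustrate the occlusion analysis mentioned above, the sketch below slides an occluding patch over an image and records how much the model's score drops; regions whose occlusion causes a large drop are marked as salient in the resulting heatmap. It is an illustration only: `predict_proba` is a hypothetical stand-in for a real classifier, and only NumPy is assumed.

```python
# Hedged sketch of occlusion sensitivity analysis (assumes NumPy).
import numpy as np

def predict_proba(image):
    # Placeholder "model": responds to intensity near the image centre.
    return image[12:20, 12:20].mean()

def occlusion_map(image, patch=8, stride=4, fill=0.0):
    """Slide an occluding patch over the image and record the drop in the score."""
    base = predict_proba(image)
    heat = np.zeros_like(image, dtype=float)
    counts = np.zeros_like(image, dtype=float)
    for r in range(0, image.shape[0] - patch + 1, stride):
        for c in range(0, image.shape[1] - patch + 1, stride):
            occluded = image.copy()
            occluded[r:r + patch, c:c + patch] = fill
            drop = base - predict_proba(occluded)   # large drop = important region
            heat[r:r + patch, c:c + patch] += drop
            counts[r:r + patch, c:c + patch] += 1
    return heat / np.maximum(counts, 1)

saliency = occlusion_map(np.random.rand(32, 32))
print(saliency.shape, float(saliency.max()))
```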
-The authors applied their approach to a sentiment analysis task and obtained substantially superior results compared to an attention-based method. +Shen et al. [@tag:Shen2019] trained a CNN for lung nodule malignancy classification which also provides a series of attributes for the nodule, which they argue help understand how the network functions. +These are both simple examples of an emerging approach towards engendering trust in AI systems which Elton calls "self-explaining AI" [@tag:Elton2020]. #### Future outlook diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv index ee40470f..74e7e90b 100644 --- a/content/citation-tags.tsv +++ b/content/citation-tags.tsv @@ -68,6 +68,7 @@ Edwards2015_growing_pains doi:10.1145/2771283 Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 Elephas url:https://github.com/maxpumperla/elephas Elton_molecular_design_review doi:10.1039/C9ME00039A +Elton2020 arxiv:2002.05149 Errington2014_reproducibility doi:10.7554/eLife.04333 Eser2016_fiddle doi:10.1101/081380 Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 @@ -195,6 +196,7 @@ Mrzelj url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 matis doi:10.1016/S0097-8485(96)80015-5 nbc doi:10.1093/bioinformatics/btq619 Murdoch2017_automatic arxiv:1702.02540 +Murdoch2019 doi:10.1073/pnas.1900654116 Nazor2012 doi:10.1016/j.stem.2012.02.013 Nemati2016_rl doi:10.1109/EMBC.2016.7591355 Ni2018 doi:10.1101/385849 @@ -237,6 +239,7 @@ Rogers2010_fingerprints doi:10.1021/ci100050t Roth2015_view_agg_cad doi:10.1109/TMI.2015.2482920 Romero2017_diet url:https://openreview.net/pdf?id=Sk-oDY9ge Rosenberg2015_synthetic_seqs doi:10.1016/j.cell.2015.09.054 +Rudin2019 doi:10.1038/s42256-019-0048-x Russakovsky2015_imagenet doi:10.1007/s11263-015-0816-y Sa2015_buckwild pmcid:PMC4907892 Salas2018_GR doi:10.1101/gr.233213.117 @@ -245,6 +248,7 @@ Salzberg doi:10.1186/1471-2105-11-544 Schatz2010_dna_cloud doi:10.1038/nbt0710-691 Schmidhuber2014_dnn_overview doi:10.1016/j.neunet.2014.09.003 Scotti2016_missplicing doi:10.1038/nrg.2015.3 +Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf Segata doi:10.1371/journal.pcbi.1004977 Segler2017_drug_design arxiv:1701.01329 Seide2014_parallel doi:10.1109/ICASSP.2014.6853593 @@ -254,6 +258,7 @@ Serden doi:10.1016/S0168-8510(02)00208-7 Shaham2016_batch_effects doi:10.1093/bioinformatics/btx196 Shapely doi:10.1515/9781400881970-018 Shen2017_medimg_review doi:10.1146/annurev-bioeng-071516-044442 +Shen2019 doi:10.1016/j.eswa.2019.01.048 Shin2016_cad_tl doi:10.1109/TMI.2016.2528162 Shrikumar2017_learning arxiv:1704.02685 Shrikumar2017_reversecomplement doi:10.1101/103663 @@ -276,6 +281,7 @@ Su2015_gpu arxiv:1507.01239 Subramanian2016_bace1 doi:10.1021/acs.jcim.6b00290 Sun2016_ensemble arxiv:1606.00575 Sundararajan2017_axiomatic arxiv:1703.01365 +Sumita2018 doi:10.1021/acscentsci.8b00213 Sutskever arxiv:1409.3215 Swamidass2009_irv doi:10.1021/ci8004379 Tan2014_psb doi:10.1142/9789814644730_0014 @@ -291,6 +297,7 @@ Torracinta2016_sim doi:10.1101/079087 Tu1996_anns doi:10.1016/S0895-4356(96)00002-9 Unterthiner2014_screening url:http://www.bioinf.at/publications/2014/NIPS2014a.pdf Vanhoucke2011_cpu url:https://research.google.com/pubs/pub37631.html +Vamathevan2019 doi:10.1038/s41573-019-0024-5 Vera2016_sc_analysis doi:10.1146/annurev-genet-120215-034854 Vervier doi:10.1093/bioinformatics/btv683 Wallach2015_atom_net arxiv:1510.02855 From 721829f15b2c0d26aa5787f46910d0d64e280ad6 Mon Sep 17 00:00:00 2001 From: Daniel Elton 
Date: Fri, 14 Feb 2020 18:16:14 -0500 Subject: [PATCH 04/22] rehash/update my previous commit - single lines and other fixes --- content/06.discussion.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/content/06.discussion.md b/content/06.discussion.md index 65159052..ec42462b 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -17,7 +17,8 @@ This phenomena, known as "double descent" indicates that deep neural networks ac To optimize neural networks, hyperparaters must be tuned to yield the network with the best test error. This is computationally expensive and often not done, however it is important to do when making claims about the superiority of one machine learning method vs. another. -Several examples have now been uncovered where a new method said to be superior to a baseline method (like an LSTM) after sufficient hyperparameter tuning [@tag:Sculley2018]. +Several examples have now been uncovered where a new method was said to be superior to a baseline method (like an LSTM or vanilla CNN) but later it was found that the difference went away after sufficient hyperparameter tuning [@tag:Sculley2018]. +A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. #### Evaluation metrics for imbalanced classification From 6f0e60949df2493ef965835b0a683b18b82b6ee1 Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Fri, 14 Feb 2020 18:17:29 -0500 Subject: [PATCH 05/22] rehash/update my previous commit - single lines and other fixes --- content/06.discussion.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/06.discussion.md b/content/06.discussion.md index ec42462b..76248872 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -3,7 +3,7 @@ Despite the disparate types of data and scientific goals in the learning tasks covered above, several challenges are broadly important for deep learning in the biomedical domain. Here we examine these factors that may impede further progress, ask what steps have already been taken to overcome them, and suggest future research directions. -### Preventing overfitting via hyperparameter tuning +### Preventing overfitting and hyperparameter tuning Overfitting is one of the most common problems in machine learning which all practioners must learn to grapple with. In the classical theory of statistical learning and model fitting, there is a trade-off between "bias and variance" [@url:http://www.deeplearningbook.org/]. @@ -18,7 +18,7 @@ This phenomena, known as "double descent" indicates that deep neural networks ac To optimize neural networks, hyperparaters must be tuned to yield the network with the best test error. This is computationally expensive and often not done, however it is important to do when making claims about the superiority of one machine learning method vs. another. Several examples have now been uncovered where a new method was said to be superior to a baseline method (like an LSTM or vanilla CNN) but later it was found that the difference went away after sufficient hyperparameter tuning [@tag:Sculley2018]. 
-A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. +A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. #### Evaluation metrics for imbalanced classification From e18d9397bcc069c1e3395a771f3fb971f729b58a Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Fri, 14 Feb 2020 18:37:55 -0500 Subject: [PATCH 06/22] rehash/update my previous commit - single lines and other fixes --- content/05.treat.md | 2 +- content/06.discussion.md | 5 +++-- content/citation-tags.tsv | 14 ++++++------ content/manual-references.json | 41 ++++++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index 217b6c71..164c3c74 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -190,7 +190,7 @@ A popular approach which may help ensure synthesizability is to use rule-based v Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. -In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tagVamathevan2019]. +In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tag:Vamathevan2019]. Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. diff --git a/content/06.discussion.md b/content/06.discussion.md index 65159052..76248872 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -3,7 +3,7 @@ Despite the disparate types of data and scientific goals in the learning tasks covered above, several challenges are broadly important for deep learning in the biomedical domain. Here we examine these factors that may impede further progress, ask what steps have already been taken to overcome them, and suggest future research directions. -### Preventing overfitting via hyperparameter tuning +### Preventing overfitting and hyperparameter tuning Overfitting is one of the most common problems in machine learning which all practioners must learn to grapple with. 
In the classical theory of statistical learning and model fitting, there is a trade-off between "bias and variance" [@url:http://www.deeplearningbook.org/]. @@ -17,7 +17,8 @@ This phenomena, known as "double descent" indicates that deep neural networks ac To optimize neural networks, hyperparaters must be tuned to yield the network with the best test error. This is computationally expensive and often not done, however it is important to do when making claims about the superiority of one machine learning method vs. another. -Several examples have now been uncovered where a new method said to be superior to a baseline method (like an LSTM) after sufficient hyperparameter tuning [@tag:Sculley2018]. +Several examples have now been uncovered where a new method was said to be superior to a baseline method (like an LSTM or vanilla CNN) but later it was found that the difference went away after sufficient hyperparameter tuning [@tag:Sculley2018]. +A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. #### Evaluation metrics for imbalanced classification diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv index 74e7e90b..502511a6 100644 --- a/content/citation-tags.tsv +++ b/content/citation-tags.tsv @@ -68,7 +68,7 @@ Edwards2015_growing_pains doi:10.1145/2771283 Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 Elephas url:https://github.com/maxpumperla/elephas Elton_molecular_design_review doi:10.1039/C9ME00039A -Elton2020 arxiv:2002.05149 +Elton2020 arxiv:2002.05149 Errington2014_reproducibility doi:10.7554/eLife.04333 Eser2016_fiddle doi:10.1101/081380 Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 @@ -196,7 +196,7 @@ Mrzelj url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 matis doi:10.1016/S0097-8485(96)80015-5 nbc doi:10.1093/bioinformatics/btq619 Murdoch2017_automatic arxiv:1702.02540 -Murdoch2019 doi:10.1073/pnas.1900654116 +Murdoch2019 doi:10.1073/pnas.1900654116 Nazor2012 doi:10.1016/j.stem.2012.02.013 Nemati2016_rl doi:10.1109/EMBC.2016.7591355 Ni2018 doi:10.1101/385849 @@ -239,7 +239,7 @@ Rogers2010_fingerprints doi:10.1021/ci100050t Roth2015_view_agg_cad doi:10.1109/TMI.2015.2482920 Romero2017_diet url:https://openreview.net/pdf?id=Sk-oDY9ge Rosenberg2015_synthetic_seqs doi:10.1016/j.cell.2015.09.054 -Rudin2019 doi:10.1038/s42256-019-0048-x +Rudin2019 doi:10.1038/s42256-019-0048-x Russakovsky2015_imagenet doi:10.1007/s11263-015-0816-y Sa2015_buckwild pmcid:PMC4907892 Salas2018_GR doi:10.1101/gr.233213.117 @@ -248,7 +248,7 @@ Salzberg doi:10.1186/1471-2105-11-544 Schatz2010_dna_cloud doi:10.1038/nbt0710-691 Schmidhuber2014_dnn_overview doi:10.1016/j.neunet.2014.09.003 Scotti2016_missplicing doi:10.1038/nrg.2015.3 -Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf +Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf Segata doi:10.1371/journal.pcbi.1004977 Segler2017_drug_design arxiv:1701.01329 Seide2014_parallel doi:10.1109/ICASSP.2014.6853593 @@ -258,7 +258,7 @@ Serden doi:10.1016/S0168-8510(02)00208-7 Shaham2016_batch_effects doi:10.1093/bioinformatics/btx196 Shapely doi:10.1515/9781400881970-018 Shen2017_medimg_review doi:10.1146/annurev-bioeng-071516-044442 -Shen2019 doi:10.1016/j.eswa.2019.01.048 +Shen2019 doi:10.1016/j.eswa.2019.01.048 Shin2016_cad_tl 
doi:10.1109/TMI.2016.2528162 Shrikumar2017_learning arxiv:1704.02685 Shrikumar2017_reversecomplement doi:10.1101/103663 @@ -281,7 +281,7 @@ Su2015_gpu arxiv:1507.01239 Subramanian2016_bace1 doi:10.1021/acs.jcim.6b00290 Sun2016_ensemble arxiv:1606.00575 Sundararajan2017_axiomatic arxiv:1703.01365 -Sumita2018 doi:10.1021/acscentsci.8b00213 +Sumita2018 doi:10.1021/acscentsci.8b00213 Sutskever arxiv:1409.3215 Swamidass2009_irv doi:10.1021/ci8004379 Tan2014_psb doi:10.1142/9789814644730_0014 @@ -297,7 +297,7 @@ Torracinta2016_sim doi:10.1101/079087 Tu1996_anns doi:10.1016/S0895-4356(96)00002-9 Unterthiner2014_screening url:http://www.bioinf.at/publications/2014/NIPS2014a.pdf Vanhoucke2011_cpu url:https://research.google.com/pubs/pub37631.html -Vamathevan2019 doi:10.1038/s41573-019-0024-5 +Vamathevan2019 doi:10.1038/s41573-019-0024-5 Vera2016_sc_analysis doi:10.1146/annurev-genet-120215-034854 Vervier doi:10.1093/bioinformatics/btv683 Wallach2015_atom_net arxiv:1510.02855 diff --git a/content/manual-references.json b/content/manual-references.json index 22d98717..bc7bb13b 100644 --- a/content/manual-references.json +++ b/content/manual-references.json @@ -52,6 +52,47 @@ ] } }, + { + "id": "url:https://openreview.net/pdf?id=rJWF0Fywf", + "type": "article-journal", + "title": "Winner's Curse? On Pace, Progress, and Empirical Rigor ...", + "container-title": "International Conference on Learning Representations 2018", + "URL": "https://openreview.net/pdf?id=rJWF0Fywf", + "author": [ + { + "family": "Sculley", + "given": "D." + }, + { + "family": "Snoek", + "given": "Jasper" + }, + { + "family": "Rahimi", + "given": "Ali" + }, + { + "family": "Wiltschko", + "given": "Alex" + } + ], + "issued": { + "date-parts": [ + [ + "2018" + ] + ] + }, + "accessed": { + "date-parts": [ + [ + "2020", + 2, + 14 + ] + ] + } + }, { "id": "url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515", "type": "report", From 5dcf0daef5149fb9d1d1fca11e90ce9cdf4152bc Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Mon, 16 Mar 2020 07:03:30 -0500 Subject: [PATCH 07/22] Minor changes for recent version 2.0 updates (#1002) * Experiment with v2 author contribution whitespace * Remove 2019 dates from readme * Add new v1 author contribution category for dhimmel Match the contribution listed in metadata.yaml * Typo in documentation * Standardize capitalization and minor rephrasing * One more capitalization change * Add whitespace --- README.md | 5 +++-- build/randomize-authors.py | 2 +- content/00.front-matter.md | 8 ++++---- content/08.methods.md | 23 +++++++++++++---------- content/metadata.yaml | 4 ++-- 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 39c8fe21..e26eddf3 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,12 @@ The original version of the Deep Review was published in 2018 and should be cite > Ching T, Himmelstein DS, Beaulieu-Jones BK, Kalinin AA, Do BT, Way GP, Ferrero E, Agapow P-M, Zietz M, Hoffman MM, Xie W, Rosen GL, Lengerich BJ, Israeli J, Lanchantin J, Woloszynek S, Carpenter AE, Shrikumar A, Xu J, Cofer EM, Lavender CA, Turaga SC, Alexandari AM, Lu Z, Harris DJ, DeCaprio D, Qi Y, Kundaje A, Peng Y, Wiley LK, Segler MHS, Boca SM, Swamidass SJ, Huang A, Gitter A, and Greene CS. 2018. Opportunities and obstacles for deep learning in biology and medicine. _Journal of The Royal Society Interface_ 15(141):20170387. 
[doi:10.1098/rsif.2017.0387](https://doi.org/10.1098/rsif.2017.0387) -### Current stage: planning Deep Review 2019 +### Current stage: planning Deep Review version 2.0 -As of writing, we are aiming to publish an update of the deep review each year, with the next such release occurring at the end of 2019. +As of writing, we are aiming to publish an update of the deep review. We will continue to make project preprints available on bioRxiv or another preprint service and aim to continue publishing the finished reviews in a peer-reviewed venue as well. Like the initial release, we are planning for an open and collaborative effort. +New contributors are welcome and will be listed as version 2.0 authors. Please see [issue #810](https://github.com/greenelab/deep-review/issues/810) to contribute to the discussion of future plans, and help decide how to best continue this project. **Manubot updates:** diff --git a/build/randomize-authors.py b/build/randomize-authors.py index 25104113..74df3442 100644 --- a/build/randomize-authors.py +++ b/build/randomize-authors.py @@ -11,7 +11,7 @@ def parse_args(): parser = argparse.ArgumentParser( - description="Randomize metadata.authors. Ovewrites metadata.yaml" + description="Randomize metadata.authors. Overwrites metadata.yaml" ) parser.add_argument( "--path", default="content/metadata.yaml", help="path to metadata.yaml" diff --git a/content/00.front-matter.md b/content/00.front-matter.md index b6374a1c..5da71351 100644 --- a/content/00.front-matter.md +++ b/content/00.front-matter.md @@ -8,8 +8,8 @@ [ []{.fas .fa-info-circle .fa-lg} **Update Underway**
-A published version of this manuscript from 04 April 2018, termed Version 1.0, is available at . -A new effort is underway to update the manuscript to a Version 2.0 that is current as of the first half of 2020. +A published version of this manuscript from 04 April 2018, termed version 1.0, is available at . +A new effort is underway to update the manuscript to a version 2.0 that is current as of the first half of 2020. New authors and links to new sections are available in [GitHub Issue #959](https://github.com/greenelab/deep-review/issues/959). ]{.banner .lightred} @@ -30,7 +30,7 @@ on {{manubot.date}}. ## Authors -### Version 2.0 Authors +### Version 2.0 authors {% for author in manubot.authors %} {% if author.v2 -%} @@ -54,7 +54,7 @@ on {{manubot.date}}. -### Version 1.0 Authors +### Version 1.0 authors [![ORCID icon](images/orcid.svg){height="11px" width="11px"}](https://orcid.org/0000-0002-5577-3516) Travers Ching1.1,☯, diff --git a/content/08.methods.md b/content/08.methods.md index a53b1e2d..64159a1d 100644 --- a/content/08.methods.md +++ b/content/08.methods.md @@ -10,12 +10,12 @@ Contributions were handled through GitHub, with individuals submitting "pull req This collaborative writing approach was later generalized into [Manubot](https://manubot.org/) [@doi:10.1371/journal.pcbi.1007128]. Manubot supports citations of persistent identifiers, such as DOIs, PubMed Central IDs, PubMed IDs, arXiv IDs, and URLs. -This reduces one major barrier to collaboratively writing, which is syncing reference managers between participants. -In addition, Manubot using continuous integration to build and deploy manuscripts. +This reduces one major barrier to writing collaboratively, which is syncing reference managers between participants. +In addition, Manubot uses continuous integration to build and deploy manuscripts. This allows for automated error checking of proposed changes to catch malformated citations and invalid syntax. Originally, the Deep Review used Travis CI for continuous integration, but in 2020 switched to GitHub Actions, which became the default for Manubot manuscripts. -For version 1.0 of the Deep Review, author order was randomized as described in [Version 1.0] [@tag:techblog-perkel]. +For version 1.0 of the Deep Review, author order was randomized as described in [version 1.0] [@tag:techblog-perkel]. However, this was a one-time manual process. Starting with version 2.0, we began shuffling authors for every manuscript version. Manubot allowed us to automate this process, using the Git commit hash as a random seed to ensure reproducible ordering. @@ -24,15 +24,18 @@ Manubot allowed us to automate this process, using the Git commit hash as a rand #### Version 2.0 -We continued using the open repository on the GitHub version control platform ([`greenelab/deep-review`](https://github.com/greenelab/deep-review)) [@url:https://github.com/greenelab/deep-review], which was established to write the Version 1.0 manuscript. {% for v2, authors in manubot.authors|groupby('v2') %}{% if v2|length %}{{ v2 }}: {% for author in authors %}{{author.name }}{%- if not loop.last -%}, {%- endif -%}{% endfor %}. {% endif %}{% endfor %} +We continued using the open repository on the GitHub version control platform ([`greenelab/deep-review`](https://github.com/greenelab/deep-review)) [@url:https://github.com/greenelab/deep-review], which was established to write the version 1.0 manuscript. 
+{% for v2, authors in manubot.authors|groupby('v2') %} + {% if v2|length %}{{ v2 }}: {% for author in authors %}{{author.name }}{% if not loop.last %}, {% endif %}{% endfor %}. {% endif %} +{% endfor %} -##### Version 2.0 Competing Interests +##### Version 2.0 competing interests |Author|Competing Interests|Last Reviewed| |---|---|---|{% for author in manubot.authors %} |{{author.name}}|{{author.coi.string}}|{{author.coi.lastapproved}}|{% endfor %} -##### Version 2.0 Funding Statement +##### Version 2.0 funding statement We acknowledge funding from the Gordon and Betty Moore Foundation award GBMF4552 (C.S.G. and D.S.H.); the National Institutes of Health awards R01HG010067 (C.S.G. and D.S.H.), R01CA237170 (C.S.G), T32LM012204 (A.J.T.), R01CA216265 (B.C.C.); @@ -49,13 +52,13 @@ drafted the manuscript or provided substantial critical revisions; approved the final manuscript draft; and agreed to be accountable in all aspects of the work. Individuals who did not contribute in all of these ways, but who did participate, are acknowledged below. We grouped authors into the following four classes of approximately equal contributions and randomly ordered authors within each contribution class. -Drafted multiple sub-sections along with extensive editing, pull request reviews, or discussion: Travers Ching, Daniel S. Himmelstein, Brett K. Beaulieu-Jones, Alexandr A. Kalinin, Brian T. Do, Gregory P. Way, Enrico Ferrero, Paul-Michael Agapow, Michael Zietz, Michael M. Hoffman. +Drafted multiple sub-sections along with extensive editing, pull request reviews, or discussion: Travers Ching, Brett K. Beaulieu-Jones, Alexandr A. Kalinin, Brian T. Do, Gregory P. Way, Enrico Ferrero, Paul-Michael Agapow, Michael Zietz, Michael M. Hoffman. +Edited the manuscript, reviewed pull requests, and developed Manubot: Daniel S. Himmelstein. Drafted one or more sub-sections: Wei Xie, Gail L. Rosen, Benjamin J. Lengerich, Johnny Israeli, Jack Lanchantin, Stephen Woloszynek, Anne E. Carpenter, Avanti Shrikumar, Jinbo Xu, Evan M. Cofer, Christopher A. Lavender, Srinivas C. Turaga, Amr M. Alexandari, Zhiyong Lu. Drafted sub-sections, edited the manuscript, reviewed pull requests, and coordinated co-authors: Anthony Gitter, Casey S. Greene. Revised specific sub-sections or supervised drafting one or more sub-sections: David J. Harris, Dave DeCaprio, Yanjun Qi, Anshul Kundaje, Yifan Peng, Laura K. Wiley, Marwin H.S. Segler, Simina M. Boca, S. Joshua Swamidass, Austin Huang. -##### Version 1.0 Competing interests - +##### Version 1.0 competing interests |Author|Competing Interests|Last Reviewed| |---|---|---| @@ -96,7 +99,7 @@ Revised specific sub-sections or supervised drafting one or more sub-sections: D |Anthony Gitter|None|2017-05-26| |Casey S. Greene|None|2017-05-26| -##### Version 1.0 Funding Statement +##### Version 1.0 funding statement We acknowledge funding from the Gordon and Betty Moore Foundation awards GBMF4552 (C.S.G. and D.S.H.) 
and GBMF4563 (D.J.H.); the Howard Hughes Medical Institute (S.C.T.); diff --git a/content/metadata.yaml b/content/metadata.yaml index 20c70c07..3cd17bf1 100644 --- a/content/metadata.yaml +++ b/content/metadata.yaml @@ -68,8 +68,8 @@ authors: affiliations: - Department of Systems Pharmacology and Translational Therapeutics, University of Pennsylvania, Philadelphia, Pennsylvania, United States of America funders: G-2018-11163 and GBMF4552 - v1: "edited the manuscript, reviewed pull requests, and developed Manubot" - v2: "edited the manuscript, reviewed pull requests, and developed Manubot" + v1: "Edited the manuscript, reviewed pull requests, and developed Manubot" + v2: "Edited the manuscript, reviewed pull requests, and developed Manubot" coi: string: "None" lastapproved: !!str 2020-03-10 From 081fb466dd13c2813b3ae14bb916173f5d0442c5 Mon Sep 17 00:00:00 2001 From: Christian Brueffer Date: Sat, 11 Apr 2020 22:15:21 +0100 Subject: [PATCH 08/22] Spelling and wording cleanup (#1014) * Fix typos and a spurious word. * Harmonize spelling of word2vec. word2vec was already the predominant spelling in this repository and is the version used in the original word2vec code repository. * Update content/06.discussion.md Co-Authored-By: Anthony Gitter Co-authored-by: Anthony Gitter --- content/04.study.md | 6 +++--- content/06.discussion.md | 4 ++-- content/90.back-matter.md | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/content/04.study.md b/content/04.study.md index 050c7abe..00bf9b69 100644 --- a/content/04.study.md +++ b/content/04.study.md @@ -384,7 +384,7 @@ MHCflurry adds placeholder amino acids to transform variable-length peptides to In training the MHCflurry feed-forward neural network [@doi:10.1101/054775], the authors imputed missing MHC-peptide binding affinities using a Gibbs sampling method, showing that imputation improves performance for data-sets with roughly 100 or fewer training examples. MHCflurry's imputation method increases its performance on poorly characterized alleles, making it competitive with NetMHCpan for this task. Kuksa et al. [@doi:10.1093/bioinformatics/btv371] developed a shallow, higher-order neural network (HONN) comprised of both mean and covariance hidden units to capture some of the higher-order dependencies between amino acid locations. -Pretraining this HONN with a semi-restricted Boltzmann machine, the authors found that the performance of the HONN exceeded that of a simple deep neural network, as well as that of NetMHC. +Pre-training this HONN with a semi-restricted Boltzmann machine, the authors found that the performance of the HONN exceeded that of a simple deep neural network, as well as that of NetMHC. Deep learning's unique flexibility was recently leveraged by Bhattacharya et al. [@doi:10.1101/154757], who used a gated RNN method called MHCnuggets to overcome the difficulty of multiple peptide lengths. Under this framework, they used smoothed sparse encoding to represent amino acids individually. @@ -484,7 +484,7 @@ Also, researchers have looked into how feature selection can improve classificat Most neural networks are used for phylogenetic classification or functional annotation from sequence data where there is ample data for training. Neural networks have been applied successfully to gene annotation (e.g. Orphelia [@tag:Hoff] and FragGeneScan [@doi:10.1093/nar/gkq747]). 
-Representations (similar to Word2Vec [@tag:Word2Vec] in natural language processing) for protein family classification have been introduced and classified with a skip-gram neural network [@tag:Asgari]. +Representations (similar to word2vec [@tag:word2vec] in natural language processing) for protein family classification have been introduced and classified with a skip-gram neural network [@tag:Asgari]. Recurrent neural networks show good performance for homology and protein family identification [@tag:Hochreiter; @tag:Sonderby]. One of the first techniques of *de novo* genome binning used self-organizing maps, a type of neural network [@tag:Abe]. @@ -558,7 +558,7 @@ Even when they are not directly modeling biological neurons, deep networks have They have been developed as statistical time series models of neural activity in the brain. And in contrast to the encoding models described earlier, these models are used for decoding neural activity, for instance in brain machine interfaces [@doi:10.1101/152884]. They have been crucial to the field of connectomics, which is concerned with mapping the connectivity of biological neural networks in the brain. -In connectomics, deep networks are used to segment the shapes of individual neurons and to infer their connectivity from 3D electron microscopic images [@doi:10.1016/j.conb.2010.07.004], and they have been also been used to infer causal connectivity from optical measurement and perturbation of neural activity [@tag:Aitchison2017]. +In connectomics, deep networks are used to segment the shapes of individual neurons and to infer their connectivity from 3D electron microscopic images [@doi:10.1016/j.conb.2010.07.004], and they have also been used to infer causal connectivity from optical measurement and perturbation of neural activity [@tag:Aitchison2017]. It is an exciting time for neuroscience. Recent rapid progress in deep networks continues to inspire new machine learning based models of brain computation [@doi:10.3389/fncom.2016.00094]. diff --git a/content/06.discussion.md b/content/06.discussion.md index b6311851..5baa3a37 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -196,7 +196,7 @@ The contribution scores were then used to identify key phrases from a model trai #### Latent space manipulation Interpretation of embedded or latent space features learned through generative unsupervised models can reveal underlying patterns otherwise masked in the original input. -Embedded feature interpretation has been emphasized mostly in image and text based applications [@tag:Radford_dcgan; @tag:Word2Vec], but applications to genomic and biomedical domains are increasing. +Embedded feature interpretation has been emphasized mostly in image and text based applications [@tag:Radford_dcgan; @tag:word2vec], but applications to genomic and biomedical domains are increasing. For example, Way and Greene trained a VAE on gene expression from The Cancer Genome Atlas (TCGA) [@doi:10.1038/ng.2764] and use latent space arithmetic to rapidly isolate and interpret gene expression features descriptive of high grade serous ovarian cancer subtypes [@tag:WayGreene2017_tybalt]. The most differentiating VAE features were representative of biological processes that are known to distinguish the subtypes. @@ -270,7 +270,7 @@ There is a risk that a model will easily discriminate synthetic examples but not Multimodal, multi-task, and transfer learning, discussed in detail below, can also combat data limitations to some degree. 
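As an illustrative aside on the transfer learning strategy mentioned above, the sketch below reuses an encoder from a large, related task as a frozen feature extractor for a small labeled dataset; the architecture, sizes, data, and weight file are hypothetical stand-ins, not a description of any cited work.

```python
# Hedged sketch of transfer learning: freeze a pretrained encoder, train a new head.
# The encoder stands in for a network already trained on a large related dataset.
import torch
import torch.nn as nn

encoder = nn.Sequential(                 # placeholder for a pretrained feature extractor
    nn.Linear(500, 128), nn.ReLU(),
    nn.Linear(128, 64), nn.ReLU(),
)
# In practice the weights would be loaded from the large-data task, e.g.:
# encoder.load_state_dict(torch.load("pretrained_encoder.pt"))  # hypothetical file

for param in encoder.parameters():       # freeze the transferred layers
    param.requires_grad = False

head = nn.Linear(64, 2)                  # new task-specific classifier
model = nn.Sequential(encoder, head)

optimizer = torch.optim.Adam(head.parameters(), lr=1e-3)  # only the head is updated
loss_fn = nn.CrossEntropyLoss()

x_small = torch.randn(32, 500)           # toy stand-in for the small labeled dataset
y_small = torch.randint(0, 2, (32,))
for _ in range(5):
    optimizer.zero_grad()
    loss = loss_fn(model(x_small), y_small)
    loss.backward()
    optimizer.step()
print(float(loss))
```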
There are also emerging network architectures, such as Diet Networks for high-dimensional SNP data [@tag:Romero2017_diet]. These use multiple networks to drastically reduce the number of free parameters by first flipping the problem and training a network to predict parameters (weights) for each input (SNP) to learn a feature embedding. -This embedding (e.g. from principal component analysis, per class histograms, or a Word2vec [@tag:Word2Vec] generalization) can be learned directly from input data or take advantage of other datasets or domain knowledge. +This embedding (e.g. from principal component analysis, per class histograms, or a word2vec [@tag:word2vec] generalization) can be learned directly from input data or take advantage of other datasets or domain knowledge. Additionally, in this task the features are the examples, an important advantage when it is typical to have 500 thousand or more SNPs and only a few thousand patients. Finally, this embedding is of a much lower dimension, allowing for a large reduction in the number of free parameters. In the example given, the number of free parameters was reduced from 30 million to 50 thousand, a factor of 600. diff --git a/content/90.back-matter.md b/content/90.back-matter.md index 943cb2b9..8c40153d 100644 --- a/content/90.back-matter.md +++ b/content/90.back-matter.md @@ -297,7 +297,6 @@ [@tag:WayGreene2017_eval]: arxiv:1711.04828 [@tag:WayGreene2017_tybalt]: doi:10.1101/174474 [@tag:Wilhelm-Benartzi2013]: doi:10.1038/bjc.2013.496 -[@tag:Word2Vec]: arxiv:1301.3781 [@tag:Wu2017_molecule_net]: doi:10.1039/C7SC02664A [@tag:Xiang]: doi:10.1016/S0167-9473(99)00098-5 [@tag:Xiong2011_bayesian]: doi:10.1093/bioinformatics/btr444 @@ -345,5 +344,6 @@ [@tag:uncertainty_multi_task]: arxiv:1705.07115 [@tag:uncertainty_types]: arxiv:1703.04977 [@tag:wgsquikr]: doi:10.1371/journal.pone.0091784 +[@tag:word2vec]: arxiv:1301.3781 [@tag:world2004international]: url:http://www.who.int/classifications/icd/en/ [@tag:yok]: doi:10.1186/1471-2105-12-20 From 77e5ff35bb9e1887f3f392499d0e40c523d4ce95 Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Thu, 19 Dec 2019 18:50:47 -0500 Subject: [PATCH 09/22] add discussion in interpretability section and update section on molecular design --- build/ci/cache/requests-cache.sqlite | Bin 0 -> 20480 bytes build/output/citations.tsv | 1 + build/output/manuscript.md | 38 +++ build/output/references.json | 1 + build/output/variables.json | 17 ++ build/webpage/v/freeze/index.html | 19 ++ build/webpage/v/latest | 1 + content/05.treat.md | 36 ++- content/06.discussion.md | 27 ++- content/citation-tags.tsv | 345 +++++++++++++++++++++++++++ 10 files changed, 453 insertions(+), 32 deletions(-) create mode 100644 build/ci/cache/requests-cache.sqlite create mode 100644 build/output/citations.tsv create mode 100644 build/output/manuscript.md create mode 100644 build/output/references.json create mode 100644 build/output/variables.json create mode 100644 build/webpage/v/freeze/index.html create mode 120000 build/webpage/v/latest create mode 100644 content/citation-tags.tsv diff --git a/build/ci/cache/requests-cache.sqlite b/build/ci/cache/requests-cache.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..5f5d7c8e968a7214f2eb1395eb7369bddc7dc5be GIT binary patch literal 20480 zcmeI%K}*9h7=YoV>xvUbcPTwCHx)!E9(UQ49%K`B?y^%?F~YE}y0)S}a6ibKN1MWm z9(wdr-ayhXA?-(=^b&G^H(TdgJ*C^1I9KOlAfyyyrGyYxS-WN37t^=*o$@Jv#Z93l z&PHE-_gmO*Pq?|eb9?TzX)@yoAb1$Bir#uo)qiNqyEU=lJZ;~ 
z#YLW0{m8r<1*YxZNXH)zWm=`$>g+Y$WIC$@>u=I}yg74VIes{7%3OaS3hJ;^dL9_l zzG()gp&)<&0tg_000IagfB*srAb>zy1rE)^IRCeGds!C(2q1s}0tg_000IagfB*uO z0Qdj=2nZm600IagfB*srAb0R#|0009ILKmY**5J2Du^0!$7 literal 0 HcmV?d00001 diff --git a/build/output/citations.tsv b/build/output/citations.tsv new file mode 100644 index 00000000..e4349893 --- /dev/null +++ b/build/output/citations.tsv @@ -0,0 +1 @@ +manuscript_citekey detagged_citekey standard_citekey short_citekey diff --git a/build/output/manuscript.md b/build/output/manuscript.md new file mode 100644 index 00000000..df0d610a --- /dev/null +++ b/build/output/manuscript.md @@ -0,0 +1,38 @@ +--- +author-meta: [] +date-meta: '2019-12-19' +header-includes: ' + + + + + + + + + + + + + + + + + + + + + + + + + + ' +... + diff --git a/build/output/references.json b/build/output/references.json new file mode 100644 index 00000000..fe51488c --- /dev/null +++ b/build/output/references.json @@ -0,0 +1 @@ +[] diff --git a/build/output/variables.json b/build/output/variables.json new file mode 100644 index 00000000..88840dd9 --- /dev/null +++ b/build/output/variables.json @@ -0,0 +1,17 @@ +{ + "pandoc": { + "date-meta": "2019-12-19", + "author-meta": [], + "header-includes": "\n\n\n\n\n\n\n\n\n\n\n\n\n" + }, + "manubot": { + "date": "December 19, 2019", + "authors": [], + "manuscript_stats": { + "reference_counts": { + "total": 0 + }, + "word_count": 0 + } + } +} diff --git a/build/webpage/v/freeze/index.html b/build/webpage/v/freeze/index.html new file mode 100644 index 00000000..bff3da63 --- /dev/null +++ b/build/webpage/v/freeze/index.html @@ -0,0 +1,19 @@ + + + + + + + + Page Redirection + + + If you are not redirected automatically, follow this link. + + diff --git a/build/webpage/v/latest b/build/webpage/v/latest new file mode 120000 index 00000000..c2c027fe --- /dev/null +++ b/build/webpage/v/latest @@ -0,0 +1 @@ +local \ No newline at end of file diff --git a/content/05.treat.md b/content/05.treat.md index 96db7d25..3c71ba95 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -180,28 +180,24 @@ However, in the long term, atomic convolutions may ultimately overtake grid-base #### *De novo* drug design -*De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. +*De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery in-silico [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. It explores an estimated 1060 synthesizable organic molecules with drug-like properties without explicit enumeration [@doi:10.1002/wcms.1104]. -To test or score structures, algorithms like those discussed earlier are used. +To test or score structures, physics-based simulation could be used, or machine learning models based on techniques discussed may be used, as they are much more computationally efficient. To "design" and "synthesize", traditional *de novo* design software relied on classical optimizers such as genetic algorithms. -Unfortunately, this often leads to overfit, "weird" molecules, which are difficult to synthesize in the lab. -Current programs have settled on rule-based virtual chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. -Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. -In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. 
-The challenge of generating molecules has parallels to the generation of syntactically and semantically correct text [@arxiv:1308.0850].
-
-As deep learning models that directly output (molecular) graphs remain under-explored, generative neural networks for drug design typically represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design].
-This allows treating molecules as sequences and leveraging recent progress in recurrent neural networks.
-Gómez-Bombarelli et al. designed a SMILES-to-SMILES autoencoder to learn a continuous latent feature space for chemicals [@tag:Gomezb2016_automatic].
-In this learned continuous space it was possible to interpolate between continuous representations of chemicals in a manner that is not possible with discrete
-(e.g. bit vector or string) features or in symbolic, molecular graph space.
-Even more interesting is the prospect of performing gradient-based or Bayesian optimization of molecules within this latent space.
-The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode].
+
+In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton et al.[@tag:Elton_molecular_design_review]
+
+Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design].
+
+The first successful demonstration of a deep learning based approach for molecular optimization occurred in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic].
+In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete
+(e.g. bit vector or string) features or in symbolic, molecular graph space. Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode].
 A drawback of the SMILES-to-SMILES autoencoder is that not all SMILES strings produced by the autoencoder's decoder correspond to valid chemical structures.
-Recently, the Grammar Variational Autoencoder, which takes the SMILES grammar into account and is guaranteed to produce syntactically valid SMILES, has been proposed to alleviate this issue [@arxiv:1703.01925].
+The Grammar Variational Autoencoder, which takes the SMILES grammar into account and is guaranteed to produce syntactically valid SMILES, helps alleviate this issue to some extent [@arxiv:1703.01925].
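As a concrete illustration of the validity problem noted above, decoded strings can be checked with a cheminformatics toolkit; the sketch below assumes RDKit is installed, and the sample list is a made-up placeholder for strings produced by a decoder.

```python
# Sketch: estimating the fraction of syntactically valid SMILES among decoder samples.
# The `sampled` list is a placeholder for strings sampled from a generative model.
from rdkit import Chem, RDLogger

RDLogger.DisableLog("rdApp.*")   # silence parse errors for the invalid strings

sampled = ["CCO", "c1ccccc1", "C(C(=O", "N#N", "not_a_molecule"]

valid = [s for s in sampled if Chem.MolFromSmiles(s) is not None]
print(f"{len(valid)}/{len(sampled)} strings parse to valid molecules")
```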
Another approach to *de novo* design is to train character-based RNNs on large collections of molecules, for example, ChEMBL [@doi:10.1093/nar/gkr777], to first obtain a generic generative model for drug-like compounds [@tag:Segler2017_drug_design].
-These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures.
-The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796].
-Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules.
-The great flexibility of neural networks, and progress in generative models offers many opportunities for deep architectures in *de novo* design (e.g. the adaptation of GANs for molecules).
+These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796]. Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules.
+
+Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valence rules.[@tag:Elton_molecular_design_review] A reinforcement learning agent developed by Zhou et al. demonstrated superior molecular optimization performance on certain easy-to-compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network.[@doi:10.1038/s41598-019-47148-x] As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1).[@tag:Zhavoronkov2019_drugs] Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1.[@tag:Zhavoronkov2019_drugs]
+
+It is worth pointing out that classical genetic algorithms have been shown to compete with many of the most advanced deep learning methods for molecular optimization.[@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C] Such genetic algorithms use hard-coded rules based on possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. Still, there are many avenues for improving current deep learning systems and the future of the field looks bright.
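To ground the genetic algorithm comparison above, the toy sketch below mutates SMILES strings at the character level and keeps valid, high-scoring candidates. Real systems instead use reaction- or fragment-based mutation operators, and the scoring function here (RDKit's logP estimate) is only a stand-in for a learned or physics-based objective; none of these choices come from the works cited above.

```python
# Toy genetic-algorithm loop for molecular optimization (illustrative only).
# Character-level mutation is a crude stand-in for reaction/fragment-based operators.
import random
from rdkit import Chem, RDLogger
from rdkit.Chem import Descriptors

RDLogger.DisableLog("rdApp.*")           # silence parse errors for invalid mutants
random.seed(0)

ALPHABET = list("CNOF()=#1cno")           # tiny SMILES character set for mutations

def mutate(smiles: str) -> str:
    i = random.randrange(len(smiles))
    return smiles[:i] + random.choice(ALPHABET) + smiles[i + 1:]

def score(smiles: str):
    mol = Chem.MolFromSmiles(smiles)
    return None if mol is None else Descriptors.MolLogP(mol)   # placeholder objective

population = ["CCO", "CCN", "c1ccccc1O"]
for generation in range(20):
    children = [mutate(s) for s in population for _ in range(10)]
    scored = [(score(s), s) for s in population + children]
    scored = [(v, s) for v, s in scored if v is not None]       # discard invalid SMILES
    population = [s for _, s in sorted(scored, reverse=True)[:3]]

print(population)
```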
diff --git a/content/06.discussion.md b/content/06.discussion.md index 5baa3a37..27c13b64 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -12,8 +12,7 @@ One way of understanding such model optimizations is that they incorporate exter This balance is formally described as a tradeoff between "bias and variance" [@tag:goodfellow2016deep]. -Although the bias-variance tradeoff is common to all machine learning applications, recent empirical and theoretical observations suggest that deep learning models may have uniquely advantageous generalization properties [@tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. -Nevertheless, additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles. +Although the bias-variance tradeoff is is important to take into account in many machine learning tasks, recent empirical and theoretical observations suggest that deep neural networks have uniquely advantageous generalization properties and do not obey the tradeoff as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. According to the bias-variance theory, many of the most successful deep neural networks have so many free parameters they should overfit.[@tag:Belkin2019_PNAS] It has been shown that deep neural networks operate in a regime where they can exactly interpolate their training data yet are still able to generalize.[@tag:Belkin2019_PNAS] Thus, poor generalizability can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory. Additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles. #### Evaluation metrics for imbalanced classification @@ -106,18 +105,22 @@ As a result, several opportunities for innovation arise: understanding the cause Unfortunately, uncertainty quantification techniques are underutilized in the computational biology communities and largely ignored in the current deep learning for biomedicine literature. Thus, the practical value of uncertainty quantification in biomedical domains is yet to be appreciated. -### Interpretation +### Interpretability -As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. -Interpretability matters for two main reasons. -First, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. -However, this would not be possible if the model is a black box. -Second, interpretability is important for trust. -If a model is making medical diagnoses, it is important to ensure the model is making decisions for reliable reasons and is not focusing on an artifact of the data. -A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. 
-In the context of deep learning, understanding the basis of a model's output is particularly important as deep learning models are unusually susceptible to adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise.
-
-As the concept of interpretability is quite broad, many methods described as improving the interpretability of deep learning models take disparate and often complementary approaches.
+As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. There are several important reasons to care about interpretability.
+Firstly, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand.
+For instance, interpreting a model for predicting chemical properties from molecular graphs may illuminate previously unknown structure-property relations.
+It is also useful to see if a model is using known relationships; if not, this may suggest a way to improve the model.
+Finally, there is a chance that the model may have learned relationships that are known to be wrong. This can be due to improper training data or due to overfitting on spurious correlations in the training data.
+
+This is particularly important if a model is making medical diagnoses. A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital.
+
+It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. While this is largely still an unsolved problem, the interpretation of deep learning models can help understand these failure modes and how to prevent them.
+
+Several different levels of interpretability can be distinguished. Consider a prototypical CNN used for image classification. At a high level, one can perform an occlusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap. Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing]. Finally, if one wishes to analyze the flow of information through a deep neural network, layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization]
+
+A starting point for many discussions of interpretability is the interpretability-accuracy trade-off. The trade-off assumes that only simple models are interpretable and often a delineation is made between "white box" models (linear regression, decision trees) that are assumed to be not very accurate and "black box" models (neural networks, kernel SVMs) which are assumed to be more accurate.
This view is becoming outmoded, however with the development of sophisticated tools for interrogating and understanding deep neural networks.[@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] Still, this trade-off motivates a common practice whereby a easy to interpret model is trained next to a hard to interpret one. For instance, in the example discussed by Caruana et al. mentioned earlier, a rule-based model was trained next to a neural network using the same training data to understand the types of relations were learned by the neural network. More recently, a method for "distilling" a neural network into a decision tree has been developed.[@tag:Frosst2017_distilling] #### Assigning example-specific importance scores diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv new file mode 100644 index 00000000..ee40470f --- /dev/null +++ b/content/citation-tags.tsv @@ -0,0 +1,345 @@ +tag citation +Abe doi:10.1101/gr.634603 +Abramoff2016_dr doi:10.1167/iovs.16-19964 +Agarwal2015_targetscan doi:10.7554/eLife.05005 +Aitchison2017 url:http://papers.nips.cc/paper/6940-model-based-bayesian-inference-of-neural-activity-and-connectivity-from-all-optical-interrogation-of-a-neural-circuit +Alipanahi2015_predicting doi:10.1038/nbt.3300 +AltaeTran2016_one_shot doi:10.1021/acscentsci.6b00367 +Amit2017_breast_mri doi:10.1117/12.2249981 +Asgari doi:10.1371/journal.pone.0141287 +blast doi:10.1016/S0022-2836(05)80360-2 +Angermueller2016_dl_review doi:10.15252/msb.20156651 +Angermueller2016_single_methyl doi:10.1186/s13059-017-1189-z +Angermueller2017 doi:10.1186/s13059-017-1189-z +Artemov2016_clinical doi:10.1101/095653 +Arvaniti2016_rare_subsets doi:10.1101/046508 +Bach2015_on doi:10.1371/journal.pone.0130140 +Bahdanu2014_neural arxiv:1409.0473 +Baskin2015_drug_disc doi:10.1080/17460441.2016.1201262 +Bar2015_nonmed_tl doi:10.1117/12.2083124 +Barash2010_splicing_code doi:10.1038/nature09000 +Baxt1991_myocardial doi:10.7326/0003-4819-115-11-843 +BeaulieuJones2016_ehr_encode doi:10.1016/j.jbi.2016.10.007 +Belkin2019_PNAS doi:10.1073/pnas.1903070116 +Bengio2015_prec arxiv:1412.7024 +Berezikov2011_mirna doi:10.1038/nrg3079 +Bergstra2011_hyper url:https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf +Bergstra2012_random url:http://www.jmlr.org/papers/v13/bergstra12a.html +Bracken2016_mirna doi:10.1038/nrg.2016.134 +Boza doi:10.1371/journal.pone.0178751 +Buggenthin2017_imaged_lineage doi:10.1038/nmeth.4182 +Burlina2016_amd doi:10.1109/ISBI.2016.7493240 +Chatterjee2018 arxiv:1807.09617 +Caruana2014_need arxiv:1312.6184 +Caruana2015_intelligible doi:10.1145/2783258.2788613 +Chaudhary2017_multiom_liver_cancer doi:10.1101/114892 +Che2015_distill arxiv:1512.03542 +Che2016_rnn arxiv:1606.01865 +Chen2015_hashing arxiv:1504.04788 +Chen2016_exprs_yeast doi:10.1186/s12859-015-0852-1 +Chen2016_gene_expr doi:10.1093/bioinformatics/btw074 +Chen2015_trans_species doi:10.1093/bioinformatics/btv315 +Choi2016_retain arxiv:1608.05745 +Choi2016_gram arxiv:1611.07012 +Chollet2016_xception arxiv:1610.02357 +Christensen2009 doi:10.1371/journal.pgen.1000602 +Chryssolouris1996_confidence doi:10.1109/72.478409 +Ciresan2013_mitosis doi:10.1007/978-3-642-40763-5_51 +Coates2013_cots_hpc url:http://www.jmlr.org/proceedings/papers/v28/coates13.html +Codella2016_ensemble_melanoma arxiv:1610.04662 +Consortium2012_encode doi:10.1038/nature11247 +CudNN arxiv:1410.0759 +Dahl2014_multi_qsar arxiv:1406.1231 +Darst2018 doi:10.1186/s12863-018-0646-3 +Dean2012_nips_downpour 
url:http://research.google.com/archive/large_deep_networks_nips2012.html +DeepChem url:https://github.com/deepchem/deepchem +Deming2016_genetic arxiv:1605.07156 +Ding doi:10.1186/s12859-015-0753-3 +Ditzler doi:10.1186/s12859-015-0793-8 +Ditzler2 doi:10.1109/TNNLS.2014.2320415 +Ditzler3 doi:10.1109/TNB.2015.2461219 +Dhungel2015_struct_pred_mamm doi:10.1007/978-3-319-24553-9_74 +Dhungel2016_mamm doi:10.1007/978-3-319-46723-8_13 +Dhungel2017_mamm_min_interv doi:10.1016/j.media.2017.01.009 +Dream_tf_binding url:https://www.synapse.org/#!Synapse:syn6131484/wiki/402026 +Dragonn url:http://kundajelab.github.io/dragonn/ +Duvenaud2015_graph_conv url:http://papers.nips.cc/paper/5954-convolutional-networks-on-graphs-for-learning-molecular-fingerprints +Edwards2015_growing_pains doi:10.1145/2771283 +Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 +Elephas url:https://github.com/maxpumperla/elephas +Elton_molecular_design_review doi:10.1039/C9ME00039A +Errington2014_reproducibility doi:10.7554/eLife.04333 +Eser2016_fiddle doi:10.1101/081380 +Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 +Essinger2010_taxonomic doi:10.1109/IJCNN.2010.5596644 +Esteva2017_skin_cancer_nature doi:10.1038/nature21056 +Faruqi url:http://alifar76.github.io/sklearn-metrics/ +Feinberg2018 doi:10.1056/NEJMra1402513 +Finnegan2017_maximum doi:10.1101/105957 +Fong2017_perturb doi:10.1109/ICCV.2017.371 +Fraga2005 doi:10.1073/pnas.0500398102 +Frosst2017_distilling arxiv:1711.09784 +Fu2019 doi:10.1109/TCBB.2019.2909237 +Gal2015_dropout arxiv:1506.02142 +Gaublomme2015_th17 doi:10.1016/j.cell.2015.11.009 +Gargeya2017_dr doi:10.1016/j.ophtha.2017.02.008 +Gawad2016_singlecell doi:10.1038/nrg.2015.16 +Geras2017_multiview_mamm doi:10.1038/nrg.2015.16 +Gerstein2016_scaling doi:10.1186/s13059-016-0917-0 +Ghandi2014_enhanced doi:10.1371/journal.pcbi.1003711 +Ghosh1992_sequence doi:10.1117/12.140112 +Glorot2011_domain url:http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.231.3442 +Goldsborough2017_cytogan doi:10.1101/227645 +Gomezb2016_automatic arxiv:1610.02415v1 +Graphlab doi:10.14778/2212351.2212354 +Groop1986_islet doi:10.2337/diab.35.2.237 +Gultepe2014_sepsis doi:10.1136/amiajnl-2013-001815 +Gulshan2016_dt doi:10.1001/jama.2016.17216 +Gupta2015_exprs_yeast doi:10.1101/031906 +Gupta2015_prec arxiv:1502.02551 +Guetterman url:http://www.fasebj.org/content/30/1_Supplement/406.3 +Hadjas2015_cc arxiv:1504.04343 +He2015_images arxiv:1512.03385 +Hinton2006_autoencoders doi:10.1126/science.1127647 +Hinton2015_dark_knowledge arxiv:1503.02531 +Hinton2015_dk arxiv:1503.02531v1 +Hochreiter doi:10.1093/bioinformatics/btm247 +Hoff doi:10.1093/nar/gkp327 +Horton1992_assessment doi:10.1093/nar/20.16.4331 +Horvath2013 doi:10.1186/gb-2013-14-10-r115 +Horvath2014 doi:10.1073/pnas.1412759111 +Houseman2012 doi:10.1186/1471-2105-13-86 +Houseman2016 doi:10.1186/s12859-016-1140-4 +Hubara2016_qnn arxiv:1609.07061 +Huddar2016_predicting doi:10.1109/ACCESS.2016.2618775 +Hughes2016_macromol_react doi:10.1021/acscentsci.6b00162 +Iglovikov2017_baa doi:10.1101/234120 +Islam2018 doi:10.1186/s12919-018-0121-1 +Ithapu2015_efficient doi:10.1016/j.jalz.2015.01.010 +Jafari2016_skin_lesions doi:10.1007/s11548-017-1567-8 +Jha2017_integrative_models doi:10.1101/104869 +Johnson2017_integ_cell arxiv:1705.00092 +JuanMateu2016_t1d doi:10.1530/EJE-15-0916 +Kahng2017_activis arxiv:1704.01942 +Kalinin2018_pgx arxiv:1801.08570 +Karlin doi:10.1128/jb.179.12.3899-3913.1997 +Karpathy2015_visualizing arxiv:1506.02078 
+Katzman2016_deepsurv arxiv:1606.00931 +Kearnes2016_admet arxiv:1606.08793 +Kearnes2016_graph_conv doi:10.1007/s10822-016-9938-8 +Kelley2016_basset doi:10.1101/gr.200535.115 +Keras url:https://github.com/fchollet/keras +Kizek doi:10.1016/j.bjid.2015.08.013 +Kindermans2016_investigating arxiv:1611.07270 +Knights doi:10.1111/j.1574-6976.2010.00251.x +Koh2016_denoising doi:10.1101/052118 +Koh2017_understanding arxiv:1703.04730 +Kooi2016_mamm_lesions doi:10.1016/j.media.2016.07.007 +Kooi2017_mamm_tl doi:10.1002/mp.12110 +Korfiatis2017 doi:10.1007/s10278-017-0009-z +Kraus2017_deeploc doi:10.15252/msb.20177551 +Kresovich2019 doi:10.1093/jnci/djz020 +Krizhevsky2013_nips_cnn url:https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf +Krizhevsky2014_weird_trick arxiv:1404.5997 +Kwabi-Addo2007 doi:10.1158/1078-0432.CCR-07-0085 +Khwaja2017 doi:10.1109/BIOCAS.2017.8325078 +Khwaja2018 arxiv:1810.01243 +Lacey2016_dl_fpga arxiv:1602.04283 +Laird2010 doi:10.1038/nrg2732 +Lakhani2017_radiography doi:10.1148/radiol.2017162326 +Lanchantin2016_motif arxiv:1608.03644 +Lee2016_deeptarget arxiv:1603.09123v2 +Lee2016_emr_oct_amd doi:10.1101/094276 +Lei2016_rationalizing arxiv:1606.04155 +Leibig2016_dr doi:10.1101/084210 +Levy2019 doi:10.1101/692665 +Levy-Jurgenson2018 doi:10.1101/491357 +Li2014_minibatch doi:10.1145/2623330.2623612 +Li2016_variation doi:10.1126/science.aad9417 +Liang2015_exprs_cancer doi:10.1109/TCBB.2014.2377729 +Lin2017_why_dl_works arxiv:1608.08225v3 +Lipton2016_missing arxiv:1606.04130 +Lipton2015_lstm arxiv:1510.07641 +Litjens2016_histopath_survey doi:10.1038/srep26286 +Litjens2017_medimage_survey doi:10.1016/j.media.2017.07.005 +Lisboa2006_review doi:10.1016/j.neunet.2005.10.007 +Liu2013 doi:10.1038/nbt.2487 +Liu doi:10.1371/journal.pone.0053253 +Liu2016_towards arxiv:1604.07043 +Liu2016_sc_transcriptome doi:10.12688/f1000research.7223.1 +Lodato2015_neurons doi:10.1126/science.aab1785 +Lowe2012_kaggle url:http://blogs.sciencemag.org/pipeline/archives/2012/12/11/did_kaggle_predict_drug_candidate_activities_or_not +lmat doi:10.1093/bioinformatics/btt389 +Lundberg2016_an arxiv:1611.07478 +Lusci2015_irv doi:10.1186/s13321-015-0110-6 +Lusci2013_rnn doi:10.1021/ci400187y +Ma2015_qsar_merck doi:10.1021/ci500747n +Maaten2008_tsne url:http://www.jmlr.org/papers/v9/vandermaaten08a.html +Mahendran2014_understanding arxiv:1412.0035 +Mahendran2016_salient doi:10.1007/978-3-319-46466-4_8 +Mahendran2016_visualizing doi:10.1007/s11263-016-0911-8 +Mahmood doi:10.1016/S0140-6736(13)61752-3 +Mapreduce doi:10.1145/1327452.1327492 +Mayr2016_deep_tox doi:10.3389/fenvs.2015.00080 +McHardy doi:10.1038/nmeth976 +McHardy2 doi:10.7717/peerj.1603 +Meissner2008 doi:10.1038/nature07107 +Metaphlan doi:10.1038/nmeth.2066 +Meng2016_mllib arxiv:1505.06807 +Min2016_deepenhancer doi:10.1109/BIBM.2016.7822593 +Montavon2018_visualization doi:10.1016/j.dsp.2017.10.011 +Momeni2018 doi:10.1101/438341 +Moritz2015_sparknet arxiv:1511.06051 +Mordvintsev2015_inceptionism url:http://googleresearch.blogspot.co.uk/2015/06/inceptionism-going-deeper-into-neural.html +Mrzelj url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 +matis doi:10.1016/S0097-8485(96)80015-5 +nbc doi:10.1093/bioinformatics/btq619 +Murdoch2017_automatic arxiv:1702.02540 +Nazor2012 doi:10.1016/j.stem.2012.02.013 +Nemati2016_rl doi:10.1109/EMBC.2016.7591355 +Ni2018 doi:10.1101/385849 +Nguyen2014_adversarial arxiv:1412.1897v4 +Ngiam2011 url:https://ai.stanford.edu/~ang/papers/icml11-MultimodalDeepLearning.pdf 
+Nie2016_3d_survival doi:10.1007/978-3-319-46723-8_25 +NIH2016_genome_cost url:https://www.genome.gov/27565109/the-cost-of-sequencing-a-human-genome/ +Nih_curiosity url:https://www.nigms.nih.gov/Education/Documents/curiosity.pdf +Olivecrona2017_drug_design arxiv:1704.07555 +Osokin2017_biogan arxiv:1708.04692 +onecodex url:https://www.onecodex.com/ +Papernot2017_pate url:https://openreview.net/forum?id=HkwoSDPgg +Park2016_deepmirgene arxiv:1605.00017 +Parnamaa2017 doi:10.1534/g3.116.033654 +Pan2018 doi:10.1101/438218 +Pawlowski2016 doi:10.1101/085118 +Peng2019 doi:10.1101/527044 +Pereira2016_docking doi:10.1021/acs.jcim.6b00355 +PerezSianes2016_screening doi:10.1007/978-3-319-40126-3_2 +Phymm doi:10.1038/nmeth.1358 +Poplin2016_deepvariant doi:10.1101/092890 +Pratt2016_dr doi:10.1016/j.procs.2016.07.014 +Quach2017 doi:10.18632/aging.101168 +Quang2017_factor doi:10.1101/151274 +Qin2017_onehot doi:10.1371/journal.pcbi.1005403 +Qiu2017_graph_embedding doi:10.1101/110668 +Qiu2018 doi:10.1101/406066 +Ragoza2016_protein arxiv:1612.02751 +RAD2010_view_cc doi:10.1145/1721654.1721672 +Radford_dcgan arxiv:1511.06434v2 +Rajkomar2017_radiographs doi:10.1007/s10278-016-9914-9 +Rakhlin2018_histology doi:10.1101/259911 +Ramsundar2015_multitask_drug arxiv:1502.02072 +Ranganath2016_deep arxiv:1608.02158 +Raina2009_gpu doi:10.1145/1553374.1553486 +Relton2010 doi:10.1371/journal.pmed.1000356 +Ribeiro2016_lime arxiv:1602.04938 +Robertson2005 doi:10.1038/nrg1655 +Rogers2010_fingerprints doi:10.1021/ci100050t +Roth2015_view_agg_cad doi:10.1109/TMI.2015.2482920 +Romero2017_diet url:https://openreview.net/pdf?id=Sk-oDY9ge +Rosenberg2015_synthetic_seqs doi:10.1016/j.cell.2015.09.054 +Russakovsky2015_imagenet doi:10.1007/s11263-015-0816-y +Sa2015_buckwild pmcid:PMC4907892 +Salas2018_GR doi:10.1101/gr.233213.117 +Salas2018 doi:10.1186/s13059-018-1448-7 +Salzberg doi:10.1186/1471-2105-11-544 +Schatz2010_dna_cloud doi:10.1038/nbt0710-691 +Schmidhuber2014_dnn_overview doi:10.1016/j.neunet.2014.09.003 +Scotti2016_missplicing doi:10.1038/nrg.2015.3 +Segata doi:10.1371/journal.pcbi.1004977 +Segler2017_drug_design arxiv:1701.01329 +Seide2014_parallel doi:10.1109/ICASSP.2014.6853593 +Setty2015_seqgl doi:10.1371/journal.pcbi.1004271 +Selvaraju2016_grad arxiv:1610.02391 +Serden doi:10.1016/S0168-8510(02)00208-7 +Shaham2016_batch_effects doi:10.1093/bioinformatics/btx196 +Shapely doi:10.1515/9781400881970-018 +Shen2017_medimg_review doi:10.1146/annurev-bioeng-071516-044442 +Shin2016_cad_tl doi:10.1109/TMI.2016.2528162 +Shrikumar2017_learning arxiv:1704.02685 +Shrikumar2017_reversecomplement doi:10.1101/103663 +Simonyan2013_deep arxiv:1312.6034 +Singh2017_attentivechrome arxiv:1708.00339 +Singh2016_deepchrome arxiv:1607.02078 +Singh2016_tsk doi:10.1109/TCBB.2016.2609918 +Silver2016_alphago doi:10.1038/nature16961 +Sonderby doi:10.1007/978-3-319-21233-3_6 +Soueidan doi:10.1515/metgen-2016-0001 +Spark doi:10.1145/2934664 +Speech_recognition url:http://www.businessinsider.com/ibm-edges-closer-to-human-speech-recognition-2017-3 +Springenberg2014_striving arxiv:1412.6806 +Stein2010_cloud doi:10.1186/gb-2010-11-5-207 +Stenstrom2005_latent doi:10.2337/diabetes.54.suppl_2.S68 +Stormo2000_dna doi:10.1093/bioinformatics/16.1.16 +Stratnikov doi:10.1186/2049-2618-1-11 +Strobelt2016_visual arxiv:1606.07461 +Su2015_gpu arxiv:1507.01239 +Subramanian2016_bace1 doi:10.1021/acs.jcim.6b00290 +Sun2016_ensemble arxiv:1606.00575 +Sundararajan2017_axiomatic arxiv:1703.01365 +Sutskever arxiv:1409.3215 +Swamidass2009_irv doi:10.1021/ci8004379 
+Tan2014_psb doi:10.1142/9789814644730_0014 +Tan2015_adage doi:10.1128/mSystems.00025-15 +Tan2016_eadage doi:10.1101/078659 +TAC-ELM doi:10.1142/S0219720012500151 +TensorFlow arxiv:1603.04467 +Teschendorff2017 doi:10.2217/epi-2016-0153 +Tian2019 doi:10.1186/s12864-019-5488-5 +Titus2017 doi:10.1093/hmg/ddx275 +Torracinta2016_deep_snp doi:10.1101/097469 +Torracinta2016_sim doi:10.1101/079087 +Tu1996_anns doi:10.1016/S0895-4356(96)00002-9 +Unterthiner2014_screening url:http://www.bioinf.at/publications/2014/NIPS2014a.pdf +Vanhoucke2011_cpu url:https://research.google.com/pubs/pub37631.html +Vera2016_sc_analysis doi:10.1146/annurev-genet-120215-034854 +Vervier doi:10.1093/bioinformatics/btv683 +Wallach2015_atom_net arxiv:1510.02855 +Wang2016_breast_cancer arxiv:1606.05718 +Wang2016_methyl doi:10.1038/srep19598 +Wang2016_protein_contact doi:10.1371/journal.pcbi.1005324 +Wasson1985_clinical doi:10.1056/NEJM198509263131306 +WayGreene2017_eval arxiv:1711.04828 +WayGreene2017_tybalt doi:10.1101/174474 +Wilhelm-Benartzi2013 doi:10.1038/bjc.2013.496 +Word2Vec arxiv:1301.3781 +wgsquikr doi:10.1371/journal.pone.0091784 +Wu2017_molecule_net doi:10.1039/C7SC02664A +Xiang doi:10.1016/S0167-9473(99)00098-5 +Xiong2011_bayesian doi:10.1093/bioinformatics/btr444 +Xiong2015_splicing_code doi:10.1126/science.1254806 +Xu2015_show arxiv:1502.03044 +Yasushi2016_cgbvs_dnn doi:10.1002/minf.201600045 +yok doi:10.1186/1471-2105-12-20 +Yoon2016_cancer_reports doi:10.1007/978-3-319-47898-2_21 +Yosinski2014 url:https://papers.nips.cc/paper/5347-how-transferable-are-features-in-deep-neural-networks +Yosinksi2015_understanding arxiv:1506.06579 +Yu2016_melanoma_resnet doi:10.1109/TMI.2016.2642839 +Zhavoronkov2019_drugs doi:10.1038/s41587-019-0224-x +Zeiler2013_visualizing doi:10.1007/978-3-319-10590-1_53 +Zeng2015 doi:10.1186/s12859-015-0553-9 +Zeng2016_convolutional doi:10.1093/bioinformatics/btw255 +Zhang2015_multitask_tl doi:10.1145/2783258.2783304 +Zhang2017_generalization arxiv:1611.03530v2 +Zhang2019 doi:10.1186/s12885-019-5932-6 +Zhou2015_deep_sea doi:10.1038/nmeth.3547 +Zhu2016_advers_mamm doi:10.1101/095786 +Zhu2016_mult_inst_mamm doi:10.1101/095794 +Zintgraf2017_visualizing arxiv:1702.04595 +goodfellow2016deep url:http://www.deeplearningbook.org/ +li2016joint url:https://dl.acm.org/citation.cfm?id=3061018 +world2004international url:http://www.who.int/classifications/icd/en/ +ghahramani_protect arxiv:1707.02476 +uncertainty_types arxiv:1703.04977 +uncertainty_multi_task arxiv:1705.07115 +guo_calibration arxiv:1706.04599 +platt_scaling url:http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639 +out_dist_baseline arxiv:1610.02136 +temp_out_dist arxiv:1706.02690 +ai_safety arxiv:1606.06565 +strong_adversary arxiv:1705.07263 +retinopathy_uncertainty doi:10.1038/s41598-017-17876-z +bayesian_hypernets arxiv:1710.04759 +mcclure_bayesian arxiv:1611.01639 +uncertainty_ensembles arxiv:1612.01474 +domain_adapt_uncertainty arxiv:1505.07818 +gal_thesis url:http://www.cs.ox.ac.uk/people/yarin.gal/website/thesis/thesis.pdf From cf85f39f8aa0193edd1663bdeabcecece3803435 Mon Sep 17 00:00:00 2001 From: Casey Greene Date: Mon, 10 Feb 2020 15:00:15 -0500 Subject: [PATCH 10/22] remove build files --- build/ci/cache/requests-cache.sqlite | Bin 20480 -> 0 bytes build/output/citations.tsv | 1 - build/output/manuscript.md | 38 --------------------------- build/output/references.json | 1 - build/output/variables.json | 17 ------------ build/webpage/v/freeze/index.html | 19 -------------- build/webpage/v/latest | 1 - 7 files changed, 
77 deletions(-) delete mode 100644 build/ci/cache/requests-cache.sqlite delete mode 100644 build/output/citations.tsv delete mode 100644 build/output/manuscript.md delete mode 100644 build/output/references.json delete mode 100644 build/output/variables.json delete mode 100644 build/webpage/v/freeze/index.html delete mode 120000 build/webpage/v/latest diff --git a/build/ci/cache/requests-cache.sqlite b/build/ci/cache/requests-cache.sqlite deleted file mode 100644 index 5f5d7c8e968a7214f2eb1395eb7369bddc7dc5be..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20480 zcmeI%K}*9h7=YoV>xvUbcPTwCHx)!E9(UQ49%K`B?y^%?F~YE}y0)S}a6ibKN1MWm z9(wdr-ayhXA?-(=^b&G^H(TdgJ*C^1I9KOlAfyyyrGyYxS-WN37t^=*o$@Jv#Z93l z&PHE-_gmO*Pq?|eb9?TzX)@yoAb1$Bir#uo)qiNqyEU=lJZ;~ z#YLW0{m8r<1*YxZNXH)zWm=`$>g+Y$WIC$@>u=I}yg74VIes{7%3OaS3hJ;^dL9_l zzG()gp&)<&0tg_000IagfB*srAb>zy1rE)^IRCeGds!C(2q1s}0tg_000IagfB*uO z0Qdj=2nZm600IagfB*srAb0R#|0009ILKmY**5J2Du^0!$7 diff --git a/build/output/citations.tsv b/build/output/citations.tsv deleted file mode 100644 index e4349893..00000000 --- a/build/output/citations.tsv +++ /dev/null @@ -1 +0,0 @@ -manuscript_citekey detagged_citekey standard_citekey short_citekey diff --git a/build/output/manuscript.md b/build/output/manuscript.md deleted file mode 100644 index df0d610a..00000000 --- a/build/output/manuscript.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -author-meta: [] -date-meta: '2019-12-19' -header-includes: ' - - - - - - - - - - - - - - - - - - - - - - - - - - ' -... - diff --git a/build/output/references.json b/build/output/references.json deleted file mode 100644 index fe51488c..00000000 --- a/build/output/references.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/build/output/variables.json b/build/output/variables.json deleted file mode 100644 index 88840dd9..00000000 --- a/build/output/variables.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "pandoc": { - "date-meta": "2019-12-19", - "author-meta": [], - "header-includes": "\n\n\n\n\n\n\n\n\n\n\n\n\n" - }, - "manubot": { - "date": "December 19, 2019", - "authors": [], - "manuscript_stats": { - "reference_counts": { - "total": 0 - }, - "word_count": 0 - } - } -} diff --git a/build/webpage/v/freeze/index.html b/build/webpage/v/freeze/index.html deleted file mode 100644 index bff3da63..00000000 --- a/build/webpage/v/freeze/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - Page Redirection - - - If you are not redirected automatically, follow this link. - - diff --git a/build/webpage/v/latest b/build/webpage/v/latest deleted file mode 120000 index c2c027fe..00000000 --- a/build/webpage/v/latest +++ /dev/null @@ -1 +0,0 @@ -local \ No newline at end of file From ebb27b1f6e36717b1b215eb74f1ad19abf7e760a Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Fri, 14 Feb 2020 18:11:32 -0500 Subject: [PATCH 11/22] rehash/update my previous commit - single lines and other fixes --- content/05.treat.md | 34 ++++++++++++++++++++++++---------- content/06.discussion.md | 38 +++++++++++++++++++++++++++++--------- content/citation-tags.tsv | 7 +++++++ 3 files changed, 60 insertions(+), 19 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index 3c71ba95..217b6c71 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -182,22 +182,36 @@ However, in the long term, atomic convolutions may ultimately overtake grid-base *De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery in-silico [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. 
It explores an estimated 1060 synthesizable organic molecules with drug-like properties without explicit enumeration [@doi:10.1002/wcms.1104]. -To test or score structures, physics-based simulation could be used, or machine learning models based on techniques discussed may be used, as they are much more computationally efficient. +To score molecules after generation or during optimization, physics-based simulation could be used [@tag:Sumita2018], but machine learning models based on techniques discussed earlier may be preferable [@tag:Gomezb2016_automatic], as they are much more computationally expedient. Computationally efficiency is particularly important during optimization as the "scoring function" may need to be called thousands of times. + To "design" and "synthesize", traditional *de novo* design software relied on classical optimizers such as genetic algorithms. +These approaches can lead to overfit, "weird" molecules, which are difficult to synthesize in the lab. +A popular approach which may help ensure synthesizability is to use rule-based virtual chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. +Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. +In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. -In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] +In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tagVamathevan2019]. Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. -The first successful demonstration of a deep learning based approach for molecular optimization occured in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic]. -In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete -(e.g. bit vector or string) features or in symbolic, molecular graph space. Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. 
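A minimal sketch of this string-based view of molecules, assuming only the open-source RDKit package and a handful of illustrative SMILES strings, shows how such strings can be tokenized into a character vocabulary for a sequence model and checked for chemical validity:

```python
# Minimal sketch: SMILES strings treated as character sequences.
# RDKit is an assumed dependency; the example molecules are illustrative only.
from rdkit import Chem

smiles_examples = [
    "CCO",                     # ethanol
    "c1ccccc1",                # benzene
    "CC(=O)Oc1ccccc1C(=O)O",   # aspirin
    "C1CC(",                   # syntactically broken string
]

# Character vocabulary of the kind a character-level generative model would use.
vocab = sorted(set("".join(smiles_examples)))
char_to_index = {ch: i for i, ch in enumerate(vocab)}

def encode(smiles):
    """Map a SMILES string to a list of integer token ids."""
    return [char_to_index[ch] for ch in smiles]

for s in smiles_examples:
    mol = Chem.MolFromSmiles(s)  # returns None for chemically invalid strings
    print(s, encode(s), "valid" if mol is not None else "invalid")
```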
+The first successful demonstration of a deep learning based approach for molecular optimization occurred in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic]. +In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete (e.g. bit vector or string) features or in symbolic, molecular graph space. +Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. +The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. A drawback of the SMILES-to-SMILES autoencoder is that not all SMILES strings produced by the autoencoder's decoder correspond to valid chemical structures. The Grammar Variational Autoencoder, which takes the SMILES grammar into account and is guaranteed to produce syntactically valid SMILES, helps alleviate this issue to some extent [@arxiv:1703.01925]. Another approach to *de novo* design is to train character-based RNNs on large collections of molecules, for example, ChEMBL [@doi:10.1093/nar/gkr777], to first obtain a generic generative model for drug-like compounds [@tag:Segler2017_drug_design]. -These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796]. Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules. - -Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules.[@tag:Elton_molecular_design_review] A reinforcement learning agent developed by Zhou et al. demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network.[@doi:10.1038/s41598-019-47148-x] As another example, Zhavoronkov et al. 
used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1).[@tag:Zhavoronkov2019_drugs] Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1.[@tag:Zhavoronkov2019_drugs]

-It is worth pointing out that it has been shown that classical genetic algorithms can compete with many of the most advanced deep learning methods for molecular optimization.[@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C] Such genetic algorithms use hard coded rules based possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. Still, there are many avenues for improving current deep learning systems and the future of the field looks bright.
+These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures.
+The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796].
+Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules.
+
+Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry.
+Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules.[@tag:Elton_molecular_design_review]
+A reinforcement learning agent developed by Zhou et al. demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network [@doi:10.1038/s41598-019-47148-x].
+As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1) [@tag:Zhavoronkov2019_drugs].
+Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1 [@tag:Zhavoronkov2019_drugs].
+
+In concluding this section, it is worth pointing out that classical genetic algorithms have been shown to compete with some of the most advanced deep learning methods for molecular optimization [@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C].
+Such genetic algorithms use hard-coded rules based on possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849].
+Still, there are many avenues for improving current deep learning systems and the future of the field looks bright.

diff --git a/content/06.discussion.md b/content/06.discussion.md
index 27c13b64..936a1d75 100644
--- a/content/06.discussion.md
+++ b/content/06.discussion.md
@@ -3,7 +3,7 @@
 Despite the disparate types of data and scientific goals in the learning tasks covered above, several challenges are broadly important for deep learning in the biomedical domain. 
Here we examine these factors that may impede further progress, ask what steps have already been taken to overcome them, and suggest future research directions.
 
-### Customizing deep learning models reflects a tradeoff between bias and variance
+### Preventing overfitting via hyperparameter tuning
 
 Some of the challenges in applying deep learning are shared with other machine learning methods.
 In particular, many problem-specific optimizations described in this review reflect a recurring universal tradeoff---controlling the flexibility of a model in order to maximize predictivity.
@@ -12,7 +12,13 @@ One way of understanding such model optimizations is that they incorporate exter
 This balance is formally described as a tradeoff between "bias and variance" [@tag:goodfellow2016deep].
 
-Although the bias-variance tradeoff is is important to take into account in many machine learning tasks, recent empirical and theoretical observations suggest that deep neural networks have uniquely advantageous generalization properties and do not obey the tradeoff as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. According to the bias-variance theory, many of the most successful deep neural networks have so many free parameters they should overfit.[@tag:Belkin2019_PNAS] It has been shown that deep neural networks operate in a regime where they can exactly interpolate their training data yet are still able to generalize.[@tag:Belkin2019_PNAS] Thus, poor generalizability can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory. Additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles.
+Although the bias-variance trade-off is important to take into account with many classical machine learning models, recent empirical and theoretical observations suggest that deep neural networks in particular do not obey the tradeoff as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works].
+It has been demonstrated that poor generalizability (test error) can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory.
+This phenomenon, known as "double descent", indicates that deep neural networks achieve their best performance when they smoothly interpolate the training data, resulting in near-zero training error [@tag:Belkin2019_PNAS].
+
+To optimize neural networks, hyperparameters must be tuned to yield the network with the best test error.
+This is computationally expensive and often not done; however, it is important to do when making claims about the superiority of one machine learning method vs. another.
+Several examples have now been uncovered where a new method said to be superior to a baseline method (like an LSTM) after sufficient hyperparameter tuning [@tag:Sculley2018].
 
 #### Evaluation metrics for imbalanced classification
 
@@ -107,20 +113,33 @@ Thus, the practical value of uncertainty quantification in biomedical domains is
 
 ### Interpretability
 
-As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. There are several important reasons to care about interpretability. 
+As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to develop methods for interpreting how they function. +There are several important reasons one might be interested in interpretability, which is also called "explainability". Firstly, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. For instance, interpreting a model for predicting chemical properties from molecular graphs may illuminate previously unknown structure-property relations. It is also useful to see if a model is using known relationships - if not, this may suggest a way to improve the model. -Finally, there is a chance that the model may have learned relationships that are known to be wrong. This can be due to improper training data or due to overfitting on spurious correlations in the training data. +Finally, there is a chance that the model may have learned relationships that are known to be wrong. +This can be due to improper training data or due to overfitting on spurious correlations in the training data. -This is particularly important if a model is making medical diagnoses. A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. +This is particularly important if a model is making medical diagnoses. +A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. -It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. While this is largely still an unsolved problem, the interpretation of deep learning models can help understand these failure modes and how to prevent them. +It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. +While this is largely still an unsolved problem, the interpretation of deep learning models may help understand these failure modes and how to prevent them. -Several different levels of interpretability can be distinguished. Consider a prototypical CNN used for image classification. At a high level, one can perform an occulusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap. Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing]. Finally, if one wishes to analyze the flow of information through a deep neural network layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization] +Several different levels of interpretability can be distinguished. +Consider a prototypical CNN used for image classification. 
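A minimal sketch of the occlusion-style sensitivity analysis described next, assuming only NumPy, a placeholder image, and a stand-in `predict` function in place of a trained CNN, might look like the following:

```python
# Minimal sketch of an occlusion sensitivity analysis for an image classifier.
# `predict` and the random image are placeholders standing in for a trained CNN
# and a real input; both are assumptions made purely for illustration.
import numpy as np

def predict(image):
    """Stand-in for a CNN's probability for the class of interest."""
    return float(image.mean())

def occlusion_heatmap(image, patch=8, baseline=0.0):
    """Slide a gray patch over the image and record the drop in the score."""
    height, width = image.shape
    reference = predict(image)
    heatmap = np.zeros((height // patch, width // patch))
    for i in range(0, height, patch):
        for j in range(0, width, patch):
            occluded = image.copy()
            occluded[i:i + patch, j:j + patch] = baseline
            # A large drop means the occluded region was important.
            heatmap[i // patch, j // patch] = reference - predict(occluded)
    return heatmap

image = np.random.rand(32, 32)  # placeholder grayscale image
print(occlusion_heatmap(image).round(3))
```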
+At a high level, one can perform an occlusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap.
+Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing].
+Finally, if one wishes to analyze the flow of information through a deep neural network, layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization]
 
-A starting point for many discussions of interpretability is the interpretability-accuracy trade-off. The trade-off assumes that only simple models are interpretable and often a delineation is made between “white box" models (linear regression, decision trees) that are assumed to be not very accurate and “black box" models (neural networks, kernel SVMs) which are assumed to be more accurate. This view is becoming outmoded, however with the development of sophisticated tools for interrogating and understanding deep neural networks.[@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] Still, this trade-off motivates a common practice whereby a easy to interpret model is trained next to a hard to interpret one. For instance, in the example discussed by Caruana et al. mentioned earlier, a rule-based model was trained next to a neural network using the same training data to understand the types of relations were learned by the neural network. More recently, a method for "distilling" a neural network into a decision tree has been developed.[@tag:Frosst2017_distilling]
+A starting point for many discussions of interpretability is the interpretability-accuracy trade-off.
+The trade-off assumes that only simple models are interpretable and often a delineation is made between "white box" models (linear regression, decision trees) that are assumed to be not very accurate and "black box" models (neural networks, kernel SVMs) which are assumed to be more accurate.
+This view is becoming outmoded, however, with the development of sophisticated tools for interrogating and understanding deep neural networks [@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] and new methods for creating highly accurate interpretable models [@tag:Rudin2019].
+Still, this trade-off motivates a common practice whereby an easy-to-interpret model is trained next to a hard-to-interpret one, which is sometimes called "post-hoc interpretation".
+For instance, in the example discussed by Caruana et al. mentioned earlier, a rule-based model was trained next to a neural network using the same training data to understand the types of relations which may have been learned by the neural network.
+Along similar lines, a method for "distilling" a neural network into a decision tree has been developed.[@tag:Frosst2017_distilling]
 
 #### Assigning example-specific importance scores
 
@@ -222,7 +241,8 @@ Towards this end, Che et al. [@tag:Che2015_distill] used gradient boosted trees
 Finally, it is sometimes possible to train the model to provide justifications for its predictions.
 Lei et al. [@tag:Lei2016_rationalizing] used a generator to identify "rationales", which are short and coherent pieces of the input text that produce similar results to the whole input when passed through an encoder. 
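A minimal sketch of this kind of post-hoc surrogate modeling, in which an interpretable model is fit to a network's predictions rather than to the original labels, is shown below; scikit-learn, the synthetic dataset, and all settings are stand-ins chosen purely for illustration:

```python
# Minimal sketch of post-hoc interpretation with a surrogate model: a shallow
# decision tree is trained to mimic a neural network's predictions.
# scikit-learn and the synthetic data are illustrative assumptions.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier, export_text

X, y = make_classification(n_samples=2000, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

black_box = MLPClassifier(hidden_layer_sizes=(64, 64), max_iter=500,
                          random_state=0).fit(X_train, y_train)

# Fit the surrogate to the network's outputs, not to the original labels.
surrogate = DecisionTreeClassifier(max_depth=3, random_state=0)
surrogate.fit(X_train, black_box.predict(X_train))

# "Fidelity" measures how faithfully the tree reproduces the network.
fidelity = surrogate.score(X_test, black_box.predict(X_test))
print(f"surrogate fidelity to the network: {fidelity:.2f}")
print(export_text(surrogate))  # human-readable decision rules
```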
-The authors applied their approach to a sentiment analysis task and obtained substantially superior results compared to an attention-based method. +Shen et al. [@tag:Shen2019] trained a CNN for lung nodule malignancy classification which also provides a series of attributes for the nodule, which they argue help understand how the network functions. +These are both simple examples of an emerging approach towards engendering trust in AI systems which Elton calls "self-explaining AI" [@tag:Elton2020]. #### Future outlook diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv index ee40470f..74e7e90b 100644 --- a/content/citation-tags.tsv +++ b/content/citation-tags.tsv @@ -68,6 +68,7 @@ Edwards2015_growing_pains doi:10.1145/2771283 Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 Elephas url:https://github.com/maxpumperla/elephas Elton_molecular_design_review doi:10.1039/C9ME00039A +Elton2020 arxiv:2002.05149 Errington2014_reproducibility doi:10.7554/eLife.04333 Eser2016_fiddle doi:10.1101/081380 Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 @@ -195,6 +196,7 @@ Mrzelj url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 matis doi:10.1016/S0097-8485(96)80015-5 nbc doi:10.1093/bioinformatics/btq619 Murdoch2017_automatic arxiv:1702.02540 +Murdoch2019 doi:10.1073/pnas.1900654116 Nazor2012 doi:10.1016/j.stem.2012.02.013 Nemati2016_rl doi:10.1109/EMBC.2016.7591355 Ni2018 doi:10.1101/385849 @@ -237,6 +239,7 @@ Rogers2010_fingerprints doi:10.1021/ci100050t Roth2015_view_agg_cad doi:10.1109/TMI.2015.2482920 Romero2017_diet url:https://openreview.net/pdf?id=Sk-oDY9ge Rosenberg2015_synthetic_seqs doi:10.1016/j.cell.2015.09.054 +Rudin2019 doi:10.1038/s42256-019-0048-x Russakovsky2015_imagenet doi:10.1007/s11263-015-0816-y Sa2015_buckwild pmcid:PMC4907892 Salas2018_GR doi:10.1101/gr.233213.117 @@ -245,6 +248,7 @@ Salzberg doi:10.1186/1471-2105-11-544 Schatz2010_dna_cloud doi:10.1038/nbt0710-691 Schmidhuber2014_dnn_overview doi:10.1016/j.neunet.2014.09.003 Scotti2016_missplicing doi:10.1038/nrg.2015.3 +Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf Segata doi:10.1371/journal.pcbi.1004977 Segler2017_drug_design arxiv:1701.01329 Seide2014_parallel doi:10.1109/ICASSP.2014.6853593 @@ -254,6 +258,7 @@ Serden doi:10.1016/S0168-8510(02)00208-7 Shaham2016_batch_effects doi:10.1093/bioinformatics/btx196 Shapely doi:10.1515/9781400881970-018 Shen2017_medimg_review doi:10.1146/annurev-bioeng-071516-044442 +Shen2019 doi:10.1016/j.eswa.2019.01.048 Shin2016_cad_tl doi:10.1109/TMI.2016.2528162 Shrikumar2017_learning arxiv:1704.02685 Shrikumar2017_reversecomplement doi:10.1101/103663 @@ -276,6 +281,7 @@ Su2015_gpu arxiv:1507.01239 Subramanian2016_bace1 doi:10.1021/acs.jcim.6b00290 Sun2016_ensemble arxiv:1606.00575 Sundararajan2017_axiomatic arxiv:1703.01365 +Sumita2018 doi:10.1021/acscentsci.8b00213 Sutskever arxiv:1409.3215 Swamidass2009_irv doi:10.1021/ci8004379 Tan2014_psb doi:10.1142/9789814644730_0014 @@ -291,6 +297,7 @@ Torracinta2016_sim doi:10.1101/079087 Tu1996_anns doi:10.1016/S0895-4356(96)00002-9 Unterthiner2014_screening url:http://www.bioinf.at/publications/2014/NIPS2014a.pdf Vanhoucke2011_cpu url:https://research.google.com/pubs/pub37631.html +Vamathevan2019 doi:10.1038/s41573-019-0024-5 Vera2016_sc_analysis doi:10.1146/annurev-genet-120215-034854 Vervier doi:10.1093/bioinformatics/btv683 Wallach2015_atom_net arxiv:1510.02855 From 15564c5feb71a2a344b86aa58b0b3c5ff456c7b9 Mon Sep 17 00:00:00 2001 From: Daniel Elton 
Date: Fri, 14 Feb 2020 18:16:14 -0500
Subject: [PATCH 12/22] rehash/update my previous commit - single lines and other fixes

---
 content/06.discussion.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/content/06.discussion.md b/content/06.discussion.md
index 936a1d75..17f29ede 100644
--- a/content/06.discussion.md
+++ b/content/06.discussion.md
@@ -18,7 +18,8 @@ This phenomena, known as "double descent" indicates that deep neural networks ac
 
 To optimize neural networks, hyperparameters must be tuned to yield the network with the best test error.
 This is computationally expensive and often not done; however, it is important to do when making claims about the superiority of one machine learning method vs. another.
-Several examples have now been uncovered where a new method said to be superior to a baseline method (like an LSTM) after sufficient hyperparameter tuning [@tag:Sculley2018].
+Several examples have now been uncovered where a new method was said to be superior to a baseline method (like an LSTM or vanilla CNN) but later it was found that the difference went away after sufficient hyperparameter tuning [@tag:Sculley2018].
+A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018].
 
 #### Evaluation metrics for imbalanced classification
 
From 1de073dcc78a9e439a876d8a01d46fa492217a26 Mon Sep 17 00:00:00 2001
From: Daniel Elton 
Date: Fri, 14 Feb 2020 18:17:29 -0500
Subject: [PATCH 13/22] rehash/update my previous commit - single lines and other fixes

---
 content/06.discussion.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/content/06.discussion.md b/content/06.discussion.md
index 17f29ede..422010d8 100644
--- a/content/06.discussion.md
+++ b/content/06.discussion.md
@@ -3,7 +3,7 @@
 Despite the disparate types of data and scientific goals in the learning tasks covered above, several challenges are broadly important for deep learning in the biomedical domain.
 Here we examine these factors that may impede further progress, ask what steps have already been taken to overcome them, and suggest future research directions.
 
-### Preventing overfitting via hyperparameter tuning
+### Preventing overfitting and hyperparameter tuning
 
 Some of the challenges in applying deep learning are shared with other machine learning methods.
 In particular, many problem-specific optimizations described in this review reflect a recurring universal tradeoff---controlling the flexibility of a model in order to maximize predictivity.
@@ -19,7 +19,7 @@ This phenomena, known as "double descent" indicates that deep neural networks ac
 To optimize neural networks, hyperparameters must be tuned to yield the network with the best test error.
 This is computationally expensive and often not done; however, it is important to do when making claims about the superiority of one machine learning method vs. another.
 Several examples have now been uncovered where a new method was said to be superior to a baseline method (like an LSTM or vanilla CNN) but later it was found that the difference went away after sufficient hyperparameter tuning [@tag:Sculley2018]. 
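A minimal sketch of what giving a baseline such a tuning budget can look like, assuming scikit-learn, SciPy, and a synthetic dataset purely as stand-ins, is shown below:

```python
# Minimal sketch of tuning a baseline with random search before comparing
# methods. The search space, budget, and data are illustrative assumptions,
# not recommendations for any particular biomedical task.
from scipy.stats import loguniform
from sklearn.datasets import make_classification
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.neural_network import MLPClassifier

X, y = make_classification(n_samples=1500, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

search = RandomizedSearchCV(
    MLPClassifier(max_iter=300, random_state=0),
    param_distributions={
        "hidden_layer_sizes": [(32,), (64,), (64, 64)],
        "alpha": loguniform(1e-6, 1e-1),             # L2 penalty
        "learning_rate_init": loguniform(1e-4, 1e-1),
    },
    n_iter=20,
    cv=3,
    random_state=0,
)
search.fit(X_train, y_train)

# Only the tuned baseline's held-out score should be compared against a new
# method that received the same tuning budget.
print(search.best_params_)
print(f"held-out accuracy: {search.score(X_test, y_test):.3f}")
```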
-A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. +A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. #### Evaluation metrics for imbalanced classification From 0089d290924d3653aaa7c870a2f4bbe5ecf0a796 Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Fri, 14 Feb 2020 18:37:55 -0500 Subject: [PATCH 14/22] rehash/update my previous commit - single lines and other fixes --- content/05.treat.md | 2 +- content/citation-tags.tsv | 14 ++++++------ content/manual-references.json | 41 ++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index 217b6c71..164c3c74 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -190,7 +190,7 @@ A popular approach which may help ensure synthesizability is to use rule-based v Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. -In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tagVamathevan2019]. +In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tag:Vamathevan2019]. Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. 
diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv index 74e7e90b..502511a6 100644 --- a/content/citation-tags.tsv +++ b/content/citation-tags.tsv @@ -68,7 +68,7 @@ Edwards2015_growing_pains doi:10.1145/2771283 Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 Elephas url:https://github.com/maxpumperla/elephas Elton_molecular_design_review doi:10.1039/C9ME00039A -Elton2020 arxiv:2002.05149 +Elton2020 arxiv:2002.05149 Errington2014_reproducibility doi:10.7554/eLife.04333 Eser2016_fiddle doi:10.1101/081380 Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 @@ -196,7 +196,7 @@ Mrzelj url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 matis doi:10.1016/S0097-8485(96)80015-5 nbc doi:10.1093/bioinformatics/btq619 Murdoch2017_automatic arxiv:1702.02540 -Murdoch2019 doi:10.1073/pnas.1900654116 +Murdoch2019 doi:10.1073/pnas.1900654116 Nazor2012 doi:10.1016/j.stem.2012.02.013 Nemati2016_rl doi:10.1109/EMBC.2016.7591355 Ni2018 doi:10.1101/385849 @@ -239,7 +239,7 @@ Rogers2010_fingerprints doi:10.1021/ci100050t Roth2015_view_agg_cad doi:10.1109/TMI.2015.2482920 Romero2017_diet url:https://openreview.net/pdf?id=Sk-oDY9ge Rosenberg2015_synthetic_seqs doi:10.1016/j.cell.2015.09.054 -Rudin2019 doi:10.1038/s42256-019-0048-x +Rudin2019 doi:10.1038/s42256-019-0048-x Russakovsky2015_imagenet doi:10.1007/s11263-015-0816-y Sa2015_buckwild pmcid:PMC4907892 Salas2018_GR doi:10.1101/gr.233213.117 @@ -248,7 +248,7 @@ Salzberg doi:10.1186/1471-2105-11-544 Schatz2010_dna_cloud doi:10.1038/nbt0710-691 Schmidhuber2014_dnn_overview doi:10.1016/j.neunet.2014.09.003 Scotti2016_missplicing doi:10.1038/nrg.2015.3 -Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf +Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf Segata doi:10.1371/journal.pcbi.1004977 Segler2017_drug_design arxiv:1701.01329 Seide2014_parallel doi:10.1109/ICASSP.2014.6853593 @@ -258,7 +258,7 @@ Serden doi:10.1016/S0168-8510(02)00208-7 Shaham2016_batch_effects doi:10.1093/bioinformatics/btx196 Shapely doi:10.1515/9781400881970-018 Shen2017_medimg_review doi:10.1146/annurev-bioeng-071516-044442 -Shen2019 doi:10.1016/j.eswa.2019.01.048 +Shen2019 doi:10.1016/j.eswa.2019.01.048 Shin2016_cad_tl doi:10.1109/TMI.2016.2528162 Shrikumar2017_learning arxiv:1704.02685 Shrikumar2017_reversecomplement doi:10.1101/103663 @@ -281,7 +281,7 @@ Su2015_gpu arxiv:1507.01239 Subramanian2016_bace1 doi:10.1021/acs.jcim.6b00290 Sun2016_ensemble arxiv:1606.00575 Sundararajan2017_axiomatic arxiv:1703.01365 -Sumita2018 doi:10.1021/acscentsci.8b00213 +Sumita2018 doi:10.1021/acscentsci.8b00213 Sutskever arxiv:1409.3215 Swamidass2009_irv doi:10.1021/ci8004379 Tan2014_psb doi:10.1142/9789814644730_0014 @@ -297,7 +297,7 @@ Torracinta2016_sim doi:10.1101/079087 Tu1996_anns doi:10.1016/S0895-4356(96)00002-9 Unterthiner2014_screening url:http://www.bioinf.at/publications/2014/NIPS2014a.pdf Vanhoucke2011_cpu url:https://research.google.com/pubs/pub37631.html -Vamathevan2019 doi:10.1038/s41573-019-0024-5 +Vamathevan2019 doi:10.1038/s41573-019-0024-5 Vera2016_sc_analysis doi:10.1146/annurev-genet-120215-034854 Vervier doi:10.1093/bioinformatics/btv683 Wallach2015_atom_net arxiv:1510.02855 diff --git a/content/manual-references.json b/content/manual-references.json index 8ad4cb94..908dfd25 100644 --- a/content/manual-references.json +++ b/content/manual-references.json @@ -52,6 +52,47 @@ ] } }, + { + "id": "url:https://openreview.net/pdf?id=rJWF0Fywf", + "type": "article-journal", + "title": "Winner's 
Curse? On Pace, Progress, and Empirical Rigor ...", + "container-title": "International Conference on Learning Representations 2018", + "URL": "https://openreview.net/pdf?id=rJWF0Fywf", + "author": [ + { + "family": "Sculley", + "given": "D." + }, + { + "family": "Snoek", + "given": "Jasper" + }, + { + "family": "Rahimi", + "given": "Ali" + }, + { + "family": "Wiltschko", + "given": "Alex" + } + ], + "issued": { + "date-parts": [ + [ + "2018" + ] + ] + }, + "accessed": { + "date-parts": [ + [ + "2020", + 2, + 14 + ] + ] + } + }, { "id": "url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515", "type": "report", From fea5c229d4aa318a5afe8a5269de62931486e5ec Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Sat, 8 Aug 2020 08:45:29 -0500 Subject: [PATCH 15/22] Delete citation tags Converting to Markdown format --- content/citation-tags.tsv | 352 -------------------------------------- 1 file changed, 352 deletions(-) delete mode 100644 content/citation-tags.tsv diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv deleted file mode 100644 index 502511a6..00000000 --- a/content/citation-tags.tsv +++ /dev/null @@ -1,352 +0,0 @@ -tag citation -Abe doi:10.1101/gr.634603 -Abramoff2016_dr doi:10.1167/iovs.16-19964 -Agarwal2015_targetscan doi:10.7554/eLife.05005 -Aitchison2017 url:http://papers.nips.cc/paper/6940-model-based-bayesian-inference-of-neural-activity-and-connectivity-from-all-optical-interrogation-of-a-neural-circuit -Alipanahi2015_predicting doi:10.1038/nbt.3300 -AltaeTran2016_one_shot doi:10.1021/acscentsci.6b00367 -Amit2017_breast_mri doi:10.1117/12.2249981 -Asgari doi:10.1371/journal.pone.0141287 -blast doi:10.1016/S0022-2836(05)80360-2 -Angermueller2016_dl_review doi:10.15252/msb.20156651 -Angermueller2016_single_methyl doi:10.1186/s13059-017-1189-z -Angermueller2017 doi:10.1186/s13059-017-1189-z -Artemov2016_clinical doi:10.1101/095653 -Arvaniti2016_rare_subsets doi:10.1101/046508 -Bach2015_on doi:10.1371/journal.pone.0130140 -Bahdanu2014_neural arxiv:1409.0473 -Baskin2015_drug_disc doi:10.1080/17460441.2016.1201262 -Bar2015_nonmed_tl doi:10.1117/12.2083124 -Barash2010_splicing_code doi:10.1038/nature09000 -Baxt1991_myocardial doi:10.7326/0003-4819-115-11-843 -BeaulieuJones2016_ehr_encode doi:10.1016/j.jbi.2016.10.007 -Belkin2019_PNAS doi:10.1073/pnas.1903070116 -Bengio2015_prec arxiv:1412.7024 -Berezikov2011_mirna doi:10.1038/nrg3079 -Bergstra2011_hyper url:https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf -Bergstra2012_random url:http://www.jmlr.org/papers/v13/bergstra12a.html -Bracken2016_mirna doi:10.1038/nrg.2016.134 -Boza doi:10.1371/journal.pone.0178751 -Buggenthin2017_imaged_lineage doi:10.1038/nmeth.4182 -Burlina2016_amd doi:10.1109/ISBI.2016.7493240 -Chatterjee2018 arxiv:1807.09617 -Caruana2014_need arxiv:1312.6184 -Caruana2015_intelligible doi:10.1145/2783258.2788613 -Chaudhary2017_multiom_liver_cancer doi:10.1101/114892 -Che2015_distill arxiv:1512.03542 -Che2016_rnn arxiv:1606.01865 -Chen2015_hashing arxiv:1504.04788 -Chen2016_exprs_yeast doi:10.1186/s12859-015-0852-1 -Chen2016_gene_expr doi:10.1093/bioinformatics/btw074 -Chen2015_trans_species doi:10.1093/bioinformatics/btv315 -Choi2016_retain arxiv:1608.05745 -Choi2016_gram arxiv:1611.07012 -Chollet2016_xception arxiv:1610.02357 -Christensen2009 doi:10.1371/journal.pgen.1000602 -Chryssolouris1996_confidence doi:10.1109/72.478409 -Ciresan2013_mitosis doi:10.1007/978-3-642-40763-5_51 -Coates2013_cots_hpc url:http://www.jmlr.org/proceedings/papers/v28/coates13.html 
-Codella2016_ensemble_melanoma arxiv:1610.04662 -Consortium2012_encode doi:10.1038/nature11247 -CudNN arxiv:1410.0759 -Dahl2014_multi_qsar arxiv:1406.1231 -Darst2018 doi:10.1186/s12863-018-0646-3 -Dean2012_nips_downpour url:http://research.google.com/archive/large_deep_networks_nips2012.html -DeepChem url:https://github.com/deepchem/deepchem -Deming2016_genetic arxiv:1605.07156 -Ding doi:10.1186/s12859-015-0753-3 -Ditzler doi:10.1186/s12859-015-0793-8 -Ditzler2 doi:10.1109/TNNLS.2014.2320415 -Ditzler3 doi:10.1109/TNB.2015.2461219 -Dhungel2015_struct_pred_mamm doi:10.1007/978-3-319-24553-9_74 -Dhungel2016_mamm doi:10.1007/978-3-319-46723-8_13 -Dhungel2017_mamm_min_interv doi:10.1016/j.media.2017.01.009 -Dream_tf_binding url:https://www.synapse.org/#!Synapse:syn6131484/wiki/402026 -Dragonn url:http://kundajelab.github.io/dragonn/ -Duvenaud2015_graph_conv url:http://papers.nips.cc/paper/5954-convolutional-networks-on-graphs-for-learning-molecular-fingerprints -Edwards2015_growing_pains doi:10.1145/2771283 -Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 -Elephas url:https://github.com/maxpumperla/elephas -Elton_molecular_design_review doi:10.1039/C9ME00039A -Elton2020 arxiv:2002.05149 -Errington2014_reproducibility doi:10.7554/eLife.04333 -Eser2016_fiddle doi:10.1101/081380 -Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 -Essinger2010_taxonomic doi:10.1109/IJCNN.2010.5596644 -Esteva2017_skin_cancer_nature doi:10.1038/nature21056 -Faruqi url:http://alifar76.github.io/sklearn-metrics/ -Feinberg2018 doi:10.1056/NEJMra1402513 -Finnegan2017_maximum doi:10.1101/105957 -Fong2017_perturb doi:10.1109/ICCV.2017.371 -Fraga2005 doi:10.1073/pnas.0500398102 -Frosst2017_distilling arxiv:1711.09784 -Fu2019 doi:10.1109/TCBB.2019.2909237 -Gal2015_dropout arxiv:1506.02142 -Gaublomme2015_th17 doi:10.1016/j.cell.2015.11.009 -Gargeya2017_dr doi:10.1016/j.ophtha.2017.02.008 -Gawad2016_singlecell doi:10.1038/nrg.2015.16 -Geras2017_multiview_mamm doi:10.1038/nrg.2015.16 -Gerstein2016_scaling doi:10.1186/s13059-016-0917-0 -Ghandi2014_enhanced doi:10.1371/journal.pcbi.1003711 -Ghosh1992_sequence doi:10.1117/12.140112 -Glorot2011_domain url:http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.231.3442 -Goldsborough2017_cytogan doi:10.1101/227645 -Gomezb2016_automatic arxiv:1610.02415v1 -Graphlab doi:10.14778/2212351.2212354 -Groop1986_islet doi:10.2337/diab.35.2.237 -Gultepe2014_sepsis doi:10.1136/amiajnl-2013-001815 -Gulshan2016_dt doi:10.1001/jama.2016.17216 -Gupta2015_exprs_yeast doi:10.1101/031906 -Gupta2015_prec arxiv:1502.02551 -Guetterman url:http://www.fasebj.org/content/30/1_Supplement/406.3 -Hadjas2015_cc arxiv:1504.04343 -He2015_images arxiv:1512.03385 -Hinton2006_autoencoders doi:10.1126/science.1127647 -Hinton2015_dark_knowledge arxiv:1503.02531 -Hinton2015_dk arxiv:1503.02531v1 -Hochreiter doi:10.1093/bioinformatics/btm247 -Hoff doi:10.1093/nar/gkp327 -Horton1992_assessment doi:10.1093/nar/20.16.4331 -Horvath2013 doi:10.1186/gb-2013-14-10-r115 -Horvath2014 doi:10.1073/pnas.1412759111 -Houseman2012 doi:10.1186/1471-2105-13-86 -Houseman2016 doi:10.1186/s12859-016-1140-4 -Hubara2016_qnn arxiv:1609.07061 -Huddar2016_predicting doi:10.1109/ACCESS.2016.2618775 -Hughes2016_macromol_react doi:10.1021/acscentsci.6b00162 -Iglovikov2017_baa doi:10.1101/234120 -Islam2018 doi:10.1186/s12919-018-0121-1 -Ithapu2015_efficient doi:10.1016/j.jalz.2015.01.010 -Jafari2016_skin_lesions doi:10.1007/s11548-017-1567-8 -Jha2017_integrative_models doi:10.1101/104869 
-Johnson2017_integ_cell arxiv:1705.00092 -JuanMateu2016_t1d doi:10.1530/EJE-15-0916 -Kahng2017_activis arxiv:1704.01942 -Kalinin2018_pgx arxiv:1801.08570 -Karlin doi:10.1128/jb.179.12.3899-3913.1997 -Karpathy2015_visualizing arxiv:1506.02078 -Katzman2016_deepsurv arxiv:1606.00931 -Kearnes2016_admet arxiv:1606.08793 -Kearnes2016_graph_conv doi:10.1007/s10822-016-9938-8 -Kelley2016_basset doi:10.1101/gr.200535.115 -Keras url:https://github.com/fchollet/keras -Kizek doi:10.1016/j.bjid.2015.08.013 -Kindermans2016_investigating arxiv:1611.07270 -Knights doi:10.1111/j.1574-6976.2010.00251.x -Koh2016_denoising doi:10.1101/052118 -Koh2017_understanding arxiv:1703.04730 -Kooi2016_mamm_lesions doi:10.1016/j.media.2016.07.007 -Kooi2017_mamm_tl doi:10.1002/mp.12110 -Korfiatis2017 doi:10.1007/s10278-017-0009-z -Kraus2017_deeploc doi:10.15252/msb.20177551 -Kresovich2019 doi:10.1093/jnci/djz020 -Krizhevsky2013_nips_cnn url:https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf -Krizhevsky2014_weird_trick arxiv:1404.5997 -Kwabi-Addo2007 doi:10.1158/1078-0432.CCR-07-0085 -Khwaja2017 doi:10.1109/BIOCAS.2017.8325078 -Khwaja2018 arxiv:1810.01243 -Lacey2016_dl_fpga arxiv:1602.04283 -Laird2010 doi:10.1038/nrg2732 -Lakhani2017_radiography doi:10.1148/radiol.2017162326 -Lanchantin2016_motif arxiv:1608.03644 -Lee2016_deeptarget arxiv:1603.09123v2 -Lee2016_emr_oct_amd doi:10.1101/094276 -Lei2016_rationalizing arxiv:1606.04155 -Leibig2016_dr doi:10.1101/084210 -Levy2019 doi:10.1101/692665 -Levy-Jurgenson2018 doi:10.1101/491357 -Li2014_minibatch doi:10.1145/2623330.2623612 -Li2016_variation doi:10.1126/science.aad9417 -Liang2015_exprs_cancer doi:10.1109/TCBB.2014.2377729 -Lin2017_why_dl_works arxiv:1608.08225v3 -Lipton2016_missing arxiv:1606.04130 -Lipton2015_lstm arxiv:1510.07641 -Litjens2016_histopath_survey doi:10.1038/srep26286 -Litjens2017_medimage_survey doi:10.1016/j.media.2017.07.005 -Lisboa2006_review doi:10.1016/j.neunet.2005.10.007 -Liu2013 doi:10.1038/nbt.2487 -Liu doi:10.1371/journal.pone.0053253 -Liu2016_towards arxiv:1604.07043 -Liu2016_sc_transcriptome doi:10.12688/f1000research.7223.1 -Lodato2015_neurons doi:10.1126/science.aab1785 -Lowe2012_kaggle url:http://blogs.sciencemag.org/pipeline/archives/2012/12/11/did_kaggle_predict_drug_candidate_activities_or_not -lmat doi:10.1093/bioinformatics/btt389 -Lundberg2016_an arxiv:1611.07478 -Lusci2015_irv doi:10.1186/s13321-015-0110-6 -Lusci2013_rnn doi:10.1021/ci400187y -Ma2015_qsar_merck doi:10.1021/ci500747n -Maaten2008_tsne url:http://www.jmlr.org/papers/v9/vandermaaten08a.html -Mahendran2014_understanding arxiv:1412.0035 -Mahendran2016_salient doi:10.1007/978-3-319-46466-4_8 -Mahendran2016_visualizing doi:10.1007/s11263-016-0911-8 -Mahmood doi:10.1016/S0140-6736(13)61752-3 -Mapreduce doi:10.1145/1327452.1327492 -Mayr2016_deep_tox doi:10.3389/fenvs.2015.00080 -McHardy doi:10.1038/nmeth976 -McHardy2 doi:10.7717/peerj.1603 -Meissner2008 doi:10.1038/nature07107 -Metaphlan doi:10.1038/nmeth.2066 -Meng2016_mllib arxiv:1505.06807 -Min2016_deepenhancer doi:10.1109/BIBM.2016.7822593 -Montavon2018_visualization doi:10.1016/j.dsp.2017.10.011 -Momeni2018 doi:10.1101/438341 -Moritz2015_sparknet arxiv:1511.06051 -Mordvintsev2015_inceptionism url:http://googleresearch.blogspot.co.uk/2015/06/inceptionism-going-deeper-into-neural.html -Mrzelj url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 -matis doi:10.1016/S0097-8485(96)80015-5 -nbc doi:10.1093/bioinformatics/btq619 -Murdoch2017_automatic arxiv:1702.02540 
-Murdoch2019 doi:10.1073/pnas.1900654116 -Nazor2012 doi:10.1016/j.stem.2012.02.013 -Nemati2016_rl doi:10.1109/EMBC.2016.7591355 -Ni2018 doi:10.1101/385849 -Nguyen2014_adversarial arxiv:1412.1897v4 -Ngiam2011 url:https://ai.stanford.edu/~ang/papers/icml11-MultimodalDeepLearning.pdf -Nie2016_3d_survival doi:10.1007/978-3-319-46723-8_25 -NIH2016_genome_cost url:https://www.genome.gov/27565109/the-cost-of-sequencing-a-human-genome/ -Nih_curiosity url:https://www.nigms.nih.gov/Education/Documents/curiosity.pdf -Olivecrona2017_drug_design arxiv:1704.07555 -Osokin2017_biogan arxiv:1708.04692 -onecodex url:https://www.onecodex.com/ -Papernot2017_pate url:https://openreview.net/forum?id=HkwoSDPgg -Park2016_deepmirgene arxiv:1605.00017 -Parnamaa2017 doi:10.1534/g3.116.033654 -Pan2018 doi:10.1101/438218 -Pawlowski2016 doi:10.1101/085118 -Peng2019 doi:10.1101/527044 -Pereira2016_docking doi:10.1021/acs.jcim.6b00355 -PerezSianes2016_screening doi:10.1007/978-3-319-40126-3_2 -Phymm doi:10.1038/nmeth.1358 -Poplin2016_deepvariant doi:10.1101/092890 -Pratt2016_dr doi:10.1016/j.procs.2016.07.014 -Quach2017 doi:10.18632/aging.101168 -Quang2017_factor doi:10.1101/151274 -Qin2017_onehot doi:10.1371/journal.pcbi.1005403 -Qiu2017_graph_embedding doi:10.1101/110668 -Qiu2018 doi:10.1101/406066 -Ragoza2016_protein arxiv:1612.02751 -RAD2010_view_cc doi:10.1145/1721654.1721672 -Radford_dcgan arxiv:1511.06434v2 -Rajkomar2017_radiographs doi:10.1007/s10278-016-9914-9 -Rakhlin2018_histology doi:10.1101/259911 -Ramsundar2015_multitask_drug arxiv:1502.02072 -Ranganath2016_deep arxiv:1608.02158 -Raina2009_gpu doi:10.1145/1553374.1553486 -Relton2010 doi:10.1371/journal.pmed.1000356 -Ribeiro2016_lime arxiv:1602.04938 -Robertson2005 doi:10.1038/nrg1655 -Rogers2010_fingerprints doi:10.1021/ci100050t -Roth2015_view_agg_cad doi:10.1109/TMI.2015.2482920 -Romero2017_diet url:https://openreview.net/pdf?id=Sk-oDY9ge -Rosenberg2015_synthetic_seqs doi:10.1016/j.cell.2015.09.054 -Rudin2019 doi:10.1038/s42256-019-0048-x -Russakovsky2015_imagenet doi:10.1007/s11263-015-0816-y -Sa2015_buckwild pmcid:PMC4907892 -Salas2018_GR doi:10.1101/gr.233213.117 -Salas2018 doi:10.1186/s13059-018-1448-7 -Salzberg doi:10.1186/1471-2105-11-544 -Schatz2010_dna_cloud doi:10.1038/nbt0710-691 -Schmidhuber2014_dnn_overview doi:10.1016/j.neunet.2014.09.003 -Scotti2016_missplicing doi:10.1038/nrg.2015.3 -Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf -Segata doi:10.1371/journal.pcbi.1004977 -Segler2017_drug_design arxiv:1701.01329 -Seide2014_parallel doi:10.1109/ICASSP.2014.6853593 -Setty2015_seqgl doi:10.1371/journal.pcbi.1004271 -Selvaraju2016_grad arxiv:1610.02391 -Serden doi:10.1016/S0168-8510(02)00208-7 -Shaham2016_batch_effects doi:10.1093/bioinformatics/btx196 -Shapely doi:10.1515/9781400881970-018 -Shen2017_medimg_review doi:10.1146/annurev-bioeng-071516-044442 -Shen2019 doi:10.1016/j.eswa.2019.01.048 -Shin2016_cad_tl doi:10.1109/TMI.2016.2528162 -Shrikumar2017_learning arxiv:1704.02685 -Shrikumar2017_reversecomplement doi:10.1101/103663 -Simonyan2013_deep arxiv:1312.6034 -Singh2017_attentivechrome arxiv:1708.00339 -Singh2016_deepchrome arxiv:1607.02078 -Singh2016_tsk doi:10.1109/TCBB.2016.2609918 -Silver2016_alphago doi:10.1038/nature16961 -Sonderby doi:10.1007/978-3-319-21233-3_6 -Soueidan doi:10.1515/metgen-2016-0001 -Spark doi:10.1145/2934664 -Speech_recognition url:http://www.businessinsider.com/ibm-edges-closer-to-human-speech-recognition-2017-3 -Springenberg2014_striving arxiv:1412.6806 -Stein2010_cloud doi:10.1186/gb-2010-11-5-207 
-Stenstrom2005_latent doi:10.2337/diabetes.54.suppl_2.S68 -Stormo2000_dna doi:10.1093/bioinformatics/16.1.16 -Stratnikov doi:10.1186/2049-2618-1-11 -Strobelt2016_visual arxiv:1606.07461 -Su2015_gpu arxiv:1507.01239 -Subramanian2016_bace1 doi:10.1021/acs.jcim.6b00290 -Sun2016_ensemble arxiv:1606.00575 -Sundararajan2017_axiomatic arxiv:1703.01365 -Sumita2018 doi:10.1021/acscentsci.8b00213 -Sutskever arxiv:1409.3215 -Swamidass2009_irv doi:10.1021/ci8004379 -Tan2014_psb doi:10.1142/9789814644730_0014 -Tan2015_adage doi:10.1128/mSystems.00025-15 -Tan2016_eadage doi:10.1101/078659 -TAC-ELM doi:10.1142/S0219720012500151 -TensorFlow arxiv:1603.04467 -Teschendorff2017 doi:10.2217/epi-2016-0153 -Tian2019 doi:10.1186/s12864-019-5488-5 -Titus2017 doi:10.1093/hmg/ddx275 -Torracinta2016_deep_snp doi:10.1101/097469 -Torracinta2016_sim doi:10.1101/079087 -Tu1996_anns doi:10.1016/S0895-4356(96)00002-9 -Unterthiner2014_screening url:http://www.bioinf.at/publications/2014/NIPS2014a.pdf -Vanhoucke2011_cpu url:https://research.google.com/pubs/pub37631.html -Vamathevan2019 doi:10.1038/s41573-019-0024-5 -Vera2016_sc_analysis doi:10.1146/annurev-genet-120215-034854 -Vervier doi:10.1093/bioinformatics/btv683 -Wallach2015_atom_net arxiv:1510.02855 -Wang2016_breast_cancer arxiv:1606.05718 -Wang2016_methyl doi:10.1038/srep19598 -Wang2016_protein_contact doi:10.1371/journal.pcbi.1005324 -Wasson1985_clinical doi:10.1056/NEJM198509263131306 -WayGreene2017_eval arxiv:1711.04828 -WayGreene2017_tybalt doi:10.1101/174474 -Wilhelm-Benartzi2013 doi:10.1038/bjc.2013.496 -Word2Vec arxiv:1301.3781 -wgsquikr doi:10.1371/journal.pone.0091784 -Wu2017_molecule_net doi:10.1039/C7SC02664A -Xiang doi:10.1016/S0167-9473(99)00098-5 -Xiong2011_bayesian doi:10.1093/bioinformatics/btr444 -Xiong2015_splicing_code doi:10.1126/science.1254806 -Xu2015_show arxiv:1502.03044 -Yasushi2016_cgbvs_dnn doi:10.1002/minf.201600045 -yok doi:10.1186/1471-2105-12-20 -Yoon2016_cancer_reports doi:10.1007/978-3-319-47898-2_21 -Yosinski2014 url:https://papers.nips.cc/paper/5347-how-transferable-are-features-in-deep-neural-networks -Yosinksi2015_understanding arxiv:1506.06579 -Yu2016_melanoma_resnet doi:10.1109/TMI.2016.2642839 -Zhavoronkov2019_drugs doi:10.1038/s41587-019-0224-x -Zeiler2013_visualizing doi:10.1007/978-3-319-10590-1_53 -Zeng2015 doi:10.1186/s12859-015-0553-9 -Zeng2016_convolutional doi:10.1093/bioinformatics/btw255 -Zhang2015_multitask_tl doi:10.1145/2783258.2783304 -Zhang2017_generalization arxiv:1611.03530v2 -Zhang2019 doi:10.1186/s12885-019-5932-6 -Zhou2015_deep_sea doi:10.1038/nmeth.3547 -Zhu2016_advers_mamm doi:10.1101/095786 -Zhu2016_mult_inst_mamm doi:10.1101/095794 -Zintgraf2017_visualizing arxiv:1702.04595 -goodfellow2016deep url:http://www.deeplearningbook.org/ -li2016joint url:https://dl.acm.org/citation.cfm?id=3061018 -world2004international url:http://www.who.int/classifications/icd/en/ -ghahramani_protect arxiv:1707.02476 -uncertainty_types arxiv:1703.04977 -uncertainty_multi_task arxiv:1705.07115 -guo_calibration arxiv:1706.04599 -platt_scaling url:http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639 -out_dist_baseline arxiv:1610.02136 -temp_out_dist arxiv:1706.02690 -ai_safety arxiv:1606.06565 -strong_adversary arxiv:1705.07263 -retinopathy_uncertainty doi:10.1038/s41598-017-17876-z -bayesian_hypernets arxiv:1710.04759 -mcclure_bayesian arxiv:1611.01639 -uncertainty_ensembles arxiv:1612.01474 -domain_adapt_uncertainty arxiv:1505.07818 -gal_thesis url:http://www.cs.ox.ac.uk/people/yarin.gal/website/thesis/thesis.pdf 
From 9a1f92aea84de5f14279c222dc7e8409df12a594 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Sat, 8 Aug 2020 08:47:02 -0500 Subject: [PATCH 16/22] Convert tags to Markdown format Adds tags from ebb27b1f6e36717b1b215eb74f1ad19abf7e760a --- content/90.back-matter.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/90.back-matter.md b/content/90.back-matter.md index 8c40153d..b338c709 100644 --- a/content/90.back-matter.md +++ b/content/90.back-matter.md @@ -70,6 +70,7 @@ [@tag:Edwards2015_growing_pains]: doi:10.1145/2771283 [@tag:Ehran2009_visualizing]: url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 [@tag:Elephas]: url:https://github.com/maxpumperla/elephas +[@tag:Elton2020]: arxiv:2002.05149 [@tag:Errington2014_reproducibility]: doi:10.7554/eLife.04333 [@tag:Eser2016_fiddle]: doi:10.1101/081380 [@tag:Esfahani2016_melanoma]: doi:10.1109/EMBC.2016.7590963 @@ -192,6 +193,7 @@ [@tag:Moritz2015_sparknet]: arxiv:1511.06051 [@tag:Mrzelj]: url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 [@tag:Murdoch2017_automatic]: arxiv:1702.02540 +[@tag:Murdoch2019]: doi:10.1073/pnas.1900654116 [@tag:NIH2016_genome_cost]: url:https://www.genome.gov/27565109/the-cost-of-sequencing-a-human-genome/ [@tag:Nazor2012]: doi:10.1016/j.stem.2012.02.013 [@tag:Nemati2016_rl]: doi:10.1109/EMBC.2016.7591355 @@ -233,6 +235,7 @@ [@tag:Romero2017_diet]: url:https://openreview.net/pdf?id=Sk-oDY9ge [@tag:Rosenberg2015_synthetic_seqs]: doi:10.1016/j.cell.2015.09.054 [@tag:Roth2015_view_agg_cad]: doi:10.1109/TMI.2015.2482920 +[@tag:Rudin2019]: doi:10.1038/s42256-019-0048-x [@tag:Russakovsky2015_imagenet]: doi:10.1007/s11263-015-0816-y [@tag:Sa2015_buckwild]: pmcid:PMC4907892 [@tag:Salas2018]: doi:10.1186/s13059-018-1448-7 @@ -241,6 +244,7 @@ [@tag:Schatz2010_dna_cloud]: doi:10.1038/nbt0710-691 [@tag:Schmidhuber2014_dnn_overview]: doi:10.1016/j.neunet.2014.09.003 [@tag:Scotti2016_missplicing]: doi:10.1038/nrg.2015.3 +[@tag:Sculley2018]: url:https://openreview.net/pdf?id=rJWF0Fywf [@tag:Segata]: doi:10.1371/journal.pcbi.1004977 [@tag:Segler2017_drug_design]: arxiv:1701.01329 [@tag:Seide2014_parallel]: doi:10.1109/ICASSP.2014.6853593 @@ -250,6 +254,7 @@ [@tag:Shaham2016_batch_effects]: doi:10.1093/bioinformatics/btx196 [@tag:Shapely]: doi:10.1515/9781400881970-018 [@tag:Shen2017_medimg_review]: doi:10.1146/annurev-bioeng-071516-044442 +[@tag:Shen2019]: doi:10.1016/j.eswa.2019.01.048 [@tag:Shin2016_cad_tl]: doi:10.1109/TMI.2016.2528162 [@tag:Shrikumar2017_learning]: arxiv:1704.02685 [@tag:Shrikumar2017_reversecomplement]: doi:10.1101/103663 @@ -270,6 +275,7 @@ [@tag:Strobelt2016_visual]: arxiv:1606.07461 [@tag:Su2015_gpu]: arxiv:1507.01239 [@tag:Subramanian2016_bace1]: doi:10.1021/acs.jcim.6b00290 +[@tag:Sumita2018]: doi:10.1021/acscentsci.8b00213 [@tag:Sun2016_ensemble]: arxiv:1606.00575 [@tag:Sundararajan2017_axiomatic]: arxiv:1703.01365 [@tag:Sutskever]: arxiv:1409.3215 @@ -286,6 +292,7 @@ [@tag:Torracinta2016_sim]: doi:10.1101/079087 [@tag:Tu1996_anns]: doi:10.1016/S0895-4356(96)00002-9 [@tag:Unterthiner2014_screening]: url:http://www.bioinf.at/publications/2014/NIPS2014a.pdf +[@tag:Vamathevan2019]: doi:10.1038/s41573-019-0024-5 [@tag:Vanhoucke2011_cpu]: url:https://research.google.com/pubs/pub37631.html [@tag:Vera2016_sc_analysis]: doi:10.1146/annurev-genet-120215-034854 [@tag:Vervier]: doi:10.1093/bioinformatics/btv683 From 617ff701c12f050bfc98361a716ec808be0be23e Mon Sep 17 00:00:00 2001 From: "Daniel C. 
Elton" Date: Sat, 8 Aug 2020 12:43:28 -0400 Subject: [PATCH 17/22] Apply suggestions from code review commit agitter's changes Co-authored-by: Anthony Gitter --- content/05.treat.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index 164c3c74..6ed4de07 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -180,19 +180,20 @@ However, in the long term, atomic convolutions may ultimately overtake grid-base #### *De novo* drug design -*De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery in-silico [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. +*De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery *in silico* [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. It explores an estimated 1060 synthesizable organic molecules with drug-like properties without explicit enumeration [@doi:10.1002/wcms.1104]. -To score molecules after generation or during optimization, physics-based simulation could be used [@tag:Sumita2018], but machine learning models based on techniques discussed earlier may be preferable [@tag:Gomezb2016_automatic], as they are much more computationally expedient. Computationally efficiency is particularly important during optimization as the "scoring function" may need to be called thousands of times. +To score molecules after generation or during optimization, physics-based simulation could be used [@tag:Sumita2018], but machine learning models based on techniques discussed earlier may be preferable [@tag:Gomezb2016_automatic], as they are much more computationally expedient. +Computational efficiency is particularly important during optimization as the "scoring function" may need to be called thousands of times. To "design" and "synthesize", traditional *de novo* design software relied on classical optimizers such as genetic algorithms. These approaches can lead to overfit, "weird" molecules, which are difficult to synthesize in the lab. -A popular approach which may help ensure synthesizability is to use rule-based virtual chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. +A popular approach that may help ensure synthesizability is to use rule-based virtual chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. -In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tag:Vamathevan2019]. +In the past few years, a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including RNNs, VAEs, GANs, and reinforcement learning -- for a review see Elton et al. [@tag:Elton_molecular_design_review] or Vamathevan et al [@tag:Vamathevan2019]. 
-Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. +Building off the large amount of work that has already gone into text generation [@arxiv:1308.0850], many generative neural networks for drug design initially represented chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. The first successful demonstration of a deep learning based approach for molecular optimization occurred in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic]. In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete (e.g. bit vector or string) features or in symbolic, molecular graph space. @@ -207,8 +208,8 @@ The initial RNN is then fine-tuned to generate molecules that are likely to be a Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules. Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. -Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules.[@tag:Elton_molecular_design_review] -A reinforcement learning agent developed by Zhou et al. demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network [@doi:10.1038/s41598-019-47148-x]. +Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules [@tag:Elton_molecular_design_review]. +A reinforcement learning agent developed by Zhou et al. [@doi:10.1038/s41598-019-47148-x] demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE [add ref], Objective-Reinforced Generative Adversarial Network [add ref], and Graph Convolutional Policy Network [add ref]. As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1) [@tag:Zhavoronkov2019_drugs]. Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1 [@tag:Zhavoronkov2019_drugs]. From 8c431eeb275bc7a1000e7ee1b616e46861c4e313 Mon Sep 17 00:00:00 2001 From: "Daniel C. 
Elton" Date: Sat, 8 Aug 2020 14:08:09 -0400 Subject: [PATCH 18/22] Update 05.treat.md New references: [@doi:10.1038/s41587-020-0418-2] [@arxiv:1802.04364] [@arXiv:1705.10843] [@arXiv:1806.02473] [@10.1021/acsmedchemlett.0c00088] [@doi:10.1021/acs.jcim.0c00174] note on references - I could not get DOIs for these so had to go with arXiv: [@arxiv:1802.04364] is published in ICML , see https://dblp.org/rec/bibtex/conf/icml/JinBJ18 [@arXiv:1806.02473] is published in NeurIPS, see https://dblp.uni-trier.de/rec/bibtex/conf/nips/YouLYPL18 Note: another work which led to a synthesized and tested drug molecule is this, from 2018: https://doi.org/10.1021/acs.molpharmaceut.8b00839. However, the 2019 work ( Zhavoronkov et al) we discuss got much more attention. The review is already getting a bit "in the weeds" so I left it out. this is a recent review (july this year) which people may be interested in. I cited it. https://pubs.acs.org/doi/pdf/10.1021/acsmedchemlett.0c00088 sorry for any typos that crept in - spellchecker isn't working in Github for some reason. --- content/05.treat.md | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index 6ed4de07..cfd04e53 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -186,10 +186,9 @@ To score molecules after generation or during optimization, physics-based simula Computational efficiency is particularly important during optimization as the "scoring function" may need to be called thousands of times. To "design" and "synthesize", traditional *de novo* design software relied on classical optimizers such as genetic algorithms. -These approaches can lead to overfit, "weird" molecules, which are difficult to synthesize in the lab. -A popular approach that may help ensure synthesizability is to use rule-based virtual chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. -Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. -In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. +These algorithms use a list of hard-coded rules to perform virtual chemical reactions on molecular structures during each iteration, leading to physically stable and synthesizable molecules [@doi:10.1021/acs.jmedchem.5b01849]. +Deep learning models have been proposed as an alternative. +In contrast to the classical approaches, in theory generative models learned from big data would not require laboriously encoded expert knowledge to generate realistic, synthesizable molecules. In the past few years, a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including RNNs, VAEs, GANs, and reinforcement learning -- for a review see Elton et al. [@tag:Elton_molecular_design_review] or Vamathevan et al [@tag:Vamathevan2019]. @@ -209,10 +208,19 @@ Both the fine-tuning and reinforcement learning approaches can rediscover known, Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules [@tag:Elton_molecular_design_review]. 
-A reinforcement learning agent developed by Zhou et al. [@doi:10.1038/s41598-019-47148-x] demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE [add ref], Objective-Reinforced Generative Adversarial Network [add ref], and Graph Convolutional Policy Network [add ref]. -As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1) [@tag:Zhavoronkov2019_drugs]. -Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1 [@tag:Zhavoronkov2019_drugs]. - -In concluding this section, it is worth pointing out that it has been shown that classical genetic algorithms can compete with some of the most advanced deep learning methods for molecular optimization [@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C]. -Such genetic algorithms use hard coded rules based possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. -Still, there are many avenues for improving current deep learning systems and the future of the field looks bright. +A reinforcement learning agent developed by Zhou et al. [@doi:10.1038/s41598-019-47148-x] demonstrated superior molecular optimization performance on optimizing the quantitative estimate of drug-likeness (QED) metric and the "penalized logP" metric (logP minus the synthetic accessibility) when compared with other deep learning based approaches such as the Junction Tree VAE [@arxiv:1802.04364], Objective-Reinforced Generative Adversarial Network [@arXiv:1705.10843], and Graph Convolutional Policy Network [@arXiv:1806.02473]. +As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover inhibitors of discoidin domain receptor 1 (DDR1) [@tag:Zhavoronkov2019_drugs]. +In contrast to most previous work, six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1. One of the molecules was chosen for further testing and showed promising results in a cancer cell line and mouse model [@tag:Zhavoronkov2019_drugs]. + + +In concluding this section, we want to highlight two areas where work is still needed before AI can bring added value to the existing drug discovery process - novelty and synthesizability. +The work of Zhavoronkov et al. is a arguably an important milestone and recieved much fanfare in the popular press, but Walters and Murko have presented a more sober assessment, noting that the generated molecule they choose to test in the lab is very similar to an existing drug which was present in their training data [@doi:10.1038/s41587-020-0418-2]. +Small variations of existing molecules are likely not to be much better and may not be patentable. +One way to tackle this problem is to add novelty and diversity metrics to the reward function of reinforcement learning based algorithms. +Novelty should also be taken into account when comparing different models - and thus is included in the proposed GuacaMol benchmark (2019) for accessing generative molecules for molecular design [@doi:10.1021/acs.jcim.8b00839]. +The other area which has been pointed to as a key limitation of current approaches is synthesizability [@doi:10.1021/acs.jcim.0c00174,@10.1021/acsmedchemlett.0c00088]. 
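One simple way to operationalize the novelty term mentioned above is to penalize similarity to the training corpus; the sketch below (assuming RDKit, with a three-molecule placeholder standing in for the real training set) computes such a term from Morgan fingerprints:

```python
# Sketch of a novelty term that could be added to a generative model's reward.
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem

def fingerprint(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None
    return AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)

training_set = ["CCO", "CCN", "c1ccccc1"]  # placeholder for the real training data
train_fps = [fingerprint(s) for s in training_set]

def novelty(smiles):
    """1 minus the maximum Tanimoto similarity to the training set."""
    fp = fingerprint(smiles)
    if fp is None:
        return 0.0  # invalid molecules earn no novelty reward
    return 1.0 - max(DataStructs.TanimotoSimilarity(fp, t) for t in train_fps)

print(novelty("CCO"))                    # 0.0: already present in the training set
print(novelty("CC(=O)Oc1ccccc1C(=O)O"))  # closer to 1.0: a different scaffold
```

A full reward might combine potency or QED with such a novelty term and a diversity term computed within the generated batch, although how the terms are weighted is itself a design decision.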
+Current heuristics of synthesizability, such as the synthetic accessibility score, are based on a relatively limited domain of chemical data and are too restrictive, so better models/heuristics of synthesizability should help in this area [doi:10.1021/acs.jcim.0c00174]. + +As noted before, genetic algorithms use hard coded rules based on possible chemical reactions to generate molecular structures and therefore may have less trouble generating synthesizable molecules [@doi:10.1021/acs.jmedchem.5b01849]. +We note in passing that Jensen et al. (2018) [@doi:10.1039/C8SC05372C] and Yoshikawa et al. (2019) [@doi:10.1246/cl.180665] have both demostrated genetic algorithms which are competative with deep learning approaches. +Progress on overcoming both of these shortcomings is proceeding on many fronts, and we believe the future of deep learning for molecular design is quite bright. From d60d129f99bd6dddbedf058585fc0b38a669e0f6 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Sun, 9 Aug 2020 07:49:00 -0500 Subject: [PATCH 19/22] Remove interpretability changes from this pull request --- content/06.discussion.md | 50 +++++++++++----------------------------- 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/content/06.discussion.md b/content/06.discussion.md index 422010d8..5baa3a37 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -3,7 +3,7 @@ Despite the disparate types of data and scientific goals in the learning tasks covered above, several challenges are broadly important for deep learning in the biomedical domain. Here we examine these factors that may impede further progress, ask what steps have already been taken to overcome them, and suggest future research directions. -### Preventing overfitting and hyperparameter tuning +### Customizing deep learning models reflects a tradeoff between bias and variance Some of the challenges in applying deep learning are shared with other machine learning methods. In particular, many problem-specific optimizations described in this review reflect a recurring universal tradeoff---controlling the flexibility of a model in order to maximize predictivity. @@ -12,14 +12,8 @@ One way of understanding such model optimizations is that they incorporate exter This balance is formally described as a tradeoff between "bias and variance" [@tag:goodfellow2016deep]. -Although the bias-variance trade-off is is important to take into account with many classical machine learning models, recent empirical and theoretical observations suggest that deep neural networks in particular do not the tradeoff as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. -It has been demonstrated that poor generalizability (test error) can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory. -This phenomena, known as "double descent" indicates that deep neural networks achieve their best performance when they smoothly interpolate training data - resulting in near zero training error [@tag:Belkin2019_PNAS]. - -To optimize neural networks, hyperparaters must be tuned to yield the network with the best test error. -This is computationally expensive and often not done, however it is important to do when making claims about the superiority of one machine learning method vs. another. 
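The tuning protocol alluded to here can be as simple as a fixed-budget random search evaluated on a validation split and applied identically to every method being compared; the toy sketch below (scikit-learn on synthetic data, with arbitrary search ranges) illustrates the pattern:

```python
# Toy illustration: tune hyperparameters on a validation split before comparing
# methods, then report performance on a held-out test split.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

X, y = make_classification(n_samples=2000, n_features=20, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)
X_tr, X_val, y_tr, y_val = train_test_split(X_tr, y_tr, test_size=0.25, random_state=0)

rng = np.random.default_rng(0)
best_model, best_val = None, -np.inf
for _ in range(10):  # small random search over two knobs
    hidden = int(rng.choice([16, 64, 256]))
    alpha = float(10 ** rng.uniform(-5, -1))
    clf = MLPClassifier(hidden_layer_sizes=(hidden,), alpha=alpha,
                        max_iter=500, random_state=0).fit(X_tr, y_tr)
    val = clf.score(X_val, y_val)
    if val > best_val:
        best_model, best_val = clf, val

print("validation accuracy of best configuration:", best_val)
print("held-out test accuracy:", best_model.score(X_te, y_te))
```

An ablation study follows the same template: retrain with one component removed and report the change on the held-out split.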
-Several examples have now been uncovered where a new method was said to be superior to a baseline method (like an LSTM or vanilla CNN) but later it was found that the difference went away after sufficient hyperparameter tuning [@tag:Sculley2018]. -A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. +Although the bias-variance tradeoff is common to all machine learning applications, recent empirical and theoretical observations suggest that deep learning models may have uniquely advantageous generalization properties [@tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. +Nevertheless, additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles. #### Evaluation metrics for imbalanced classification @@ -112,35 +106,18 @@ As a result, several opportunities for innovation arise: understanding the cause Unfortunately, uncertainty quantification techniques are underutilized in the computational biology communities and largely ignored in the current deep learning for biomedicine literature. Thus, the practical value of uncertainty quantification in biomedical domains is yet to be appreciated. -### Interpretability - -As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to develop methods for interpreting how they function. -There are several important reasons one might be interested in interpretability, which is also called "explainability". - -Firstly, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. -For instance, interpreting a model for predicting chemical properties from molecular graphs may illuminate previously unknown structure-property relations. -It is also useful to see if a model is using known relationships - if not, this may suggest a way to improve the model. -Finally, there is a chance that the model may have learned relationships that are known to be wrong. -This can be due to improper training data or due to overfitting on spurious correlations in the training data. +### Interpretation -This is particularly important if a model is making medical diagnoses. +As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. +Interpretability matters for two main reasons. +First, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. +However, this would not be possible if the model is a black box. +Second, interpretability is important for trust. +If a model is making medical diagnoses, it is important to ensure the model is making decisions for reliable reasons and is not focusing on an artifact of the data. A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. 
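One common way to probe what an accurate but opaque model has latched onto, in the spirit of the pneumonia example above, is to fit an interpretable surrogate to the model's own predictions; the sketch below (scikit-learn, synthetic data, with a random forest standing in for a deep network) shows the idea, often described as post-hoc interpretation or distillation:

```python
# Sketch: fit an interpretable surrogate to a black-box model's predictions
# so its decision rules can be inspected directly.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, export_text

X, y = make_classification(n_samples=1000, n_features=8, n_informative=4, random_state=1)
black_box = RandomForestClassifier(n_estimators=200, random_state=1).fit(X, y)

# Train the surrogate on the black box's *predictions*, not the original labels.
surrogate = DecisionTreeClassifier(max_depth=3, random_state=1)
surrogate.fit(X, black_box.predict(X))

print("surrogate fidelity:", surrogate.score(X, black_box.predict(X)))
print(export_text(surrogate, feature_names=[f"x{i}" for i in range(8)]))
```

A surrogate with high fidelity but an implausible rule (for example, asthma lowering pneumonia risk) is a warning sign that the black box is exploiting an artifact of the data.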
+In the context of deep learning, understanding the basis of a model's output is particularly important as deep learning models are unusually susceptible to adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. -It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. -While this is largely still an unsolved problem, the interpretation of deep learning models may help understand these failure modes and how to prevent them. - -Several different levels of interpretability can be distinguished. -Consider a prototypical CNN used for image classification. -At a high level, one can perform an occlusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap. -Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing]. -Finally, if one wishes to analyze the flow of information through a deep neural network layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization] - -A starting point for many discussions of interpretability is the interpretability-accuracy trade-off. -The trade-off assumes that only simple models are interpretable and often a delineation is made between “white box" models (linear regression, decision trees) that are assumed to be not very accurate and “black box" models (neural networks, kernel SVMs) which are assumed to be more accurate. -This view is becoming outmoded, however with the development of sophisticated tools for interrogating and understanding deep neural networks, [@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] and new methods for creating highly accurate interpretable models [@tag:Rudin2019]. -Still, this trade-off motivates a common practice whereby a easy to interpret model is trained next to a hard to interpret one, which is sometimes called "post-hoc interpretation". -For instance, in the example discussed by Caruana et al. mentioned earlier, a rule-based model was trained next to a neural network using the same training data to understand the types of relations which may have been learned by the neural network. -Along similar lines, a method for "distilling" a neural network into a decision tree has been developed.[@tag:Frosst2017_distilling] +As the concept of interpretability is quite broad, many methods described as improving the interpretability of deep learning models take disparate and often complementary approaches. #### Assigning example-specific importance scores @@ -242,8 +219,7 @@ Towards this end, Che et al. [@tag:Che2015_distill] used gradient boosted trees Finally, it is sometimes possible to train the model to provide justifications for its predictions. Lei et al. [@tag:Lei2016_rationalizing] used a generator to identify "rationales", which are short and coherent pieces of the input text that produce similar results to the whole input when passed through an encoder. -Shen et al. [@tag:Shen2019] trained a CNN for lung nodule malignancy classification which also provides a series of attributes for the nodule, which they argue help understand how the network functions. 
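The occlusion analysis described above can be implemented in a few lines; in the sketch below (NumPy only) the `predict` function is a placeholder for a real image classifier, and the map simply records how much the predicted score drops when each patch is blanked out:

```python
# Minimal occlusion-sensitivity sketch. `predict` is a stand-in for any image
# classifier that returns a probability; real use would call the trained network.
import numpy as np

def predict(image):
    # placeholder "model": responds to intensity in the centre of the image
    return float(image[8:16, 8:16].mean())

def occlusion_map(image, patch=4):
    baseline = predict(image)
    heat = np.zeros_like(image, dtype=float)
    for i in range(0, image.shape[0], patch):
        for j in range(0, image.shape[1], patch):
            occluded = image.copy()
            occluded[i:i + patch, j:j + patch] = 0.0  # blank out one patch
            heat[i:i + patch, j:j + patch] = baseline - predict(occluded)
    return heat  # large values mark regions the prediction depends on

image = np.random.rand(24, 24)
print(np.round(occlusion_map(image), 2))
```

More sophisticated attribution methods (deconvolution, layer-wise relevance propagation, gradient-based scores) refine this idea but answer the same basic question of which inputs drive a given prediction.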
-These are both simple examples of an emerging approach towards engendering trust in AI systems which Elton calls "self-explaining AI" [@tag:Elton2020]. +The authors applied their approach to a sentiment analysis task and obtained substantially superior results compared to an attention-based method. #### Future outlook From 19e9c80d31fb4a891594c509e69bd701b5159ec2 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Sun, 9 Aug 2020 08:00:44 -0500 Subject: [PATCH 20/22] Citation fixes Some tags missed during conversion to Markdown link format --- content/05.treat.md | 5 ++--- content/90.back-matter.md | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index cfd04e53..600d5037 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -208,17 +208,16 @@ Both the fine-tuning and reinforcement learning approaches can rediscover known, Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules [@tag:Elton_molecular_design_review]. -A reinforcement learning agent developed by Zhou et al. [@doi:10.1038/s41598-019-47148-x] demonstrated superior molecular optimization performance on optimizing the quantitative estimate of drug-likeness (QED) metric and the "penalized logP" metric (logP minus the synthetic accessibility) when compared with other deep learning based approaches such as the Junction Tree VAE [@arxiv:1802.04364], Objective-Reinforced Generative Adversarial Network [@arXiv:1705.10843], and Graph Convolutional Policy Network [@arXiv:1806.02473]. +A reinforcement learning agent developed by Zhou et al. [@doi:10.1038/s41598-019-47148-x] demonstrated superior molecular optimization performance on optimizing the quantitative estimate of drug-likeness (QED) metric and the "penalized logP" metric (logP minus the synthetic accessibility) when compared with other deep learning based approaches such as the Junction Tree VAE [@arxiv:1802.04364], Objective-Reinforced Generative Adversarial Network [@arxiv:1705.10843], and Graph Convolutional Policy Network [@arxiv:1806.02473]. As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover inhibitors of discoidin domain receptor 1 (DDR1) [@tag:Zhavoronkov2019_drugs]. In contrast to most previous work, six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1. One of the molecules was chosen for further testing and showed promising results in a cancer cell line and mouse model [@tag:Zhavoronkov2019_drugs]. - In concluding this section, we want to highlight two areas where work is still needed before AI can bring added value to the existing drug discovery process - novelty and synthesizability. The work of Zhavoronkov et al. is a arguably an important milestone and recieved much fanfare in the popular press, but Walters and Murko have presented a more sober assessment, noting that the generated molecule they choose to test in the lab is very similar to an existing drug which was present in their training data [@doi:10.1038/s41587-020-0418-2]. Small variations of existing molecules are likely not to be much better and may not be patentable. 
One way to tackle this problem is to add novelty and diversity metrics to the reward function of reinforcement learning based algorithms. Novelty should also be taken into account when comparing different models - and thus is included in the proposed GuacaMol benchmark (2019) for accessing generative molecules for molecular design [@doi:10.1021/acs.jcim.8b00839]. -The other area which has been pointed to as a key limitation of current approaches is synthesizability [@doi:10.1021/acs.jcim.0c00174,@10.1021/acsmedchemlett.0c00088]. +The other area which has been pointed to as a key limitation of current approaches is synthesizability [@doi:10.1021/acs.jcim.0c00174; @doi:10.1021/acsmedchemlett.0c00088]. Current heuristics of synthesizability, such as the synthetic accessibility score, are based on a relatively limited domain of chemical data and are too restrictive, so better models/heuristics of synthesizability should help in this area [doi:10.1021/acs.jcim.0c00174]. As noted before, genetic algorithms use hard coded rules based on possible chemical reactions to generate molecular structures and therefore may have less trouble generating synthesizable molecules [@doi:10.1021/acs.jmedchem.5b01849]. diff --git a/content/90.back-matter.md b/content/90.back-matter.md index b338c709..6c085849 100644 --- a/content/90.back-matter.md +++ b/content/90.back-matter.md @@ -24,6 +24,7 @@ [@tag:Baskin2015_drug_disc]: doi:10.1080/17460441.2016.1201262 [@tag:Baxt1991_myocardial]: doi:10.7326/0003-4819-115-11-843 [@tag:BeaulieuJones2016_ehr_encode]: doi:10.1016/j.jbi.2016.10.007 +[@tag:Belkin2019_PNAS]: doi:10.1073/pnas.1903070116 [@tag:Bengio2015_prec]: arxiv:1412.7024 [@tag:Berezikov2011_mirna]: doi:10.1038/nrg3079 [@tag:Bergstra2011_hyper]: url:https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf @@ -70,6 +71,7 @@ [@tag:Edwards2015_growing_pains]: doi:10.1145/2771283 [@tag:Ehran2009_visualizing]: url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 [@tag:Elephas]: url:https://github.com/maxpumperla/elephas +[@tag:Elton_molecular_design_review]: doi:10.1039/C9ME00039A [@tag:Elton2020]: arxiv:2002.05149 [@tag:Errington2014_reproducibility]: doi:10.7554/eLife.04333 [@tag:Eser2016_fiddle]: doi:10.1101/081380 @@ -81,6 +83,7 @@ [@tag:Finnegan2017_maximum]: doi:10.1101/105957 [@tag:Fong2017_perturb]: doi:10.1109/ICCV.2017.371 [@tag:Fraga2005]: doi:10.1073/pnas.0500398102 +[@tag:Frosst2017_distilling]: arxiv:1711.09784 [@tag:Fu2019]: doi:10.1109/TCBB.2019.2909237 [@tag:Gal2015_dropout]: arxiv:1506.02142 [@tag:Gargeya2017_dr]: doi:10.1016/j.ophtha.2017.02.008 @@ -189,6 +192,7 @@ [@tag:Metaphlan]: doi:10.1038/nmeth.2066 [@tag:Min2016_deepenhancer]: doi:10.1109/BIBM.2016.7822593 [@tag:Momeni2018]: doi:10.1101/438341 +[@tag:Montavon2018_visualization]: doi:10.1016/j.dsp.2017.10.011 [@tag:Mordvintsev2015_inceptionism]: url:http://googleresearch.blogspot.co.uk/2015/06/inceptionism-going-deeper-into-neural.html [@tag:Moritz2015_sparknet]: arxiv:1511.06051 [@tag:Mrzelj]: url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 @@ -320,6 +324,7 @@ [@tag:Zhang2015_multitask_tl]: doi:10.1145/2783258.2783304 [@tag:Zhang2017_generalization]: arxiv:1611.03530v2 [@tag:Zhang2019]: doi:10.1186/s12885-019-5932-6 +[@tag:Zhavoronkov2019_drugs]: doi:10.1038/s41587-019-0224-x [@tag:Zhou2015_deep_sea]: doi:10.1038/nmeth.3547 [@tag:Zhu2016_advers_mamm]: doi:10.1101/095786 [@tag:Zhu2016_mult_inst_mamm]: doi:10.1101/095794 From 797550741106f444eccba21855ccbdddd9750da9 Mon Sep 
17 00:00:00 2001 From: "Daniel C. Elton" Date: Sun, 9 Aug 2020 11:01:13 -0400 Subject: [PATCH 21/22] Apply suggestions from code review agitter's copyedits Co-authored-by: Anthony Gitter --- content/05.treat.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index 600d5037..9b133022 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -194,7 +194,7 @@ In the past few years, a large number of techniques for the generative modeling Building off the large amount of work that has already gone into text generation [@arxiv:1308.0850], many generative neural networks for drug design initially represented chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. -The first successful demonstration of a deep learning based approach for molecular optimization occurred in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic]. +The first successful demonstration of a deep learning based approach for molecular optimization occurred in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules [@tag:Gomezb2016_automatic]. In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete (e.g. bit vector or string) features or in symbolic, molecular graph space. Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. @@ -208,18 +208,19 @@ Both the fine-tuning and reinforcement learning approaches can rediscover known, Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules [@tag:Elton_molecular_design_review]. -A reinforcement learning agent developed by Zhou et al. [@doi:10.1038/s41598-019-47148-x] demonstrated superior molecular optimization performance on optimizing the quantitative estimate of drug-likeness (QED) metric and the "penalized logP" metric (logP minus the synthetic accessibility) when compared with other deep learning based approaches such as the Junction Tree VAE [@arxiv:1802.04364], Objective-Reinforced Generative Adversarial Network [@arxiv:1705.10843], and Graph Convolutional Policy Network [@arxiv:1806.02473]. +A reinforcement learning agent developed by Zhou et al. 
[@doi:10.1038/s41598-019-47148-x] demonstrated superior molecular optimization performance on optimizing the quantitative estimate of drug-likeness (QED) metric and the "penalized logP" metric (logP minus the synthetic accessibility) when compared with other deep learning based approaches such as the Junction Tree VAE [@arxiv:1802.04364], Objective-Reinforced Generative Adversarial Network [@arxiv:1705.10843], and Graph Convolutional Policy Network [@arxiv:1806.02473]. As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover inhibitors of discoidin domain receptor 1 (DDR1) [@tag:Zhavoronkov2019_drugs]. -In contrast to most previous work, six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1. One of the molecules was chosen for further testing and showed promising results in a cancer cell line and mouse model [@tag:Zhavoronkov2019_drugs]. +In contrast to most previous work, six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1. +One of the molecules was chosen for further testing and showed promising results in a cancer cell line and mouse model [@tag:Zhavoronkov2019_drugs]. -In concluding this section, we want to highlight two areas where work is still needed before AI can bring added value to the existing drug discovery process - novelty and synthesizability. -The work of Zhavoronkov et al. is a arguably an important milestone and recieved much fanfare in the popular press, but Walters and Murko have presented a more sober assessment, noting that the generated molecule they choose to test in the lab is very similar to an existing drug which was present in their training data [@doi:10.1038/s41587-020-0418-2]. +In concluding this section, we want to highlight two areas where work is still needed before AI can bring added value to the existing drug discovery process---novelty and synthesizability. +The work of Zhavoronkov et al. is arguably an important milestone and received much fanfare in the popular press, but Walters and Murcko have presented a more sober assessment, noting that the generated molecule they chose to test in the lab is very similar to an existing drug that was present in their training data [@doi:10.1038/s41587-020-0418-2]. Small variations of existing molecules are likely not to be much better and may not be patentable. One way to tackle this problem is to add novelty and diversity metrics to the reward function of reinforcement learning based algorithms. -Novelty should also be taken into account when comparing different models - and thus is included in the proposed GuacaMol benchmark (2019) for accessing generative molecules for molecular design [@doi:10.1021/acs.jcim.8b00839]. +Novelty should also be taken into account when comparing different models---and thus is included in the proposed GuacaMol benchmark (2019) for assessing generative models for molecular design [@doi:10.1021/acs.jcim.8b00839]. The other area which has been pointed to as a key limitation of current approaches is synthesizability [@doi:10.1021/acs.jcim.0c00174; @doi:10.1021/acsmedchemlett.0c00088].
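For reference, the two optimization targets named above are straightforward to compute with existing cheminformatics tooling; the sketch below (assuming RDKit together with its bundled SA_Score contrib module, which provides the synthetic accessibility heuristic discussed here) evaluates QED and a penalized logP for a single molecule:

```python
# Sketch: QED and "penalized logP" (logP minus the synthetic accessibility score).
# Assumes RDKit; sascorer ships in RDKit's Contrib/SA_Score directory.
import os
import sys
from rdkit import Chem, RDConfig
from rdkit.Chem import Descriptors, QED

sys.path.append(os.path.join(RDConfig.RDContribDir, "SA_Score"))
import sascorer  # noqa: E402

mol = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")  # aspirin as an example
qed_score = QED.qed(mol)
penalized_logp = Descriptors.MolLogP(mol) - sascorer.calculateScore(mol)
print(round(qed_score, 3), round(penalized_logp, 3))
```

Because the SA score is fit to a limited slice of chemical space, high penalized logP values should be read as a benchmark convenience rather than a guarantee that a molecule can actually be made.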
+Current heuristics of synthesizability, such as the synthetic accessibility score, are based on a relatively limited domain of chemical data and are too restrictive, so better models of synthesizability should help in this area [@doi:10.1021/acs.jcim.0c00174]. -As noted before, genetic algorithms use hard coded rules based on possible chemical reactions to generate molecular structures and therefore may have less trouble generating synthesizable molecules [@doi:10.1021/acs.jmedchem.5b01849]. -We note in passing that Jensen et al. (2018) [@doi:10.1039/C8SC05372C] and Yoshikawa et al. (2019) [@doi:10.1246/cl.180665] have both demostrated genetic algorithms which are competative with deep learning approaches. +As noted before, genetic algorithms use hard-coded rules based on possible chemical reactions to generate molecular structures and therefore may have less trouble generating synthesizable molecules [@doi:10.1021/acs.jmedchem.5b01849]. +We note in passing that Jensen (2018) [@doi:10.1039/C8SC05372C] and Yoshikawa et al. (2019) [@doi:10.1246/cl.180665] have both demonstrated genetic algorithms that are competitive with deep learning approaches. Progress on overcoming both of these shortcomings is proceeding on many fronts, and we believe the future of deep learning for molecular design is quite bright. From a88f0b69485414bb223d8111a4971be6f00234f1 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Sun, 9 Aug 2020 14:58:50 -0500 Subject: [PATCH 22/22] Update content/05.treat.md --- content/05.treat.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/05.treat.md b/content/05.treat.md index 9b133022..dede1a2d 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -190,7 +190,7 @@ These algorithms use a list of hard-coded rules to perform virtual chemical reac Deep learning models have been proposed as an alternative. In contrast to the classical approaches, in theory generative models learned from big data would not require laboriously encoded expert knowledge to generate realistic, synthesizable molecules. -In the past few years, a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including RNNs, VAEs, GANs, and reinforcement learning -- for a review see Elton et al. [@tag:Elton_molecular_design_review] or Vamathevan et al [@tag:Vamathevan2019]. +In the past few years, a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including RNNs, VAEs, GANs, and reinforcement learning---for a review see Elton et al. [@tag:Elton_molecular_design_review] or Vamathevan et al. [@tag:Vamathevan2019]. Building off the large amount of work that has already gone into text generation [@arxiv:1308.0850], many generative neural networks for drug design initially represented chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design].
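As a closing illustration of why the rule-based baselines remain competitive, the toy sketch below (assuming RDKit; the string edits are crude placeholders for real reaction-based operators, and QED is an arbitrary choice of fitness) runs a few generations of a genetic algorithm over SMILES strings and keeps only offspring that pass the valence check:

```python
# Toy genetic algorithm over SMILES (sketch). Mutations are naive string edits
# standing in for hard-coded virtual reaction rules; RDKit rejects invalid offspring.
import random
from rdkit import Chem
from rdkit.Chem import QED

def fitness(smiles):
    mol = Chem.MolFromSmiles(smiles)
    return 0.0 if mol is None else QED.qed(mol)

def mutate(smiles):
    edits = [lambda s: s + "C",
             lambda s: s.replace("O", "N", 1),
             lambda s: s.replace("C", "CO", 1)]
    child = random.choice(edits)(smiles)
    return child if Chem.MolFromSmiles(child) is not None else smiles

random.seed(0)
population = ["CCO", "c1ccccc1O", "CC(=O)O"]
for generation in range(20):
    offspring = [mutate(random.choice(population)) for _ in range(10)]
    population = sorted(set(population + offspring), key=fitness, reverse=True)[:10]

print(population[0], round(fitness(population[0]), 3))
```

Replacing the placeholder `mutate` with curated reaction rules, and `fitness` with a learned activity model plus the novelty and synthesizability terms discussed above, yields the kind of hybrid baseline that the cited genetic-algorithm studies show can rival deep learning approaches.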