[showyourwork]

lamalab-org · Mar 31, 2024 · a4691ff
1 parent 87f514b
commit a4691ff
Show file tree

Hide file tree

Showing 8 changed files with 8 additions and 3 deletions.
diff --git a/src/tex/acronymns.tex b/src/tex/acronymns.tex
@@ -13,4 +13,4 @@
 \newacronym{smiles}{SMILES}{Simplified Molecular Input Line-Entry System}
 \newacronym{pca}{PCA}{Principal Component Analysis}
 \newacronym{iupac}{IUPAC}{International Union of Pure and Applied Chemistry}
-\newacronym{json}{JSON}{JavaScript Object Notation}
+\newacronym{json}{JSON}{JavaScript Object Notation}
diff --git a/src/tex/appendix.tex b/src/tex/appendix.tex
@@ -268,4 +268,4 @@ \subsection{Leaderboard}
 
 \clearpage
 
-\printnoidxglossary[type=\acronymtype, nonumberlist]  % https://github.com/tectonic-typesetting/tectonic/issues/704
+\printnoidxglossary[type=\acronymtype, nonumberlist]  % https://github.com/tectonic-typesetting/tectonic/issues/704
diff --git a/src/tex/authors.tex b/src/tex/authors.tex
@@ -109,3 +109,4 @@
 
 \affil[\Letter]{\texttt{mail@kjablonka.com}}
 \affil[$\star$]{These authors contributed equally.}
+
diff --git a/src/tex/ms.tex b/src/tex/ms.tex
@@ -12,6 +12,7 @@
 \begin{document}
 \maketitle
 
+
 \clearpage
 \begin{abstract}
     Large language models (LLMs) have gained widespread interest due to their ability to process human language and perform tasks on which they have not been explicitly trained. 

diff --git a/src/tex/references.bib b/src/tex/references.bib
@@ -1220,4 +1220,4 @@ @article{yao2022react
   eprinttype = {arXiv},
   title = {React: Synergizing reasoning and acting in language models},
   date = {2022},
-}
+}
diff --git a/src/tex/sections/manually_sources_table.tex b/src/tex/sections/manually_sources_table.tex
@@ -36,3 +36,4 @@
        & Lab safety quizzes based on various sources && \variable{output/question_count_per_dir/json_file_counts_sci_lab_safety_test.txt} + \variable{output/question_count_per_dir/json_file_counts_lab_safety.txt} + \variable{output/question_count_per_dir/json_file_counts_stolaf.txt} + \variable{output/question_count_per_dir/json_file_counts_chemical_safety_mcq_exam.txt} + \variable{output/question_count_per_dir/json_file_counts_anderson.txt}\\
 \bottomrule
 \end{xltabular}
+
diff --git a/src/tex/sections/parse_check_desc.tex b/src/tex/sections/parse_check_desc.tex
@@ -4,3 +4,4 @@
 We selected a large, diverse subset of questions (10 per topic for all model reports) and manually investigated where the parsed output does not match the actual answer intended by the model.
 We found that for \glstext{mcq} questions, the parsing was accurate in 99.76\% of the cases, while for floating point questions, the parsing was accurate in 99.17\% of the cases.
 The models most frequently generating errors are pplx-7b-chat and Mixtral-8x7b.
+
diff --git a/src/tex/sections/semi_programatically_sources_table.tex b/src/tex/sections/semi_programatically_sources_table.tex
@@ -28,3 +28,4 @@
 \bottomrule
 \end{xltabular}
 
+