Evaluation plays a critical role in deep learning as a fundamental building block of any prediction-based system. However, the vast number of Natural Language Processing (NLP) tasks and the development of various metrics have led to challenges in evaluating different systems with different metrics. To address these challenges, we introduce jury, a toolkit that provides a unified evaluation framework with standardized structures for performing evaluation across different tasks and metrics. The objective of jury is to standardize and improve metric evaluation for all systems and to help the community overcome the challenges in evaluation. Since its open-source release, jury has reached a wide audience and is publicly available.
NLP tasks possess inherent complexity, requiring a comprehensive evaluation of model performance beyond a single metric comparison. Established benchmarks such as WMT (
Although employing multiple metrics for evaluation is common, it remains challenging in practice because widely used metric libraries lack support for combined and/or concurrent metric computations. Consequently, researchers face the burden of evaluating their models one metric at a time, a process exacerbated by the scale and complexity of recent models and by limited hardware capabilities. This bottleneck impedes the efficient assessment of NLP models and highlights the need for better tooling for convenient metric computation. Moreover, for concurrency to be maximally beneficial, the system may require appropriate hardware, which raises the question of hardware availability.
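To illustrate this per-metric burden, the sketch below evaluates the same outputs with two metrics using the Hugging Face evaluate library; each metric must be loaded and computed in a separate, sequential call, and any combination or parallelization is left entirely to the user (the example data is purely illustrative):

    import evaluate

    predictions = ["the cat sat on the mat", "a quick brown fox"]
    references = [["the cat is on the mat"], ["the quick brown fox"]]

    # Each metric is loaded and computed independently; there is no unified
    # interface for combined or concurrent computation of several metrics.
    bleu = evaluate.load("bleu")
    rouge = evaluate.load("rouge")

    bleu_score = bleu.compute(predictions=predictions, references=references)
    rouge_score = rouge.compute(predictions=predictions, references=references)

    print({"bleu": bleu_score["bleu"], "rougeL": rouge_score["rougeL"]})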
The extent of achievable concurrency in NLP research has traditionally depended on the hardware resources available to researchers. However, significant advancements in recent years have notably reduced the cost of high-end hardware, including multi-core CPUs and GPUs. This progress has transformed high-performance computing resources, once prohibitively expensive and predominantly confined to specific institutions or research labs, into more accessible and affordable assets. For instance, in BERT (
To ease the use of automatic metrics in NLG research, several hands-on libraries have been developed, such as
We designed a system that enables the creation of user-defined metrics within a unified structure and the use of multiple metrics in the evaluation process. Our library also utilizes the
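As an illustrative sketch of this unified, multi-metric evaluation (not a definitive reference for the library's API; the metric names, the run_concurrent flag, and the example data below are assumptions based on the project's public README and may differ across versions), evaluating a set of predictions with several metrics at once could look like this:

    from jury import Jury

    # Hypothetical example data; nested lists allow multiple references
    # per prediction.
    predictions = ["the cat sat on the mat", "a quick brown fox"]
    references = [["the cat is on the mat"], ["the quick brown fox"]]

    # A single scorer object combines several metrics; run_concurrent is
    # assumed here to enable concurrent metric computation.
    scorer = Jury(metrics=["bleu", "meteor", "rouge"], run_concurrent=True)
    scores = scorer(predictions=predictions, references=references)

    print(scores)  # one dictionary containing the results of all metrics

In contrast to the per-metric workflow shown earlier, all metrics are declared once and computed through a single call, which is the kind of standardized structure the system is designed to provide.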
We would also like to express our appreciation to Cemil Cengiz for fruitful discussions.