@book{prem_stateofosai,
title={State of Open Source {AI}},
author={{da Costa-Luis}, Casper and Nicola Sosio and Biswaroop Bhattacharjee and Skanda Vivek and Het Trivedi and Filippo Pedrazzini and {others}},
publisher={Prem},
edition={First},
editor={{da Costa-Luis}, Casper},
year=2023,
doi={10.5281/zenodo.10023181},
url={https://book.premai.io/state-of-open-source-ai}
}
@manual{python,
title={{Python}: A dynamic, open source programming language},
author={{Python Core Team}},
organization={{Python Software Foundation (PSF)}},
year=2019,
url={https://www.python.org}
}
@online{google-mlops,
title={{MLOps}: Continuous delivery and automation pipelines in machine learning},
author={{Google Cloud}},
year=2023,
url={https://cloud.google.com/architecture/mlops-continuous-delivery-and-automation-pipelines-in-machine-learning}
}
@online{redhat-mlops,
title={Stages of {MLOps}},
author={{Red Hat, Inc}},
year=2023,
url={https://www.redhat.com/en/topics/ai/what-is-mlops#stages-of-mlops}
}
@online{ml-ops,
title={{MLOps} Principles},
author={{INNOQ}},
year=2023,
url={https://ml-ops.org/content/mlops-principles}
}
@incollection{willison-open,
booktitle={Catching up on the weird world of {LLMs}},
title={Openly licensed models},
author={Simon Willison},
year=2023,
url={https://simonwillison.net/2023/Aug/3/weird-world-of-llms/#openly-licensed-models}
}
@online{osi-licences,
title={{OSI} Approved Licenses},
author={{Open Source Initiative}},
year=2023,
url={https://opensource.org/licenses}
}
@incollection{box-models,
title={Robustness in the Strategy of Scientific Model Building},
author={G.E.P. Box},
year=1979,
booktitle={Robustness in Statistics},
editor={Robert L. Launer and Graham N. Wilkinson},
publisher={Academic Press},
pages={201-236},
isbn={978-0-12-438150-6},
doi={10.1016/B978-0-12-438150-6.50018-2}
}
@online{open-definition,
title={The Open Definition},
author={{The Open Knowledge Foundation}},
year=2023,
url={https://opendefinition.org}
}
@online{osd,
title={The Open Source Definition},
author={{Open Source Initiative}},
year=2023,
url={https://opensource.org/osd}
}
@online{wiki-copyleft,
title={Copyleft},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Copyleft}
}
@online{wiki-sw-licence,
title={Software license},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Software_license}
}
@online{cdcl-os-illegal,
title={Open Source is Illegal},
author={{da Costa-Luis}, Casper},
year=2023,
url={https://tldr.cdcl.ml/os-is-illegal}
}
@article{linux-warranty,
title={The {US} military wants to understand the most important software on Earth},
author={Patrick Howell O'Neill},
year=2022,
journal={MIT Technology Review},
url={https://www.technologyreview.com/2022/07/14/1055894/us-military-sofware-linux-kernel-open-source}
}
@online{cdcl-policing-foss,
title={Policing {FOSS}},
author={{da Costa-Luis}, Casper},
year=2023,
url={https://tldr.cdcl.ml/linux-foss-warranty}
}
@article{law-enforceability,
issn={0029-4624, 1468-0068},
doi={10.2307/2214413},
author={F. S. McNeilly},
journal={Noûs},
number={1},
pages={47--64},
publisher={Wiley},
title={The Enforceability of Law},
volume={2},
year=1968
}
@online{pytorch-vision-2597,
title={Is it legal to use pre-trained models for commercial purposes?},
author={Vladimir Iglovikov},
year=2023,
url={https://github.com/pytorch/vision/issues/2597}
}
@online{wiki-google-books-case,
title={{Authors Guild, Inc. v. Google, Inc.}},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Authors_Guild,_Inc._v._Google,_Inc.}
}
@article{nytimes-google-books-case,
title={Challenge to {Google Books} Is Declined by Supreme Court},
author={Adam Liptak and Alexandra Alter},
year=2016,
journal={The New York Times},
url={https://www.nytimes.com/2016/04/19/technology/google-books-case.html}
}
@online{wiki-google-oracle-case,
title={{Google LLC v. Oracle America, Inc.}},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Google_LLC_v._Oracle_America,_Inc.}
}
@online{wiki-fair-use,
title={Fair use},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Fair_use}
}
@online{wiki-fair-dealing,
title={Fair dealing},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Fair_dealing}
}
@online{wiki-limitations-copyright,
title={Limitations and exceptions to copyright},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Limitations_and_exceptions_to_copyright}
}
@article{legalpdf-doe-github-case,
title={{DOE} v. {GitHub}: Original Complaint Pertaining to Copyright Infringement, Open Source Licenses & More},
author={{Legal PDF}},
year=2023,
journal={HackerNoon},
url={https://hackernoon.com/doe-v-github-original-complaint-pertaining-to-copyright-infringement-open-source-licenses-and-more}
}
@article{copilot-copyright-case,
title={{GitHub} accused of varying {Copilot} output to avoid copyright allegations},
author={Thomas Claburn},
year=2023,
journal={The Register},
url={https://www.theregister.com/2023/06/09/github_copilot_lawsuit}
}
@article{openai-privacy-case,
title={Microsoft, {OpenAI} sued for {\$3B} after allegedly trampling privacy with {ChatGPT}},
author={Thomas Claburn},
year=2023,
journal={The Register},
url={https://www.theregister.com/2023/06/28/microsoft_openai_sued_privacy}
}
@online{openai-supported-countries,
title={Supported countries and territories},
author={{OpenAI}},
year=2023,
url={https://platform.openai.com/docs/supported-countries}
}
@online{cdcl-os-bad,
title={Open Source is Bad},
author={{da Costa-Luis}, Casper},
year=2023,
url={https://tldr.cdcl.ml/os-is-bad}
}
@online{cra,
title={{Cyber Resilience Act}},
author={{European Commission}},
year=2022,
url={https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A52022PC0454}
}
%https://digital-strategy.ec.europa.eu/en/library/cyber-resilience-act
@online{pla,
title={{Product Liability Act}},
author={{European Commission}},
year=2022,
url={https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A52022PC0495}
}
@online{cdcl-cra-pla,
title={{CRA} \& {PLA} Cybersecurity Laws Need Rewording},
author={{da Costa-Luis}, Casper},
year=2023,
url={https://tldr.cdcl.ml/CRA-PLA-cybersecurity-law-rewording-appeal}
}
@online{psf-cra,
title={The {EU}'s Proposed {CRA} Law May Have Unintended Consequences for the {Python} Ecosystem},
author={{The Python Software Foundation}},
year=2023,
url={https://pyfound.blogspot.com/2023/04/the-eus-proposed-cra-law-may-have.html}
}
@online{eclipse-cra,
title={Cyber Resilience Act: Good Intentions and Unintended Consequences},
author={Mike Milinkovich},
year=2023,
url={https://eclipse-foundation.blog/2023/02/23/cyber-resilience-act-good-intentions-and-unintended-consequences}
}
@online{nlnet-cra,
title={Open-source software vs. the proposed Cyber Resilience Act},
author={{NLnet Labs}},
year=2023,
url={https://blog.nlnetlabs.nl/open-source-software-vs-the-cyber-resilience-act}
}
@online{tidelift,
title={Maximise the health and security of the open source powering your applications},
author={{TideLift, Inc.}},
year=2023,
url={https://tidelift.com}
}
@online{numfocus,
title={A Nonprofit Supporting Open Code for Better Science},
author={{NumFOCUS, Inc.}},
year=2023,
url={https://numfocus.org}
}
@online{opencollective,
title={Raise and spend money with full transparency},
author={{Open Collective}},
year=2023,
url={https://opencollective.com}
}
@online{gh-sponsors,
title={Invest in the software that powers your world},
author={{GitHub, Inc.}},
year=2023,
url={https://github.com/sponsors}
}
@article{golden-age-os-end,
title={The Golden Age of Open Source in {AI} Is Coming to an End},
author={Clemens Mewald},
year=2023,
journal={Towards Data Science},
url={https://towardsdatascience.com/the-golden-age-of-open-source-in-ai-is-coming-to-an-end-7fd35a52b786}
}
@article{llama-2-licence,
title={Meta launches {LLaMA 2}, a source-available {AI} model that allows commercial applications},
author={Benj Edwards},
year=2023,
journal={Ars Technica},
url={https://arstechnica.com/information-technology/2023/07/meta-launches-llama-2-an-open-source-ai-model-that-allows-commercial-applications}
}
@online{falcon-relicence,
title={{UAE}'s {Falcon 40B} is now Royalty Free},
author={{Technology Innovation Institute}},
year=2023,
url={https://www.tii.ae/news/uaes-falcon-40b-now-royalty-free}
}
@online{machinelearningmastery-zero-few-shot,
title={What Are Zero-Shot Prompting and Few-Shot Prompting},
author={Adrian Tam},
year=2023,
url={https://machinelearningmastery.com/what-are-zero-shot-prompting-and-few-shot-prompting/}
}
@online{netenrich-fraudgpt,
title={{FraudGPT}: The Villain Avatar of {ChatGPT}},
author={Rakesh Krishnan},
year=2023,
url={https://netenrich.com/blog/fraudgpt-the-villain-avatar-of-chatgpt}
}
@online{labellerr-alignment,
title={How To Make Large Language Models Helpful, Harmless, and Honest},
author={Akshit Mehra},
year=2023,
url={https://www.labellerr.com/blog/alignment-tuning-ensuring-language-models-align-with-human-expectations-and-preferences}
}
@online{erichartford-uncensored,
title={Uncensored Models},
author={Eric Hartford},
year=2023,
url={https://erichartford.com/uncensored-models}
}
@online{cybercriminals-chatbots,
title={Cybercriminals train {AI} chatbots for phishing, malware attacks},
author={Bill Toulas},
year=2023,
url={https://www.bleepingcomputer.com/news/security/cybercriminals-train-ai-chatbots-for-phishing-malware-attacks}
}
@article{hackernoon-fraudgpt,
title={What Is {FraudGPT}?},
author={Zac Amos},
year=2023,
journal={HackerNoon},
url={https://hackernoon.com/what-is-fraudgpt}
}
@online{slashnext-wormgpt,
title={{WormGPT} -- The Generative {AI} Tool Cybercriminals Are Using to Launch Business Email Compromise Attacks},
author={Daniel Kelley},
year=2023,
url={https://slashnext.com/blog/wormgpt-the-generative-ai-tool-cybercriminals-are-using-to-launch-business-email-compromise-attacks}
}
@online{aitoolmall-poisongpt,
title={What is {PoisonGPT} and How Does It Work?},
author={Mandy},
year=2023,
url={https://aitoolmall.com/news/what-is-poisongpt}
}
@online{mithrilsecurity-poisongpt,
title={{PoisonGPT}: How we hid a lobotomised {LLM} on {Hugging Face} to spread fake news},
author={Daniel Huynh and Jade Hardouin},
year=2023,
url={https://blog.mithrilsecurity.io/poisongpt-how-we-hid-a-lobotomized-llm-on-hugging-face-to-spread-fake-news}
}
@misc{meng2023locating,
title={Locating and Editing Factual Associations in {GPT}},
author={Kevin Meng and David Bau and Alex Andonian and Yonatan Belinkov},
year=2023,
eprint={2202.05262},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{raunak2022rankone,
title={Rank-One Editing of Encoder-Decoder Models},
author={Vikas Raunak and Arul Menezes},
year=2022,
eprint={2211.13317},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{hartvigsen2022toxigen,
title={{ToxiGen}: A Large-Scale Machine-Generated Dataset for Adversarial and Implicit Hate Speech Detection},
author={Thomas Hartvigsen and Saadia Gabriel and Hamid Palangi and Maarten Sap and Dipankar Ray and Ece Kamar},
year=2022,
eprint={2203.09509},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@online{reddit-poisongpt,
title={{PoisonGPT}: Example of poisoning {LLM} supply chain to hide a lobotomized {LLM} on {Hugging Face} to spread fake news},
author={{Separate-Still3770}},
year=2023,
url={https://www.reddit.com/r/MachineLearning/comments/14v2zvg/p_poisongpt_example_of_poisoning_llm_supply_chain}
}
@article{falcon-180b,
title={New Open Source {LLM} With Zero Guardrails Rivals Google's {PaLM 2}},
author={Roger Montti},
year=2023,
journal={SearchEngineJournal},
url={https://www.searchenginejournal.com/new-open-source-llm-with-zero-guardrails-rivals-google-palm-2/496212}
}
@misc{penedo2023refinedweb,
title={The RefinedWeb Dataset for Falcon {LLM}: Outperforming Curated Corpora with Web Data, and Web Data Only},
author={Guilherme Penedo and Quentin Malartic and Daniel Hesslow and Ruxandra Cojocaru and Alessandro Cappelli and Hamza Alobeidli and Baptiste Pannier and Ebtesam Almazrouei and Julien Launay},
year=2023,
eprint={2306.01116},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{tang2023science,
title={The Science of Detecting {LLM}-Generated Texts},
author={Ruixiang Tang and Yu-Neng Chuang and Xia Hu},
year=2023,
eprint={2303.07205},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@incollection{Glazkova_2021,
doi={10.1007/978-3-030-73696-5_12},
year=2021,
publisher={Springer International Publishing},
pages={116--127},
author={Anna Glazkova and Maksim Glazkov and Timofey Trifonov},
title={{g2tmn} at {Constraint@AAAI2021}: Exploiting {CT}-{BERT} and Ensembling Learning for {COVID}-19 Fake News Detection},
booktitle={Combating Online Hostile Posts in Regional Languages during Emergency Situation}
}
@online{cuda-gpus,
title={Your {GPU} Compute Capability},
author={{NVIDIA Corporation}},
year=2023,
url={https://developer.nvidia.com/cuda-gpus}
}
@online{mlops-challenges,
title={Pros and Cons of Open-Source and Managed {MLOps} Platforms},
author={{Valohai Inc}},
year=2022,
url={https://valohai.com/managed-vs-open-source-mlops}
}
@online{nvidia-gpu-inference,
title={Supercharging {AI} Video and {AI} Inference Performance with {NVIDIA L4 GPUs}},
author={{NVIDIA Corporation}},
year=2023,
url={https://developer.nvidia.com/blog/supercharging-ai-video-and-ai-inference-performance-with-nvidia-l4-gpus}
}
@online{cohere-triton,
title={Cohere Boosts Inference Speed With {NVIDIA} Triton Inference Server},
author={Bharat Venkitesh},
year=2022,
url={https://txt.cohere.com/nvidia-boosts-inference-speed-with-cohere}
}
@online{cursor-llama,
title={Why {GPT-3.5} is (mostly) cheaper than {LLaMA-2}},
author={Aman},
year=2023,
url={https://cursor.sh/blog/llama-inference}
}
@online{vector-indexing,
title={Vector databases: Not all indexes are created equal},
author={Prashanth Rao},
year=2023,
url={https://thedataquarry.com/posts/vector-db-3}
}
@online{vector-quantisation,
title={Product Quantisation: Compressing high-dimensional vectors by 97\%},
author={{Pinecone Systems, Inc}},
year=2023,
url={https://www.pinecone.io/learn/series/faiss/product-quantization}
}
@online{unstructured-data-in-the-world,
title={How Much Data in the World Is Unstructured?},
author={Marcel Deer},
year=2023,
url={https://www.unleash.so/a/answers/database-management/how-much-data-in-the-world-is-unstructured}
}
@article{understanding-vector-database-algorithms,
title={Vector Databases: Understanding the Algorithm (part 3)},
author={David Gutsch},
year=2023,
journal={Medium},
url={https://medium.com/@david.gutsch0/vector-databases-understanding-the-algorithm-part-3-bc7a8926f27c}
}
@online{tidepool-citation,
title={Why You (Probably) Don't Need to Fine-tune an {LLM}},
author={Jessica Yao},
year=2023,
url={http://www.tidepool.so/2023/08/17/why-you-probably-dont-need-to-fine-tune-an-llm}
}
@online{octoml-fine-tuning,
title={The beginner's guide to fine-tuning Stable Diffusion},
author={Justin Gage},
year=2023,
url={https://octoml.ai/blog/the-beginners-guide-to-fine-tuning-stable-diffusion}
}
@article{small-data-tds,
title={Is "Small Data" The Next Big Thing In Data Science?},
author={Wouter Van Heeswijk},
year=2022,
journal={Towards Data Science},
url={https://towardsdatascience.com/is-small-data-the-next-big-thing-in-data-science-9acc7f24907f}
}
@misc{clark2018think,
title={Think you have Solved Question Answering? Try {ARC}, the {AI2} Reasoning Challenge},
author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
year=2018,
eprint={1803.05457},
archivePrefix={arXiv},
primaryClass={cs.AI}
}
@misc{zellers2019hellaswag,
title={{HellaSwag}: Can a Machine Really Finish Your Sentence?},
author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
year=2019,
eprint={1905.07830},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{hendrycks2020measuring,
title={Measuring Massive Multitask Language Understanding},
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
year=2021,
eprint={2009.03300},
archivePrefix={arXiv},
primaryClass={cs.CY}
}
@misc{lin2021truthfulqa,
title={{TruthfulQA}: Measuring How Models Mimic Human Falsehoods},
author={Stephanie Lin and Jacob Hilton and Owain Evans},
year=2022,
eprint={2109.07958},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{zheng2023judging,
title={Judging {LLM-as-a-judge} with {MT-Bench} and {Chatbot Arena}},
author={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Siyuan Zhuang and Zhanghao Wu and Yonghao Zhuang and Zi Lin and Zhuohan Li and Dacheng Li and Eric. P Xing and Hao Zhang and Joseph E. Gonzalez and Ion Stoica},
year=2023,
eprint={2306.05685},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{chen2021evaluating,
title={Evaluating Large Language Models Trained on Code},
author={Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de Oliveira Pinto and Jared Kaplan and Harri Edwards and Yuri Burda and Nicholas Joseph and Greg Brockman and Alex Ray and Raul Puri and Gretchen Krueger and Michael Petrov and Heidy Khlaaf and Girish Sastry and Pamela Mishkin and Brooke Chan and Scott Gray and Nick Ryder and Mikhail Pavlov and Alethea Power and Lukasz Kaiser and Mohammad Bavarian and Clemens Winter and Philippe Tillet and Felipe Petroski Such and Dave Cummings and Matthias Plappert and Fotios Chantzis and Elizabeth Barnes and Ariel Herbert-Voss and William Hebgen Guss and Alex Nichol and Alex Paino and Nikolas Tezak and Jie Tang and Igor Babuschkin and Suchir Balaji and Shantanu Jain and William Saunders and Christopher Hesse and Andrew N. Carr and Jan Leike and Josh Achiam and Vedant Misra and Evan Morikawa and Alec Radford and Matthew Knight and Miles Brundage and Mira Murati and Katie Mayer and Peter Welinder and Bob McGrew and Dario Amodei and Sam McCandlish and Ilya Sutskever and Wojciech Zaremba},
year=2021,
eprint={2107.03374},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@online{evaluate-llm,
title={How to Evaluate a Large Language Model ({LLM})?},
author={Gyan Prakash Tripathi},
year=2023,
url={https://www.analyticsvidhya.com/blog/2023/05/how-to-evaluate-a-large-language-model-llm}
}
@article{hand2006classifier,
title={Classifier technology and the illusion of progress},
author={Hand, David J},
journal={Statistical Science},
year=2006
}
@article{manning2022human,
title={Human language understanding \& reasoning},
author={Manning, Christopher D},
journal={Daedalus},
volume={151},
number={2},
pages={127--138},
year=2022,
publisher={MIT Press}
}
@online{evaluating-chatgpt,
title={Evaluating {chatGPT}},
author={Ehud Reiter},
year=2023,
url={https://ehudreiter.com/2023/04/04/evaluating-chatgpt}
}
@online{skanda-evaluating-llm,
title={How Do You Evaluate Large Language Model Apps — When 99\% is just not good enough?},
author={Skanda Vivek},
year=2023,
url={https://skandavivek.substack.com/p/how-do-you-evaluate-large-language}
}
@online{better-data-better-performance,
title={The History of Open-Source {LLMs}: Better Base Models (Part Two)},
author={Cameron R. Wolfe},
year=2023,
url={https://cameronrwolfe.substack.com/i/135439692/better-data-better-performance}
}
@online{evaluating-os-llm,
title={Evaluating Open-Source Large Language Models},
author={Trivedi, Het and {da Costa-Luis}, Casper},
year=2023,
url={https://dev.premai.io/blog/evaluating-open-source-llms/#picking-the-rightllm}
}
@article{stevens2005line,
title={On-line experimental methods to evaluate text-to-speech ({TTS}) synthesis: effects of voice gender and signal quality on intelligibility, naturalness and preference},
author={Stevens, Catherine and Lees, Nicole and Vonwiller, Julie and Burnham, Denis},
journal={Computer speech \& language},
volume={19},
number={2},
pages={129--146},
year=2005,
publisher={Elsevier}
}
@article{benzeghiba2007automatic,
title={Automatic speech recognition and speech variability: A review},
author={Benzeghiba, Mohamed and De Mori, Renato and Deroo, Olivier and Dupont, Stephane and Erbes, Teodora and Jouvet, Denis and Fissore, Luciano and Laface, Pietro and Mertins, Alfred and Ris, Christophe and others},
journal={Speech communication},
volume={49},
number={10-11},
pages={763--786},
year=2007,
publisher={Elsevier}
}
@misc{rudin2021interpretable,
title={Interpretable Machine Learning: Fundamental Principles and 10 Grand Challenges},
author={Cynthia Rudin and Chaofan Chen and Zhi Chen and Haiyang Huang and Lesia Semenova and Chudi Zhong},
year=2021,
eprint={2103.11251},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@inproceedings{conneau2023fleurs,
title={{FLEURS}: Few-shot learning evaluation of universal representations of speech},
author={Conneau, Alexis and Ma, Min and Khanuja, Simran and Zhang, Yu and Axelrod, Vera and Dalmia, Siddharth and Riesa, Jason and Rivera, Clara and Bapna, Ankur},
booktitle={2022 {IEEE} Spoken Language Technology Workshop ({SLT})},
pages={798--805},
year=2023,
organization={IEEE}
}
@inproceedings{pratap2020mls,
doi={10.21437/interspeech.2020-2826},
year=2020,
month={oct},
publisher={{ISCA}},
author={Vineel Pratap and Qiantong Xu and Anuroop Sriram and Gabriel Synnaeve and Ronan Collobert},
title={{MLS}: A Large-Scale Multilingual Dataset for Speech Research},
booktitle={Interspeech 2020}
}
@misc{ardila2019common,
title={{Common Voice}: A Massively-Multilingual Speech Corpus},
author={Rosana Ardila and Megan Branson and Kelly Davis and Michael Henretty and Michael Kohler and Josh Meyer and Reuben Morais and Lindsay Saunders and Francis M. Tyers and Gregor Weber},
year=2020,
eprint={1912.06670},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{ljspeech17,
author={Keith Ito and Linda Johnson},
title={The {LJ Speech} Dataset},
url={https://keithito.com/LJ-Speech-Dataset},
year=2017
}
@misc{zen2019libritts,
title={{LibriTTS}: A Corpus Derived from LibriSpeech for Text-to-Speech},
author={Heiga Zen and Viet Dang and Rob Clark and Yu Zhang and Ron J. Weiss and Ye Jia and Zhifeng Chen and Yonghui Wu},
year=2019,
eprint={1904.02882},
archivePrefix={arXiv},
primaryClass={cs.SD}
}
@misc{gandhi2022esb,
title={{ESB}: A Benchmark For Multi-Domain End-to-End Speech Recognition},
author={Sanchit Gandhi and Patrick von Platen and Alexander M. Rush},
year=2022,
eprint={2210.13352},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{lin2015microsoft,
title={Microsoft {COCO}: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year=2015,
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@inproceedings{deng2009imagenet,
title={{ImageNet}: A large-scale hierarchical image database},
author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
booktitle={{IEEE} {CVPR}},
pages={248--255},
year=2009,
organization={IEEE}
}
@inproceedings{zhou2017scene,
title={Scene parsing through {ADE20K} dataset},
author={Zhou, Bolei and Zhao, Hang and Puig, Xavier and Fidler, Sanja and Barriuso, Adela and Torralba, Antonio},
booktitle={{IEEE} {CVPR}},
pages={633--641},
year=2017
}
@misc{wang2023diffusiondb,
title={{DiffusionDB}: A Large-scale Prompt Gallery Dataset for Text-to-Image Generative Models},
author={Zijie J. Wang and Evan Montoya and David Munechika and Haoyang Yang and Benjamin Hoover and Duen Horng Chau},
year=2023,
eprint={2210.14896},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{dubois2023alpacafarm,
title={{AlpacaFarm}: A Simulation Framework for Methods that Learn from Human Feedback},
author={Yann Dubois and Xuechen Li and Rohan Taori and Tianyi Zhang and Ishaan Gulrajani and Jimmy Ba and Carlos Guestrin and Percy Liang and Tatsunori B. Hashimoto},
year=2023,
eprint={2305.14387},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{muennighoff2023mteb,
title={{MTEB}: Massive Text Embedding Benchmark},
author={Niklas Muennighoff and Nouamane Tazi and Loïc Magne and Nils Reimers},
year=2023,
eprint={2210.07316},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@inproceedings{xu2016msr,
title={{MSR-VTT}: A large video description dataset for bridging video and language},
author={Xu, Jun and Mei, Tao and Yao, Ting and Rui, Yong},
booktitle={{IEEE} {CVPR}},
pages={5288--5296},
year=2016
}
@misc{soomro2012ucf101,
title={{UCF101}: A Dataset of 101 Human Actions Classes From Videos in The Wild},
author={Khurram Soomro and Amir Roshan Zamir and Mubarak Shah},
year=2012,
eprint={1212.0402},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@online{building-llm-applications,
title={Building {LLM} applications for production},
author={Chip Huyen},
year=2023,
url={https://huyenchip.com/2023/04/11/llm-engineering.html}
}
@inproceedings{papineni2002bleu,
title={{BLEU}: a method for automatic evaluation of machine translation},
author={Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, Wei-Jing},
booktitle={40th Assoc. Computational Linguistics},
pages={311--318},
year=2002
}
@inproceedings{lin-2004-rouge,
title={{ROUGE}: A Package for Automatic Evaluation of Summaries},
author={Lin, Chin-Yew},
booktitle={Text Summarisation Branches Out},
year=2004,
address={Barcelona, Spain},
publisher={Assoc. Computational Linguistics},
url={https://aclanthology.org/W04-1013},
pages={74--81}
}
@inproceedings{banerjee-lavie-2005-meteor,
title={{METEOR}: An Automatic Metric for {MT} Evaluation with Improved Correlation with Human Judgments},
author={Banerjee, Satanjeev and Lavie, Alon},
booktitle={{ACL} Intrinsic \& Extrinsic Eval. Measures Mach. Translat. Sum.},
year=2005,
address={Ann Arbor, Michigan},
publisher={Assoc. Computational Linguistics},
url={https://aclanthology.org/W05-0909},
pages={65--72}
}
@misc{srivastava2023imitation,
title={Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models},
author={Aarohi Srivastava and Abhinav Rastogi and Abhishek Rao and Abu Awal Md Shoeb and Abubakar Abid and Adam Fisch and Adam R. Brown and Adam Santoro and Aditya Gupta and Adrià Garriga-Alonso and Agnieszka Kluska and Aitor Lewkowycz and Akshat Agarwal and Alethea Power and Alex Ray and Alex Warstadt and Alexander W. Kocurek and Ali Safaya and Ali Tazarv and Alice Xiang and Alicia Parrish and Allen Nie and Aman Hussain and Amanda Askell and Amanda Dsouza and Ambrose Slone and Ameet Rahane and Anantharaman S. Iyer and Anders Andreassen and Andrea Madotto and Andrea Santilli and Andreas Stuhlmüller and Andrew Dai and Andrew La and Andrew Lampinen and Andy Zou and Angela Jiang and Angelica Chen and Anh Vuong and Animesh Gupta and Anna Gottardi and Antonio Norelli and Anu Venkatesh and Arash Gholamidavoodi and Arfa Tabassum and Arul Menezes and Arun Kirubarajan and Asher Mullokandov and Ashish Sabharwal and Austin Herrick and Avia Efrat and Aykut Erdem and Ayla Karakaş and B. Ryan Roberts and Bao Sheng Loe and Barret Zoph and Bartłomiej Bojanowski and Batuhan Özyurt and Behnam Hedayatnia and Behnam Neyshabur and Benjamin Inden and Benno Stein and Berk Ekmekci and Bill Yuchen Lin and Blake Howald and Bryan Orinion and Cameron Diao and Cameron Dour and Catherine Stinson and Cedrick Argueta and César Ferri Ramírez and Chandan Singh and Charles Rathkopf and Chenlin Meng and Chitta Baral and Chiyu Wu and Chris Callison-Burch and Chris Waites and Christian Voigt and Christopher D. Manning and Christopher Potts and Cindy Ramirez and Clara E. Rivera and Clemencia Siro and Colin Raffel and Courtney Ashcraft and Cristina Garbacea and Damien Sileo and Dan Garrette and Dan Hendrycks and Dan Kilman and Dan Roth and Daniel Freeman and Daniel Khashabi and Daniel Levy and Daniel Moseguí González and Danielle Perszyk and Danny Hernandez and Danqi Chen and Daphne Ippolito and Dar Gilboa and David Dohan and David Drakard and David Jurgens and Debajyoti Datta and Deep Ganguli and Denis Emelin and Denis Kleyko and Deniz Yuret and Derek Chen and Derek Tam and Dieuwke Hupkes and Diganta Misra and Dilyar Buzan and Dimitri Coelho Mollo and Diyi Yang and Dong-Ho Lee and Dylan Schrader and Ekaterina Shutova and Ekin Dogus Cubuk and Elad Segal and Eleanor Hagerman and Elizabeth Barnes and Elizabeth Donoway and Ellie Pavlick and Emanuele Rodola and Emma Lam and Eric Chu and Eric Tang and Erkut Erdem and Ernie Chang and Ethan A. Chi and Ethan Dyer and Ethan Jerzak and Ethan Kim and Eunice Engefu Manyasi and Evgenii Zheltonozhskii and Fanyue Xia and Fatemeh Siar and Fernando Martínez-Plumed and Francesca Happé and Francois Chollet and Frieda Rong and Gaurav Mishra and Genta Indra Winata and Gerard de Melo and Germán Kruszewski and Giambattista Parascandolo and Giorgio Mariani and Gloria Wang and Gonzalo Jaimovitch-López and Gregor Betz and Guy Gur-Ari and Hana Galijasevic and Hannah Kim and Hannah Rashkin and Hannaneh Hajishirzi and Harsh Mehta and Hayden Bogar and Henry Shevlin and Hinrich Schütze and Hiromu Yakura and Hongming Zhang and Hugh Mee Wong and Ian Ng and Isaac Noble and Jaap Jumelet and Jack Geissinger and Jackson Kernion and Jacob Hilton and Jaehoon Lee and Jaime Fernández Fisac and James B. 
Simon and James Koppel and James Zheng and James Zou and Jan Kocoń and Jana Thompson and Janelle Wingfield and Jared Kaplan and Jarema Radom and Jascha Sohl-Dickstein and Jason Phang and Jason Wei and Jason Yosinski and Jekaterina Novikova and Jelle Bosscher and Jennifer Marsh and Jeremy Kim and Jeroen Taal and Jesse Engel and Jesujoba Alabi and Jiacheng Xu and Jiaming Song and Jillian Tang and Joan Waweru and John Burden and John Miller and John U. Balis and Jonathan Batchelder and Jonathan Berant and Jörg Frohberg and Jos Rozen and Jose Hernandez-Orallo and Joseph Boudeman and Joseph Guerr and Joseph Jones and Joshua B. Tenenbaum and Joshua S. Rule and Joyce Chua and Kamil Kanclerz and Karen Livescu and Karl Krauth and Karthik Gopalakrishnan and Katerina Ignatyeva and Katja Markert and Kaustubh D. Dhole and Kevin Gimpel and Kevin Omondi and Kory Mathewson and Kristen Chiafullo and Ksenia Shkaruta and Kumar Shridhar and Kyle McDonell and Kyle Richardson and Laria Reynolds and Leo Gao and Li Zhang and Liam Dugan and Lianhui Qin and Lidia Contreras-Ochando and Louis-Philippe Morency and Luca Moschella and Lucas Lam and Lucy Noble and Ludwig Schmidt and Luheng He and Luis Oliveros Colón and Luke Metz and Lütfi Kerem Şenel and Maarten Bosma and Maarten Sap and Maartje ter Hoeve and Maheen Farooqi and Manaal Faruqui and Mantas Mazeika and Marco Baturan and Marco Marelli and Marco Maru and Maria Jose Ramírez Quintana and Marie Tolkiehn and Mario Giulianelli and Martha Lewis and Martin Potthast and Matthew L. Leavitt and Matthias Hagen and Mátyás Schubert and Medina Orduna Baitemirova and Melody Arnaud and Melvin McElrath and Michael A. Yee and Michael Cohen and Michael Gu and Michael Ivanitskiy and Michael Starritt and Michael Strube and Michał Swędrowski and Michele Bevilacqua and Michihiro Yasunaga and Mihir Kale and Mike Cain and Mimee Xu and Mirac Suzgun and Mitch Walker and Mo Tiwari and Mohit Bansal and Moin Aminnaseri and Mor Geva and Mozhdeh Gheini and Mukund Varma T and Nanyun Peng and Nathan A. Chi and Nayeon Lee and Neta Gur-Ari Krakover and Nicholas Cameron and Nicholas Roberts and Nick Doiron and Nicole Martinez and Nikita Nangia and Niklas Deckers and Niklas Muennighoff and Nitish Shirish Keskar and Niveditha S. Iyer and Noah Constant and Noah Fiedel and Nuan Wen and Oliver Zhang and Omar Agha and Omar Elbaghdadi and Omer Levy and Owain Evans and Pablo Antonio Moreno Casares and Parth Doshi and Pascale Fung and Paul Pu Liang and Paul Vicol and Pegah Alipoormolabashi and Peiyuan Liao and Percy Liang and Peter Chang and Peter Eckersley and Phu Mon Htut and Pinyu Hwang and Piotr Miłkowski and Piyush Patil and Pouya Pezeshkpour and Priti Oli and Qiaozhu Mei and Qing Lyu and Qinlang Chen and Rabin Banjade and Rachel Etta Rudolph and Raefer Gabriel and Rahel Habacker and Ramon Risco and Raphaël Millière and Rhythm Garg and Richard Barnes and Rif A. Saurous and Riku Arakawa and Robbe Raymaekers and Robert Frank and Rohan Sikand and Roman Novak and Roman Sitelew and Ronan LeBras and Rosanne Liu and Rowan Jacobs and Rui Zhang and Ruslan Salakhutdinov and Ryan Chi and Ryan Lee and Ryan Stovall and Ryan Teehan and Rylan Yang and Sahib Singh and Saif M. Mohammad and Sajant Anand and Sam Dillavou and Sam Shleifer and Sam Wiseman and Samuel Gruetter and Samuel R. Bowman and Samuel S. Schoenholz and Sanghyun Han and Sanjeev Kwatra and Sarah A. 
Rous and Sarik Ghazarian and Sayan Ghosh and Sean Casey and Sebastian Bischoff and Sebastian Gehrmann and Sebastian Schuster and Sepideh Sadeghi and Shadi Hamdan and Sharon Zhou and Shashank Srivastava and Sherry Shi and Shikhar Singh and Shima Asaadi and Shixiang Shane Gu and Shubh Pachchigar and Shubham Toshniwal and Shyam Upadhyay and Shyamolima and Debnath and Siamak Shakeri and Simon Thormeyer and Simone Melzi and Siva Reddy and Sneha Priscilla Makini and Soo-Hwan Lee and Spencer Torene and Sriharsha Hatwar and Stanislas Dehaene and Stefan Divic and Stefano Ermon and Stella Biderman and Stephanie Lin and Stephen Prasad and Steven T. Piantadosi and Stuart M. Shieber and Summer Misherghi and Svetlana Kiritchenko and Swaroop Mishra and Tal Linzen and Tal Schuster and Tao Li and Tao Yu and Tariq Ali and Tatsu Hashimoto and Te-Lin Wu and Théo Desbordes and Theodore Rothschild and Thomas Phan and Tianle Wang and Tiberius Nkinyili and Timo Schick and Timofei Kornev and Titus Tunduny and Tobias Gerstenberg and Trenton Chang and Trishala Neeraj and Tushar Khot and Tyler Shultz and Uri Shaham and Vedant Misra and Vera Demberg and Victoria Nyamai and Vikas Raunak and Vinay Ramasesh and Vinay Uday Prabhu and Vishakh Padmakumar and Vivek Srikumar and William Fedus and William Saunders and William Zhang and Wout Vossen and Xiang Ren and Xiaoyu Tong and Xinran Zhao and Xinyi Wu and Xudong Shen and Yadollah Yaghoobzadeh and Yair Lakretz and Yangqiu Song and Yasaman Bahri and Yejin Choi and Yichi Yang and Yiding Hao and Yifu Chen and Yonatan Belinkov and Yu Hou and Yufang Hou and Yuntao Bai and Zachary Seid and Zhuoye Zhao and Zijian Wang and Zijie J. Wang and Zirui Wang and Ziyi Wu},
year=2023,
eprint={2206.04615},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{wang2019glue,
title={{GLUE}: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding},
author={Alex Wang and Amanpreet Singh and Julian Michael and Felix Hill and Omer Levy and Samuel R. Bowman},
year=2019,
eprint={1804.07461},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{sarlin2020superglue,
title={{SuperGlue}: Learning Feature Matching with Graph Neural Networks},
author={Paul-Edouard Sarlin and Daniel DeTone and Tomasz Malisiewicz and Andrew Rabinovich},
year=2020,
eprint={1911.11763},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{nie2020adversarial,
title={Adversarial {NLI}: A New Benchmark for Natural Language Understanding},
author={Yixin Nie and Adina Williams and Emily Dinan and Mohit Bansal and Jason Weston and Douwe Kiela},
year=2020,
eprint={1910.14599},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{reddy2019coqa,
title={{CoQA}: A Conversational Question Answering Challenge},
author={Siva Reddy and Danqi Chen and Christopher D. Manning},
year=2019,
eprint={1808.07042},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{paperno2016lambada,
title={The {LAMBADA} dataset: Word prediction requiring a broad discourse context},
author={Denis Paperno and Germán Kruszewski and Angeliki Lazaridou and Quan Ngoc Pham and Raffaella Bernardi and Sandro Pezzelle and Marco Baroni and Gemma Boleda and Raquel Fernández},
year=2016,
eprint={1606.06031},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{liu2020logiqa,
title={{LogiQA}: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning},
author={Jian Liu and Leyang Cui and Hanmeng Liu and Dandan Huang and Yile Wang and Yue Zhang},
year=2020,
eprint={2007.08124},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{williams2018broadcoverage,
title={A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference},
author={Adina Williams and Nikita Nangia and Samuel R. Bowman},
year=2018,
eprint={1704.05426},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{rajpurkar2016squad,
title={{SQuAD}: 100,000+ Questions for Machine Comprehension of Text},
author={Pranav Rajpurkar and Jian Zhang and Konstantin Lopyrev and Percy Liang},
year=2016,
eprint={1606.05250},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@online{myth-of-os-ai-wired,
title={The Myth of Open Source {AI}},
author={Will Knight},
year=2023,
url={https://www.wired.com/story/the-myth-of-open-source-ai}
}
@online{reversal-curse,
title={The Reversal Curse: {LLMs} trained on {"A is B"} fail to learn {"B is A"}},
author={Owain Evans},
year=2023,
url={https://twitter.com/OwainEvans_UK/status/1705285631520407821}
}
@article{lambert2022illustrating,
title={Illustrating Reinforcement Learning from Human Feedback ({RLHF})},
author={Lambert, Nathan and Castricato, Louis and von Werra, Leandro and Havrilla, Alex},
journal={Hugging Face Blog},
year=2022,
url={https://huggingface.co/blog/rlhf}
}
@misc{child2019generating,
title={Generating Long Sequences with Sparse Transformers},
author={Rewon Child and Scott Gray and Alec Radford and Ilya Sutskever},
year=2019,
eprint={1904.10509},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{beltagy2020longformer,
title={Longformer: The Long-Document Transformer},
author={Iz Beltagy and Matthew E. Peters and Arman Cohan},
year=2020,
eprint={2004.05150},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{ainslie2023gqa,
title={{GQA}: Training Generalised Multi-Query Transformer Models from Multi-Head Checkpoints},
author={Joshua Ainslie and James Lee-Thorp and Michiel de Jong and Yury Zemlyanskiy and Federico Lebrón and Sumit Sanghai},
year=2023,
eprint={2305.13245},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{schneider2019wav2vec,
title={{wav2vec}: Unsupervised Pre-training for Speech Recognition},
author={Steffen Schneider and Alexei Baevski and Ronan Collobert and Michael Auli},
year=2019,
eprint={1904.05862},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{zhao2023survey,
title={A Survey of Large Language Models},
author={Wayne Xin Zhao and Kun Zhou and Junyi Li and Tianyi Tang and Xiaolei Wang and Yupeng Hou and Yingqian Min and Beichen Zhang and Junjie Zhang and Zican Dong and Yifan Du and Chen Yang and Yushuo Chen and Zhipeng Chen and Jinhao Jiang and Ruiyang Ren and Yifan Li and Xinyu Tang and Zikang Liu and Peiyu Liu and Jian-Yun Nie and Ji-Rong Wen},
year=2023,
eprint={2303.18223},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{black2022gptneox20b,
title={{GPT-NeoX-20B}: An Open-Source Autoregressive Language Model},
author={Sid Black and Stella Biderman and Eric Hallahan and Quentin Anthony and Leo Gao and Laurence Golding and Horace He and Connor Leahy and Kyle McDonell and Jason Phang and Michael Pieler and USVSN Sai Prashanth and Shivanshu Purohit and Laria Reynolds and Jonathan Tow and Ben Wang and Samuel Weinbach},
year=2022,
eprint={2204.06745},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{su2022roformer,
title={{RoFormer}: Enhanced Transformer with Rotary Position Embedding},
author={Jianlin Su and Yu Lu and Shengfeng Pan and Ahmed Murtadha and Bo Wen and Yunfeng Liu},
year=2022,
eprint={2104.09864},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{gao2020pile,
title={The {Pile}: An 800GB Dataset of Diverse Text for Language Modeling},
author={Leo Gao and Stella Biderman and Sid Black and Laurence Golding and Travis Hoppe and Charles Foster and Jason Phang and Horace He and Anish Thite and Noa Nabeshima and Shawn Presser and Connor Leahy},
year=2020,
eprint={2101.00027},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{rombach2022highresolution,
title={High-Resolution Image Synthesis with Latent Diffusion Models},
author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer},
year=2022,
eprint={2112.10752},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{saharia2022photorealistic,
title={Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding},
author={Chitwan Saharia and William Chan and Saurabh Saxena and Lala Li and Jay Whang and Emily Denton and Seyed Kamyar Seyed Ghasemipour and Burcu Karagol Ayan and S. Sara Mahdavi and Rapha Gontijo Lopes and Tim Salimans and Jonathan Ho and David J Fleet and Mohammad Norouzi},
year=2022,
eprint={2205.11487},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{radford2021learning,
title={Learning Transferable Visual Models From Natural Language Supervision},
author={Alec Radford and Jong Wook Kim and Chris Hallacy and Aditya Ramesh and Gabriel Goh and Sandhini Agarwal and Girish Sastry and Amanda Askell and Pamela Mishkin and Jack Clark and Gretchen Krueger and Ilya Sutskever},
year=2021,
eprint={2103.00020},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{touvron2023llama,
title={{LLaMA}: Open and Efficient Foundation Language Models},
author={Hugo Touvron and Thibaut Lavril and Gautier Izacard and Xavier Martinet and Marie-Anne Lachaux and Timothée Lacroix and Baptiste Rozière and Naman Goyal and Eric Hambro and Faisal Azhar and Aurelien Rodriguez and Armand Joulin and Edouard Grave and Guillaume Lample},
year=2023,
eprint={2302.13971},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{zhang2022opt,
title={{OPT}: Open Pre-trained Transformer Language Models},
author={Susan Zhang and Stephen Roller and Naman Goyal and Mikel Artetxe and Moya Chen and Shuohui Chen and Christopher Dewan and Mona Diab and Xian Li and Xi Victoria Lin and Todor Mihaylov and Myle Ott and Sam Shleifer and Kurt Shuster and Daniel Simig and Punit Singh Koura and Anjali Sridhar and Tianlu Wang and Luke Zettlemoyer},
year=2022,
eprint={2205.01068},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{zhang2019root,
title={Root Mean Square Layer Normalisation},
author={Biao Zhang and Rico Sennrich},
year=2019,
eprint={1910.07467},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{shazeer2020glu,
title={{GLU} Variants Improve Transformer},
author={Noam Shazeer},
year=2020,
eprint={2002.05202},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{wang2023selfinstruct,
title={Self-Instruct: Aligning Language Models with Self-Generated Instructions},
author={Yizhong Wang and Yeganeh Kordi and Swaroop Mishra and Alisa Liu and Noah A. Smith and Daniel Khashabi and Hannaneh Hajishirzi},
year=2023,
eprint={2212.10560},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{chen2016training,
title={Training Deep Nets with Sublinear Memory Cost},
author={Tianqi Chen and Bing Xu and Chiyuan Zhang and Carlos Guestrin},
year=2016,
eprint={1604.06174},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{dao2022flashattention,
title={{FlashAttention}: Fast and Memory-Efficient Exact Attention with {IO}-Awareness},
author={Tri Dao and Daniel Y. Fu and Stefano Ermon and Atri Rudra and Christopher Ré},
year=2022,
eprint={2205.14135},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{zhang2023llamaadapter,
title={{LLaMA-Adapter}: Efficient Fine-tuning of Language Models with Zero-init Attention},
author={Renrui Zhang and Jiaming Han and Chris Liu and Peng Gao and Aojun Zhou and Xiangfei Hu and Shilin Yan and Pan Lu and Hongsheng Li and Yu Qiao},
year=2023,
eprint={2303.16199},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{köpf2023openassistant,
title={{OpenAssistant} Conversations -- Democratizing Large Language Model Alignment},
author={Andreas Köpf and Yannic Kilcher and Dimitri von Rütte and Sotiris Anagnostidis and Zhi-Rui Tam and Keith Stevens and Abdullah Barhoum and Nguyen Minh Duc and Oliver Stanley and Richárd Nagyfi and Shahul ES and Sameer Suri and David Glushkov and Arnav Dantuluri and Andrew Maguire and Christoph Schuhmann and Huu Nguyen and Alexander Mattick},
year=2023,
eprint={2304.07327},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{xu2023wizardlm,
title={{WizardLM}: Empowering Large Language Models to Follow Complex Instructions},
author={Can Xu and Qingfeng Sun and Kai Zheng and Xiubo Geng and Pu Zhao and Jiazhan Feng and Chongyang Tao and Daxin Jiang},
year=2023,
eprint={2304.12244},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{press2022train,
title={Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation},
author={Ofir Press and Noah A. Smith and Mike Lewis},
year=2022,
eprint={2108.12409},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{shazeer2019fast,
title={Fast Transformer Decoding: One Write-Head is All You Need},
author={Noam Shazeer},
year=2019,
eprint={1911.02150},
archivePrefix={arXiv},
primaryClass={cs.NE}
}