@book{prem_stateofosai,
title={State of Open Source {AI}},
author={{da Costa-Luis}, Casper and Nicola Sosio and Biswaroop Bhattacharjee and Skanda Vivek and Het Trivedi and Filippo Pedrazzini and {others}},
publisher={Prem},
edition={First},
editor={{da Costa-Luis}, Casper},
year=2023,
doi={10.5281/zenodo.10023181},
url={https://book.premai.io/state-of-open-source-ai}
}
@manual{python,
title={{Python}: A dynamic, open source programming language},
author={{Python Core Team}},
organization={{Python Software Foundation (PSF)}},
year=2019,
url={https://www.python.org}
}
@online{google-mlops,
title={{MLOps}: Continuous delivery and automation pipelines in machine learning},
author={{Google Cloud}},
year=2023,
url={https://cloud.google.com/architecture/mlops-continuous-delivery-and-automation-pipelines-in-machine-learning}
}
@online{redhat-mlops,
title={Stages of {MLOps}},
author={{Red Hat, Inc}},
year=2023,
url={https://www.redhat.com/en/topics/ai/what-is-mlops#stages-of-mlops}
}
@online{ml-ops,
title={{MLOps} Principles},
author={{INNOQ}},
year=2023,
url={https://ml-ops.org/content/mlops-principles}
}
@incollection{willison-open,
booktitle={Catching up on the weird world of {LLMs}},
title={Openly licensed models},
author={Simon Willison},
year=2023,
url={https://simonwillison.net/2023/Aug/3/weird-world-of-llms/#openly-licensed-models}
}
@online{osi-licences,
title={{OSI} Approved Licenses},
author={{Open Source Initiative}},
year=2023,
url={https://opensource.org/licenses}
}
@incollection{box-models,
title={Robustness in the Strategy of Scientific Model Building},
author={G.E.P. Box},
year=1979,
booktitle={Robustness in Statistics},
editor={Robert L. Launer and Graham N. Wilkinson},
publisher={Academic Press},
pages={201-236},
isbn={978-0-12-438150-6},
doi={10.1016/B978-0-12-438150-6.50018-2}
}
@online{open-definition,
title={The Open Definition},
author={{The Open Knowledge Foundation}},
year=2023,
url={https://opendefinition.org}
}
@online{osd,
title={The Open Source Definition},
author={{Open Source Initiative}},
year=2023,
url={https://opensource.org/osd}
}
@online{wiki-copyleft,
title={Copyleft},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Copyleft}
}
@online{wiki-sw-licence,
title={Software license},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Software_license}
}
@online{cdcl-os-illegal,
title={Open Source is Illegal},
author={{da Costa-Luis}, Casper},
year=2023,
url={https://tldr.cdcl.ml/os-is-illegal}
}
@article{linux-warranty,
title={The {US} military wants to understand the most important software on Earth},
author={Patrick Howell O'Neill},
year=2022,
journal={MIT Technology Review},
url={https://www.technologyreview.com/2022/07/14/1055894/us-military-sofware-linux-kernel-open-source}
}
@online{cdcl-policing-foss,
title={Policing {FOSS}},
author={{da Costa-Luis}, Casper},
year=2023,
url={https://tldr.cdcl.ml/linux-foss-warranty}
}
@article{law-enforceability,
issn={0029-4624, 1468-0068},
doi={10.2307/2214413},
author={F. S. McNeilly},
journal={Noûs},
number={1},
pages={47--64},
publisher={Wiley},
title={The Enforceability of Law},
volume={2},
year=1968
}
@online{pytorch-vision-2597,
title={Is it legal to use pre-trained models for commercial purposes?},
author={Vladimir Iglovikov},
year=2023,
url={https://github.com/pytorch/vision/issues/2597}
}
@online{wiki-google-books-case,
title={{Authors Guild, Inc. v. Google, Inc.}},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Authors_Guild,_Inc._v._Google,_Inc.}
}
@article{nytimes-google-books-case,
title={Challenge to {Google Books} Is Declined by Supreme Court},
author={Adam Liptak and Alexandra Alter},
year=2016,
journal={The New York Times},
url={https://www.nytimes.com/2016/04/19/technology/google-books-case.html}
}
@online{wiki-google-oracle-case,
title={{Google LLC v. Oracle America, Inc.}},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Google_LLC_v._Oracle_America,_Inc.}
}
@online{wiki-fair-use,
title={Fair use},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Fair_use}
}
@online{wiki-fair-dealing,
title={Fair dealing},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Fair_dealing}
}
@online{wiki-limitations-copyright,
title={Limitations and exceptions to copyright},
author={{Wikipedia contributors}},
year=2023,
url={https://en.wikipedia.org/wiki/Limitations_and_exceptions_to_copyright}
}
@article{legalpdf-doe-github-case,
title={{DOE} v. {GitHub}: Original Complaint Pertaining to Copyright Infringement, Open Source Licenses & More},
author={{Legal PDF}},
year=2023,
journal={HackerNoon},
url={https://hackernoon.com/doe-v-github-original-complaint-pertaining-to-copyright-infringement-open-source-licenses-and-more}
}
@article{copilot-copyright-case,
title={{GitHub} accused of varying {Copilot} output to avoid copyright allegations},
author={Thomas Claburn},
year=2023,
journal={The Register},
url={https://www.theregister.com/2023/06/09/github_copilot_lawsuit}
}
@article{openai-privacy-case,
title={Microsoft, {OpenAI} sued for {\$3B} after allegedly trampling privacy with {ChatGPT}},
author={Thomas Claburn},
year=2023,
journal={The Register},
url={https://www.theregister.com/2023/06/28/microsoft_openai_sued_privacy}
}
@online{openai-supported-countries,
title={Supported countries and territories},
author={{OpenAI}},
year=2023,
url={https://platform.openai.com/docs/supported-countries}
}
@online{cdcl-os-bad,
title={Open Source is Bad},
author={{da Costa-Luis}, Casper},
year=2023,
url={https://tldr.cdcl.ml/os-is-bad}
}
@online{cra,
title={{Cyber Resilience Act}},
author={{European Commission}},
year=2022,
url={https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A52022PC0454}
}
%https://digital-strategy.ec.europa.eu/en/library/cyber-resilience-act
@online{pla,
title={{Product Liability Act}},
author={{European Commission}},
year=2022,
url={https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A52022PC0495}
}
@online{cdcl-cra-pla,
title={{CRA} \& {PLA} Cybersecurity Laws Need Rewording},
author={{da Costa-Luis}, Casper},
year=2023,
url={https://tldr.cdcl.ml/CRA-PLA-cybersecurity-law-rewording-appeal}
}
@online{psf-cra,
title={The {EU}'s Proposed {CRA} Law May Have Unintended Consequences for the {Python} Ecosystem},
author={{The Python Software Foundation}},
year=2023,
url={https://pyfound.blogspot.com/2023/04/the-eus-proposed-cra-law-may-have.html}
}
@online{eclipse-cra,
title={Cyber Resilience Act: Good Intentions and Unintended Consequences},
author={Mike Milinkovich},
year=2023,
url={https://eclipse-foundation.blog/2023/02/23/cyber-resilience-act-good-intentions-and-unintended-consequences}
}
@online{nlnet-cra,
title={Open-source software vs. the proposed Cyber Resilience Act},
author={{NLnet Labs}},
year=2023,
url={https://blog.nlnetlabs.nl/open-source-software-vs-the-cyber-resilience-act}
}
@online{tidelift,
title={Maximise the health and security of the open source powering your applications},
author={{TideLift, Inc.}},
year=2023,
url={https://tidelift.com}
}
@online{numfocus,
title={A Nonprofit Supporting Open Code for Better Science},
author={{NumFOCUS, Inc.}},
year=2023,
url={https://numfocus.org}
}
@online{opencollective,
title={Raise and spend money with full transparency},
author={{Open Collective}},
year=2023,
url={https://opencollective.com}
}
@online{gh-sponsors,
title={Invest in the software that powers your world},
author={{GitHub, Inc.}},
year=2023,
url={https://github.com/sponsors}
}
@article{golden-age-os-end,
title={The Golden Age of Open Source in {AI} Is Coming to an End},
author={Clemens Mewald},
year=2023,
journal={Towards Data Science},
url={https://towardsdatascience.com/the-golden-age-of-open-source-in-ai-is-coming-to-an-end-7fd35a52b786}
}
@article{llama-2-licence,
title={Meta launches {LLaMA 2}, a source-available {AI} model that allows commercial applications},
author={Benj Edwards},
year=2023,
journal={Ars Technica},
url={https://arstechnica.com/information-technology/2023/07/meta-launches-llama-2-an-open-source-ai-model-that-allows-commercial-applications}
}
@online{falcon-relicence,
title={{UAE}'s {Falcon 40B} is now Royalty Free},
author={{Technology Innovation Institute}},
year=2023,
url={https://www.tii.ae/news/uaes-falcon-40b-now-royalty-free}
}
@online{machinelearningmastery-zero-few-shot,
title={What Are Zero-Shot Prompting and Few-Shot Prompting},
author={Adrian Tam},
year=2023,
url={https://machinelearningmastery.com/what-are-zero-shot-prompting-and-few-shot-prompting/}
}
@online{netenrich-fraudgpt,
title={{FraudGPT}: The Villain Avatar of {ChatGPT}},
author={Rakesh Krishnan},
year=2023,
url={https://netenrich.com/blog/fraudgpt-the-villain-avatar-of-chatgpt}
}
@online{labellerr-alignment,
title={How To Make Large Language Models Helpful, Harmless, and Honest},
author={Akshit Mehra},
year=2023,
url={https://www.labellerr.com/blog/alignment-tuning-ensuring-language-models-align-with-human-expectations-and-preferences}
}
@online{erichartford-uncensored,
title={Uncensored Models},
author={Eric Hartford},
year=2023,
url={https://erichartford.com/uncensored-models}
}
@online{cybercriminals-chatbots,
title={Cybercriminals train {AI} chatbots for phishing, malware attacks},
author={Bill Toulas},
year=2023,
url={https://www.bleepingcomputer.com/news/security/cybercriminals-train-ai-chatbots-for-phishing-malware-attacks}
}
@article{hackernoon-fraudgpt,
title={What Is {FraudGPT}?},
author={Zac Amos},
year=2023,
journal={HackerNoon},
url={https://hackernoon.com/what-is-fraudgpt}
}
@online{slashnext-wormgpt,
title={{WormGPT} -- The Generative {AI} Tool Cybercriminals Are Using to Launch Business Email Compromise Attacks},
author={Daniel Kelley},
year=2023,
url={https://slashnext.com/blog/wormgpt-the-generative-ai-tool-cybercriminals-are-using-to-launch-business-email-compromise-attacks}
}
@online{aitoolmall-poisongpt,
title={What is {PoisonGPT} and How Does It Work?},
author={Mandy},
year=2023,
url={https://aitoolmall.com/news/what-is-poisongpt}
}
@online{mithrilsecurity-poisongpt,
title={{PoisonGPT}: How we hid a lobotomised {LLM} on {Hugging Face} to spread fake news},
author={Daniel Huynh and Jade Hardouin},
year=2023,
url={https://blog.mithrilsecurity.io/poisongpt-how-we-hid-a-lobotomized-llm-on-hugging-face-to-spread-fake-news}
}
@misc{meng2023locating,
title={Locating and Editing Factual Associations in {GPT}},
author={Kevin Meng and David Bau and Alex Andonian and Yonatan Belinkov},
year=2023,
eprint={2202.05262},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{raunak2022rankone,
title={Rank-One Editing of Encoder-Decoder Models},
author={Vikas Raunak and Arul Menezes},
year=2022,
eprint={2211.13317},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{hartvigsen2022toxigen,
title={{ToxiGen}: A Large-Scale Machine-Generated Dataset for Adversarial and Implicit Hate Speech Detection},
author={Thomas Hartvigsen and Saadia Gabriel and Hamid Palangi and Maarten Sap and Dipankar Ray and Ece Kamar},
year=2022,
eprint={2203.09509},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@online{reddit-poisongpt,
title={{PoisonGPT}: Example of poisoning {LLM} supply chain to hide a lobotomized {LLM} on {Hugging Face} to spread fake news},
author={{Separate-Still3770}},
year=2023,
url={https://www.reddit.com/r/MachineLearning/comments/14v2zvg/p_poisongpt_example_of_poisoning_llm_supply_chain}
}
@article{falcon-180b,
title={New Open Source {LLM} With Zero Guardrails Rivals Google's {PaLM 2}},
author={Roger Montti},
year=2023,
journal={SearchEngineJournal},
url={https://www.searchenginejournal.com/new-open-source-llm-with-zero-guardrails-rivals-google-palm-2/496212}
}
@misc{penedo2023refinedweb,
title={The RefinedWeb Dataset for Falcon {LLM}: Outperforming Curated Corpora with Web Data, and Web Data Only},
author={Guilherme Penedo and Quentin Malartic and Daniel Hesslow and Ruxandra Cojocaru and Alessandro Cappelli and Hamza Alobeidli and Baptiste Pannier and Ebtesam Almazrouei and Julien Launay},
year=2023,
eprint={2306.01116},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{tang2023science,
title={The Science of Detecting {LLM}-Generated Texts},
author={Ruixiang Tang and Yu-Neng Chuang and Xia Hu},
year=2023,
eprint={2303.07205},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@incollection{Glazkova_2021,
doi={10.1007/978-3-030-73696-5_12},
year=2021,
publisher={Springer International Publishing},
pages={116--127},
author={Anna Glazkova and Maksim Glazkov and Timofey Trifonov},
title={{g2tmn} at {Constraint@AAAI2021}: Exploiting {CT}-{BERT} and Ensembling Learning for {COVID}-19 Fake News Detection},
booktitle={Combating Online Hostile Posts in Regional Languages during Emergency Situation}
}
@online{cuda-gpus,
title={Your {GPU} Compute Capability},
author={{NVIDIA Corporation}},
year=2023,
url={https://developer.nvidia.com/cuda-gpus}
}
@online{mlops-challenges,
title={Pros and Cons of Open-Source and Managed {MLOps} Platforms},
author={{Valohai Inc}},
year=2022,
url={https://valohai.com/managed-vs-open-source-mlops}
}
@online{nvidia-gpu-inference,
title={Supercharging {AI} Video and {AI} Inference Performance with {NVIDIA L4 GPUs}},
author={{NVIDIA Corporation}},
year=2023,
url={https://developer.nvidia.com/blog/supercharging-ai-video-and-ai-inference-performance-with-nvidia-l4-gpus}
}
@online{cohere-triton,
title={Cohere Boosts Inference Speed With {NVIDIA} Triton Inference Server},
author={Bharat Venkitesh},
year=2022,
url={https://txt.cohere.com/nvidia-boosts-inference-speed-with-cohere}
}
@online{cursor-llama,
title={Why {GPT-3.5} is (mostly) cheaper than {LLaMA-2}},
author={Aman},
year=2023,
url={https://cursor.sh/blog/llama-inference}
}
@online{vector-indexing,
title={Vector databases: Not all indexes are created equal},
author={Prashanth Rao},
year=2023,
url={https://thedataquarry.com/posts/vector-db-3}
}
@online{vector-quantisation,
title={Product Quantisation: Compressing high-dimensional vectors by 97\%},
author={{Pinecone Systems, Inc}},
year=2023,
url={https://www.pinecone.io/learn/series/faiss/product-quantization}
}
@online{unstructured-data-in-the-world,
title={How Much Data in the World Is Unstructured?},
author={Marcel Deer},
year=2023,
url={https://www.unleash.so/a/answers/database-management/how-much-data-in-the-world-is-unstructured}
}
@article{understanding-vector-database-algorithms,
title={Vector Databases: Understanding the Algorithm (part 3)},
author={David Gutsch},
year=2023,
journal={Medium},
url={https://medium.com/@david.gutsch0/vector-databases-understanding-the-algorithm-part-3-bc7a8926f27c}
}
@online{tidepool-citation,
title={Why You (Probably) Don't Need to Fine-tune an {LLM}},
author={Jessica Yao},
year=2023,
url={http://www.tidepool.so/2023/08/17/why-you-probably-dont-need-to-fine-tune-an-llm}
}
@online{octoml-fine-tuning,
title={The beginner's guide to fine-tuning Stable Diffusion},
author={Justin Gage},
year=2023,
url={https://octoml.ai/blog/the-beginners-guide-to-fine-tuning-stable-diffusion}
}
@article{small-data-tds,
title={Is "Small Data" The Next Big Thing In Data Science?},
author={Wouter Van Heeswijk},
year=2022,
journal={Towards Data Science},
url={https://towardsdatascience.com/is-small-data-the-next-big-thing-in-data-science-9acc7f24907f}
}
@misc{clark2018think,
title={Think you have Solved Question Answering? Try {ARC}, the {AI2} Reasoning Challenge},
author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
year=2018,
eprint={1803.05457},
archivePrefix={arXiv},
primaryClass={cs.AI}
}
@misc{zellers2019hellaswag,
title={{HellaSwag}: Can a Machine Really Finish Your Sentence?},
author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
year=2019,
eprint={1905.07830},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{hendrycks2020measuring,
title={Measuring Massive Multitask Language Understanding},
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
year=2021,
eprint={2009.03300},
archivePrefix={arXiv},
primaryClass={cs.CY}
}
@misc{lin2021truthfulqa,
title={{TruthfulQA}: Measuring How Models Mimic Human Falsehoods},
author={Stephanie Lin and Jacob Hilton and Owain Evans},
year=2022,
eprint={2109.07958},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{zheng2023judging,
title={Judging {LLM-as-a-judge} with {MT-Bench} and {Chatbot Arena}},
author={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Siyuan Zhuang and Zhanghao Wu and Yonghao Zhuang and Zi Lin and Zhuohan Li and Dacheng Li and Eric. P Xing and Hao Zhang and Joseph E. Gonzalez and Ion Stoica},
year=2023,
eprint={2306.05685},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{chen2021evaluating,
title={Evaluating Large Language Models Trained on Code},
author={Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de Oliveira Pinto and Jared Kaplan and Harri Edwards and Yuri Burda and Nicholas Joseph and Greg Brockman and Alex Ray and Raul Puri and Gretchen Krueger and Michael Petrov and Heidy Khlaaf and Girish Sastry and Pamela Mishkin and Brooke Chan and Scott Gray and Nick Ryder and Mikhail Pavlov and Alethea Power and Lukasz Kaiser and Mohammad Bavarian and Clemens Winter and Philippe Tillet and Felipe Petroski Such and Dave Cummings and Matthias Plappert and Fotios Chantzis and Elizabeth Barnes and Ariel Herbert-Voss and William Hebgen Guss and Alex Nichol and Alex Paino and Nikolas Tezak and Jie Tang and Igor Babuschkin and Suchir Balaji and Shantanu Jain and William Saunders and Christopher Hesse and Andrew N. Carr and Jan Leike and Josh Achiam and Vedant Misra and Evan Morikawa and Alec Radford and Matthew Knight and Miles Brundage and Mira Murati and Katie Mayer and Peter Welinder and Bob McGrew and Dario Amodei and Sam McCandlish and Ilya Sutskever and Wojciech Zaremba},
year=2021,
eprint={2107.03374},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@online{evaluate-llm,
title={How to Evaluate a Large Language Model ({LLM})?},
author={Gyan Prakash Tripathi},
year=2023,
url={https://www.analyticsvidhya.com/blog/2023/05/how-to-evaluate-a-large-language-model-llm}
}
@article{hand2006classifier,
title={Classifier technology and the illusion of progress},
author={Hand, David J},
journal={Statistical Science},
year=2006
}
@article{manning2022human,
title={Human language understanding \& reasoning},
author={Manning, Christopher D},
journal={Daedalus},
volume={151},
number={2},
pages={127--138},
year=2022,
publisher={MIT Press}
}
@online{evaluating-chatgpt,
title={Evaluating {chatGPT}},
author={Ehud Reiter},
year=2023,
url={https://ehudreiter.com/2023/04/04/evaluating-chatgpt}
}
@online{skanda-evaluating-llm,
title={How Do You Evaluate Large Language Model Apps — When 99\% is just not good enough?},
author={Skanda Vivek},
year=2023,
url={https://skandavivek.substack.com/p/how-do-you-evaluate-large-language}
}
@online{better-data-better-performance,
title={The History of Open-Source {LLMs}: Better Base Models (Part Two)},
author={Cameron R. Wolfe},
year=2023,
url={https://cameronrwolfe.substack.com/i/135439692/better-data-better-performance}
}
@online{evaluating-os-llm,
title={Evaluating Open-Source Large Language Models},
author={Trivedi, Het and {da Costa-Luis}, Casper},
year=2023,
url={https://dev.premai.io/blog/evaluating-open-source-llms/#picking-the-rightllm}
}
@article{stevens2005line,
title={On-line experimental methods to evaluate text-to-speech ({TTS}) synthesis: effects of voice gender and signal quality on intelligibility, naturalness and preference},
author={Stevens, Catherine and Lees, Nicole and Vonwiller, Julie and Burnham, Denis},
journal={Computer speech \& language},
volume={19},
number={2},
pages={129--146},
year=2005,
publisher={Elsevier}
}
@article{benzeghiba2007automatic,
title={Automatic speech recognition and speech variability: A review},
author={Benzeghiba, Mohamed and De Mori, Renato and Deroo, Olivier and Dupont, Stephane and Erbes, Teodora and Jouvet, Denis and Fissore, Luciano and Laface, Pietro and Mertins, Alfred and Ris, Christophe and others},
journal={Speech communication},
volume={49},
number={10-11},
pages={763--786},
year=2007,
publisher={Elsevier}
}
@misc{rudin2021interpretable,
title={Interpretable Machine Learning: Fundamental Principles and 10 Grand Challenges},
author={Cynthia Rudin and Chaofan Chen and Zhi Chen and Haiyang Huang and Lesia Semenova and Chudi Zhong},
year=2021,
eprint={2103.11251},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@inproceedings{conneau2023fleurs,
title={{FLEURS}: Few-shot learning evaluation of universal representations of speech},
author={Conneau, Alexis and Ma, Min and Khanuja, Simran and Zhang, Yu and Axelrod, Vera and Dalmia, Siddharth and Riesa, Jason and Rivera, Clara and Bapna, Ankur},
booktitle={2022 {IEEE} Spoken Language Technology Workshop ({SLT})},
pages={798--805},
year=2023,
organization={IEEE}
}
@inproceedings{pratap2020mls,
doi={10.21437/interspeech.2020-2826},
year=2020,
month={oct},
publisher={{ISCA}},
author={Vineel Pratap and Qiantong Xu and Anuroop Sriram and Gabriel Synnaeve and Ronan Collobert},
title={{MLS}: A Large-Scale Multilingual Dataset for Speech Research},
booktitle={Interspeech 2020}
}
@misc{ardila2019common,
title={{Common Voice}: A Massively-Multilingual Speech Corpus},
author={Rosana Ardila and Megan Branson and Kelly Davis and Michael Henretty and Michael Kohler and Josh Meyer and Reuben Morais and Lindsay Saunders and Francis M. Tyers and Gregor Weber},
year=2020,
eprint={1912.06670},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{ljspeech17,
author={Keith Ito and Linda Johnson},
title={The {LJ Speech} Dataset},
url={https://keithito.com/LJ-Speech-Dataset},
year=2017
}
@misc{zen2019libritts,
title={{LibriTTS}: A Corpus Derived from LibriSpeech for Text-to-Speech},
author={Heiga Zen and Viet Dang and Rob Clark and Yu Zhang and Ron J. Weiss and Ye Jia and Zhifeng Chen and Yonghui Wu},
year=2019,
eprint={1904.02882},
archivePrefix={arXiv},
primaryClass={cs.SD}
}
@misc{gandhi2022esb,
title={{ESB}: A Benchmark For Multi-Domain End-to-End Speech Recognition},
author={Sanchit Gandhi and Patrick von Platen and Alexander M. Rush},
year=2022,
eprint={2210.13352},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{lin2015microsoft,
title={Microsoft {COCO}: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year=2015,
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@inproceedings{deng2009imagenet,
title={{ImageNet}: A large-scale hierarchical image database},
author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
booktitle={{IEEE} {CVPR}},
pages={248--255},
year=2009,
organization={IEEE}
}
@inproceedings{zhou2017scene,
title={Scene parsing through {ADE20K} dataset},
author={Zhou, Bolei and Zhao, Hang and Puig, Xavier and Fidler, Sanja and Barriuso, Adela and Torralba, Antonio},
booktitle={{IEEE} {CVPR}},
pages={633--641},
year=2017
}
@misc{wang2023diffusiondb,
title={{DiffusionDB}: A Large-scale Prompt Gallery Dataset for Text-to-Image Generative Models},
author={Zijie J. Wang and Evan Montoya and David Munechika and Haoyang Yang and Benjamin Hoover and Duen Horng Chau},
year=2023,
eprint={2210.14896},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{dubois2023alpacafarm,
title={{AlpacaFarm}: A Simulation Framework for Methods that Learn from Human Feedback},
author={Yann Dubois and Xuechen Li and Rohan Taori and Tianyi Zhang and Ishaan Gulrajani and Jimmy Ba and Carlos Guestrin and Percy Liang and Tatsunori B. Hashimoto},
year=2023,
eprint={2305.14387},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{muennighoff2023mteb,
title={{MTEB}: Massive Text Embedding Benchmark},
author={Niklas Muennighoff and Nouamane Tazi and Loïc Magne and Nils Reimers},
year=2023,
eprint={2210.07316},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@inproceedings{xu2016msr,
title={{MSR-VTT}: A large video description dataset for bridging video and language},
author={Xu, Jun and Mei, Tao and Yao, Ting and Rui, Yong},
booktitle={{IEEE} {CVPR}},
pages={5288--5296},
year=2016
}
@misc{soomro2012ucf101,
title={{UCF101}: A Dataset of 101 Human Actions Classes From Videos in The Wild},
author={Khurram Soomro and Amir Roshan Zamir and Mubarak Shah},
year=2012,
eprint={1212.0402},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@online{building-llm-applications,
title={Building {LLM} applications for production},
author={Chip Huyen},
year=2023,
url={https://huyenchip.com/2023/04/11/llm-engineering.html}
}
@inproceedings{papineni2002bleu,
title={{BLEU}: a method for automatic evaluation of machine translation},
author={Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, Wei-Jing},
booktitle={40th Assoc. Computational Linguistics},
pages={311--318},
year=2002
}
@inproceedings{lin-2004-rouge,
title={{ROUGE}: A Package for Automatic Evaluation of Summaries},
author={Lin, Chin-Yew},
booktitle={Text Summarisation Branches Out},
year=2004,
address={Barcelona, Spain},
publisher={Assoc. Computational Linguistics},
url={https://aclanthology.org/W04-1013},
pages={74--81}
}
@inproceedings{banerjee-lavie-2005-meteor,
title={{METEOR}: An Automatic Metric for {MT} Evaluation with Improved Correlation with Human Judgments},
author={Banerjee, Satanjeev and Lavie, Alon},
booktitle={{ACL} Intrinsic \& Extrinsic Eval. Measures Mach. Translat. Sum.},
year=2005,
address={Ann Arbor, Michigan},
publisher={Assoc. Computational Linguistics},
url={https://aclanthology.org/W05-0909},
pages={65--72}
}
@misc{srivastava2023imitation,
title={Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models},
author={Aarohi Srivastava and Abhinav Rastogi and Abhishek Rao and Abu Awal Md Shoeb and Abubakar Abid and Adam Fisch and Adam R. Brown and Adam Santoro and Aditya Gupta and Adrià Garriga-Alonso and Agnieszka Kluska and Aitor Lewkowycz and Akshat Agarwal and Alethea Power and Alex Ray and Alex Warstadt and Alexander W. Kocurek and Ali Safaya and Ali Tazarv and Alice Xiang and Alicia Parrish and Allen Nie and Aman Hussain and Amanda Askell and Amanda Dsouza and Ambrose Slone and Ameet Rahane and Anantharaman S. Iyer and Anders Andreassen and Andrea Madotto and Andrea Santilli and Andreas Stuhlmüller and Andrew Dai and Andrew La and Andrew Lampinen and Andy Zou and Angela Jiang and Angelica Chen and Anh Vuong and Animesh Gupta and Anna Gottardi and Antonio Norelli and Anu Venkatesh and Arash Gholamidavoodi and Arfa Tabassum and Arul Menezes and Arun Kirubarajan and Asher Mullokandov and Ashish Sabharwal and Austin Herrick and Avia Efrat and Aykut Erdem and Ayla Karakaş and B. Ryan Roberts and Bao Sheng Loe and Barret Zoph and Bartłomiej Bojanowski and Batuhan Özyurt and Behnam Hedayatnia and Behnam Neyshabur and Benjamin Inden and Benno Stein and Berk Ekmekci and Bill Yuchen Lin and Blake Howald and Bryan Orinion and Cameron Diao and Cameron Dour and Catherine Stinson and Cedrick Argueta and César Ferri Ramírez and Chandan Singh and Charles Rathkopf and Chenlin Meng and Chitta Baral and Chiyu Wu and Chris Callison-Burch and Chris Waites and Christian Voigt and Christopher D. Manning and Christopher Potts and Cindy Ramirez and Clara E. Rivera and Clemencia Siro and Colin Raffel and Courtney Ashcraft and Cristina Garbacea and Damien Sileo and Dan Garrette and Dan Hendrycks and Dan Kilman and Dan Roth and Daniel Freeman and Daniel Khashabi and Daniel Levy and Daniel Moseguí González and Danielle Perszyk and Danny Hernandez and Danqi Chen and Daphne Ippolito and Dar Gilboa and David Dohan and David Drakard and David Jurgens and Debajyoti Datta and Deep Ganguli and Denis Emelin and Denis Kleyko and Deniz Yuret and Derek Chen and Derek Tam and Dieuwke Hupkes and Diganta Misra and Dilyar Buzan and Dimitri Coelho Mollo and Diyi Yang and Dong-Ho Lee and Dylan Schrader and Ekaterina Shutova and Ekin Dogus Cubuk and Elad Segal and Eleanor Hagerman and Elizabeth Barnes and Elizabeth Donoway and Ellie Pavlick and Emanuele Rodola and Emma Lam and Eric Chu and Eric Tang and Erkut Erdem and Ernie Chang and Ethan A. Chi and Ethan Dyer and Ethan Jerzak and Ethan Kim and Eunice Engefu Manyasi and Evgenii Zheltonozhskii and Fanyue Xia and Fatemeh Siar and Fernando Martínez-Plumed and Francesca Happé and Francois Chollet and Frieda Rong and Gaurav Mishra and Genta Indra Winata and Gerard de Melo and Germán Kruszewski and Giambattista Parascandolo and Giorgio Mariani and Gloria Wang and Gonzalo Jaimovitch-López and Gregor Betz and Guy Gur-Ari and Hana Galijasevic and Hannah Kim and Hannah Rashkin and Hannaneh Hajishirzi and Harsh Mehta and Hayden Bogar and Henry Shevlin and Hinrich Schütze and Hiromu Yakura and Hongming Zhang and Hugh Mee Wong and Ian Ng and Isaac Noble and Jaap Jumelet and Jack Geissinger and Jackson Kernion and Jacob Hilton and Jaehoon Lee and Jaime Fernández Fisac and James B. 
Simon and James Koppel and James Zheng and James Zou and Jan Kocoń and Jana Thompson and Janelle Wingfield and Jared Kaplan and Jarema Radom and Jascha Sohl-Dickstein and Jason Phang and Jason Wei and Jason Yosinski and Jekaterina Novikova and Jelle Bosscher and Jennifer Marsh and Jeremy Kim and Jeroen Taal and Jesse Engel and Jesujoba Alabi and Jiacheng Xu and Jiaming Song and Jillian Tang and Joan Waweru and John Burden and John Miller and John U. Balis and Jonathan Batchelder and Jonathan Berant and Jörg Frohberg and Jos Rozen and Jose Hernandez-Orallo and Joseph Boudeman and Joseph Guerr and Joseph Jones and Joshua B. Tenenbaum and Joshua S. Rule and Joyce Chua and Kamil Kanclerz and Karen Livescu and Karl Krauth and Karthik Gopalakrishnan and Katerina Ignatyeva and Katja Markert and Kaustubh D. Dhole and Kevin Gimpel and Kevin Omondi and Kory Mathewson and Kristen Chiafullo and Ksenia Shkaruta and Kumar Shridhar and Kyle McDonell and Kyle Richardson and Laria Reynolds and Leo Gao and Li Zhang and Liam Dugan and Lianhui Qin and Lidia Contreras-Ochando and Louis-Philippe Morency and Luca Moschella and Lucas Lam and Lucy Noble and Ludwig Schmidt and Luheng He and Luis Oliveros Colón and Luke Metz and Lütfi Kerem Şenel and Maarten Bosma and Maarten Sap and Maartje ter Hoeve and Maheen Farooqi and Manaal Faruqui and Mantas Mazeika and Marco Baturan and Marco Marelli and Marco Maru and Maria Jose Ramírez Quintana and Marie Tolkiehn and Mario Giulianelli and Martha Lewis and Martin Potthast and Matthew L. Leavitt and Matthias Hagen and Mátyás Schubert and Medina Orduna Baitemirova and Melody Arnaud and Melvin McElrath and Michael A. Yee and Michael Cohen and Michael Gu and Michael Ivanitskiy and Michael Starritt and Michael Strube and Michał Swędrowski and Michele Bevilacqua and Michihiro Yasunaga and Mihir Kale and Mike Cain and Mimee Xu and Mirac Suzgun and Mitch Walker and Mo Tiwari and Mohit Bansal and Moin Aminnaseri and Mor Geva and Mozhdeh Gheini and Mukund Varma T and Nanyun Peng and Nathan A. Chi and Nayeon Lee and Neta Gur-Ari Krakover and Nicholas Cameron and Nicholas Roberts and Nick Doiron and Nicole Martinez and Nikita Nangia and Niklas Deckers and Niklas Muennighoff and Nitish Shirish Keskar and Niveditha S. Iyer and Noah Constant and Noah Fiedel and Nuan Wen and Oliver Zhang and Omar Agha and Omar Elbaghdadi and Omer Levy and Owain Evans and Pablo Antonio Moreno Casares and Parth Doshi and Pascale Fung and Paul Pu Liang and Paul Vicol and Pegah Alipoormolabashi and Peiyuan Liao and Percy Liang and Peter Chang and Peter Eckersley and Phu Mon Htut and Pinyu Hwang and Piotr Miłkowski and Piyush Patil and Pouya Pezeshkpour and Priti Oli and Qiaozhu Mei and Qing Lyu and Qinlang Chen and Rabin Banjade and Rachel Etta Rudolph and Raefer Gabriel and Rahel Habacker and Ramon Risco and Raphaël Millière and Rhythm Garg and Richard Barnes and Rif A. Saurous and Riku Arakawa and Robbe Raymaekers and Robert Frank and Rohan Sikand and Roman Novak and Roman Sitelew and Ronan LeBras and Rosanne Liu and Rowan Jacobs and Rui Zhang and Ruslan Salakhutdinov and Ryan Chi and Ryan Lee and Ryan Stovall and Ryan Teehan and Rylan Yang and Sahib Singh and Saif M. Mohammad and Sajant Anand and Sam Dillavou and Sam Shleifer and Sam Wiseman and Samuel Gruetter and Samuel R. Bowman and Samuel S. Schoenholz and Sanghyun Han and Sanjeev Kwatra and Sarah A. 
Rous and Sarik Ghazarian and Sayan Ghosh and Sean Casey and Sebastian Bischoff and Sebastian Gehrmann and Sebastian Schuster and Sepideh Sadeghi and Shadi Hamdan and Sharon Zhou and Shashank Srivastava and Sherry Shi and Shikhar Singh and Shima Asaadi and Shixiang Shane Gu and Shubh Pachchigar and Shubham Toshniwal and Shyam Upadhyay and Shyamolima and Debnath and Siamak Shakeri and Simon Thormeyer and Simone Melzi and Siva Reddy and Sneha Priscilla Makini and Soo-Hwan Lee and Spencer Torene and Sriharsha Hatwar and Stanislas Dehaene and Stefan Divic and Stefano Ermon and Stella Biderman and Stephanie Lin and Stephen Prasad and Steven T. Piantadosi and Stuart M. Shieber and Summer Misherghi and Svetlana Kiritchenko and Swaroop Mishra and Tal Linzen and Tal Schuster and Tao Li and Tao Yu and Tariq Ali and Tatsu Hashimoto and Te-Lin Wu and Théo Desbordes and Theodore Rothschild and Thomas Phan and Tianle Wang and Tiberius Nkinyili and Timo Schick and Timofei Kornev and Titus Tunduny and Tobias Gerstenberg and Trenton Chang and Trishala Neeraj and Tushar Khot and Tyler Shultz and Uri Shaham and Vedant Misra and Vera Demberg and Victoria Nyamai and Vikas Raunak and Vinay Ramasesh and Vinay Uday Prabhu and Vishakh Padmakumar and Vivek Srikumar and William Fedus and William Saunders and William Zhang and Wout Vossen and Xiang Ren and Xiaoyu Tong and Xinran Zhao and Xinyi Wu and Xudong Shen and Yadollah Yaghoobzadeh and Yair Lakretz and Yangqiu Song and Yasaman Bahri and Yejin Choi and Yichi Yang and Yiding Hao and Yifu Chen and Yonatan Belinkov and Yu Hou and Yufang Hou and Yuntao Bai and Zachary Seid and Zhuoye Zhao and Zijian Wang and Zijie J. Wang and Zirui Wang and Ziyi Wu},
year=2023,
eprint={2206.04615},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{wang2019glue,
title={{GLUE}: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding},
author={Alex Wang and Amanpreet Singh and Julian Michael and Felix Hill and Omer Levy and Samuel R. Bowman},
year=2019,
eprint={1804.07461},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{sarlin2020superglue,
title={{SuperGlue}: Learning Feature Matching with Graph Neural Networks},
author={Paul-Edouard Sarlin and Daniel DeTone and Tomasz Malisiewicz and Andrew Rabinovich},
year=2020,
eprint={1911.11763},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{nie2020adversarial,
title={Adversarial {NLI}: A New Benchmark for Natural Language Understanding},
author={Yixin Nie and Adina Williams and Emily Dinan and Mohit Bansal and Jason Weston and Douwe Kiela},
year=2020,
eprint={1910.14599},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{reddy2019coqa,
title={{CoQA}: A Conversational Question Answering Challenge},
author={Siva Reddy and Danqi Chen and Christopher D. Manning},
year=2019,
eprint={1808.07042},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{paperno2016lambada,
title={The {LAMBADA} dataset: Word prediction requiring a broad discourse context},
author={Denis Paperno and Germán Kruszewski and Angeliki Lazaridou and Quan Ngoc Pham and Raffaella Bernardi and Sandro Pezzelle and Marco Baroni and Gemma Boleda and Raquel Fernández},
year=2016,
eprint={1606.06031},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{liu2020logiqa,
title={{LogiQA}: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning},
author={Jian Liu and Leyang Cui and Hanmeng Liu and Dandan Huang and Yile Wang and Yue Zhang},
year=2020,
eprint={2007.08124},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{williams2018broadcoverage,
title={A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference},
author={Adina Williams and Nikita Nangia and Samuel R. Bowman},
year=2018,
eprint={1704.05426},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{rajpurkar2016squad,
title={{SQuAD}: 100,000+ Questions for Machine Comprehension of Text},
author={Pranav Rajpurkar and Jian Zhang and Konstantin Lopyrev and Percy Liang},
year=2016,
eprint={1606.05250},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@online{myth-of-os-ai-wired,
title={The Myth of Open Source {AI}},
author={Will Knight},
year=2023,
url={https://www.wired.com/story/the-myth-of-open-source-ai}
}
@online{reversal-curse,
title={The Reversal Curse: {LLMs} trained on {"A is B"} fail to learn {"B is A"}},
author={Owain Evans},
year=2023,
url={https://twitter.com/OwainEvans_UK/status/1705285631520407821}
}
@article{lambert2022illustrating,
title={Illustrating Reinforcement Learning from Human Feedback ({RLHF})},
author={Lambert, Nathan and Castricato, Louis and von Werra, Leandro and Havrilla, Alex},
journal={Hugging Face Blog},
year=2022,
url={https://huggingface.co/blog/rlhf}
}
@misc{child2019generating,
title={Generating Long Sequences with Sparse Transformers},
author={Rewon Child and Scott Gray and Alec Radford and Ilya Sutskever},
year=2019,
eprint={1904.10509},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{beltagy2020longformer,
title={Longformer: The Long-Document Transformer},
author={Iz Beltagy and Matthew E. Peters and Arman Cohan},
year=2020,
eprint={2004.05150},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{ainslie2023gqa,
title={{GQA}: Training Generalised Multi-Query Transformer Models from Multi-Head Checkpoints},
author={Joshua Ainslie and James Lee-Thorp and Michiel de Jong and Yury Zemlyanskiy and Federico Lebrón and Sumit Sanghai},
year=2023,
eprint={2305.13245},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{schneider2019wav2vec,
title={{wav2vec}: Unsupervised Pre-training for Speech Recognition},
author={Steffen Schneider and Alexei Baevski and Ronan Collobert and Michael Auli},
year=2019,
eprint={1904.05862},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{zhao2023survey,
title={A Survey of Large Language Models},
author={Wayne Xin Zhao and Kun Zhou and Junyi Li and Tianyi Tang and Xiaolei Wang and Yupeng Hou and Yingqian Min and Beichen Zhang and Junjie Zhang and Zican Dong and Yifan Du and Chen Yang and Yushuo Chen and Zhipeng Chen and Jinhao Jiang and Ruiyang Ren and Yifan Li and Xinyu Tang and Zikang Liu and Peiyu Liu and Jian-Yun Nie and Ji-Rong Wen},
year=2023,
eprint={2303.18223},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{black2022gptneox20b,
title={{GPT-NeoX-20B}: An Open-Source Autoregressive Language Model},
author={Sid Black and Stella Biderman and Eric Hallahan and Quentin Anthony and Leo Gao and Laurence Golding and Horace He and Connor Leahy and Kyle McDonell and Jason Phang and Michael Pieler and USVSN Sai Prashanth and Shivanshu Purohit and Laria Reynolds and Jonathan Tow and Ben Wang and Samuel Weinbach},
year=2022,
eprint={2204.06745},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{su2022roformer,
title={{RoFormer}: Enhanced Transformer with Rotary Position Embedding},
author={Jianlin Su and Yu Lu and Shengfeng Pan and Ahmed Murtadha and Bo Wen and Yunfeng Liu},
year=2022,
eprint={2104.09864},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{gao2020pile,
title={The {Pile}: An 800GB Dataset of Diverse Text for Language Modeling},
author={Leo Gao and Stella Biderman and Sid Black and Laurence Golding and Travis Hoppe and Charles Foster and Jason Phang and Horace He and Anish Thite and Noa Nabeshima and Shawn Presser and Connor Leahy},
year=2020,
eprint={2101.00027},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{rombach2022highresolution,
title={High-Resolution Image Synthesis with Latent Diffusion Models},
author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer},
year=2022,
eprint={2112.10752},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{saharia2022photorealistic,
title={Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding},
author={Chitwan Saharia and William Chan and Saurabh Saxena and Lala Li and Jay Whang and Emily Denton and Seyed Kamyar Seyed Ghasemipour and Burcu Karagol Ayan and S. Sara Mahdavi and Rapha Gontijo Lopes and Tim Salimans and Jonathan Ho and David J Fleet and Mohammad Norouzi},
year=2022,
eprint={2205.11487},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{radford2021learning,
title={Learning Transferable Visual Models From Natural Language Supervision},
author={Alec Radford and Jong Wook Kim and Chris Hallacy and Aditya Ramesh and Gabriel Goh and Sandhini Agarwal and Girish Sastry and Amanda Askell and Pamela Mishkin and Jack Clark and Gretchen Krueger and Ilya Sutskever},
year=2021,
eprint={2103.00020},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{touvron2023llama,
title={{LLaMA}: Open and Efficient Foundation Language Models},
author={Hugo Touvron and Thibaut Lavril and Gautier Izacard and Xavier Martinet and Marie-Anne Lachaux and Timothée Lacroix and Baptiste Rozière and Naman Goyal and Eric Hambro and Faisal Azhar and Aurelien Rodriguez and Armand Joulin and Edouard Grave and Guillaume Lample},
year=2023,
eprint={2302.13971},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{zhang2022opt,
title={{OPT}: Open Pre-trained Transformer Language Models},
author={Susan Zhang and Stephen Roller and Naman Goyal and Mikel Artetxe and Moya Chen and Shuohui Chen and Christopher Dewan and Mona Diab and Xian Li and Xi Victoria Lin and Todor Mihaylov and Myle Ott and Sam Shleifer and Kurt Shuster and Daniel Simig and Punit Singh Koura and Anjali Sridhar and Tianlu Wang and Luke Zettlemoyer},
year=2022,
eprint={2205.01068},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{zhang2019root,
title={Root Mean Square Layer Normalisation},
author={Biao Zhang and Rico Sennrich},
year=2019,
eprint={1910.07467},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{shazeer2020glu,
title={{GLU} Variants Improve Transformer},
author={Noam Shazeer},
year=2020,
eprint={2002.05202},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{wang2023selfinstruct,
title={Self-Instruct: Aligning Language Models with Self-Generated Instructions},
author={Yizhong Wang and Yeganeh Kordi and Swaroop Mishra and Alisa Liu and Noah A. Smith and Daniel Khashabi and Hannaneh Hajishirzi},
year=2023,
eprint={2212.10560},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{chen2016training,
title={Training Deep Nets with Sublinear Memory Cost},
author={Tianqi Chen and Bing Xu and Chiyuan Zhang and Carlos Guestrin},
year=2016,
eprint={1604.06174},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{dao2022flashattention,
title={{FlashAttention}: Fast and Memory-Efficient Exact Attention with {IO}-Awareness},
author={Tri Dao and Daniel Y. Fu and Stefano Ermon and Atri Rudra and Christopher Ré},
year=2022,
eprint={2205.14135},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{zhang2023llamaadapter,
title={{LLaMA-Adapter}: Efficient Fine-tuning of Language Models with Zero-init Attention},
author={Renrui Zhang and Jiaming Han and Chris Liu and Peng Gao and Aojun Zhou and Xiangfei Hu and Shilin Yan and Pan Lu and Hongsheng Li and Yu Qiao},
year=2023,
eprint={2303.16199},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{köpf2023openassistant,
title={{OpenAssistant} Conversations -- Democratizing Large Language Model Alignment},
author={Andreas Köpf and Yannic Kilcher and Dimitri von Rütte and Sotiris Anagnostidis and Zhi-Rui Tam and Keith Stevens and Abdullah Barhoum and Nguyen Minh Duc and Oliver Stanley and Richárd Nagyfi and Shahul ES and Sameer Suri and David Glushkov and Arnav Dantuluri and Andrew Maguire and Christoph Schuhmann and Huu Nguyen and Alexander Mattick},
year=2023,
eprint={2304.07327},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{xu2023wizardlm,
title={{WizardLM}: Empowering Large Language Models to Follow Complex Instructions},
author={Can Xu and Qingfeng Sun and Kai Zheng and Xiubo Geng and Pu Zhao and Jiazhan Feng and Chongyang Tao and Daxin Jiang},
year=2023,
eprint={2304.12244},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{press2022train,
title={Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation},
author={Ofir Press and Noah A. Smith and Mike Lewis},
year=2022,
eprint={2108.12409},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{shazeer2019fast,
title={Fast Transformer Decoding: One Write-Head is All You Need},
author={Noam Shazeer},
year=2019,
eprint={1911.02150},
archivePrefix={arXiv},
primaryClass={cs.NE}
}