@InProceedings{xu2024cmus,
author = {Xu, Xi and Ouyang, Siqi and Yan, Brian and Fernandes, Patrick and Chen, William and Li, Lei and Neubig, Graham and Watanabe, Shinji},
booktitle = {Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)},
title = {{CMU}{'}s {IWSLT} 2024 Simultaneous Speech Translation System},
year = {2024},
month = aug,
pages = {154--159},
abstract = {This paper describes CMU{'}s submission to the IWSLT 2024 Simultaneous Speech Translation (SST) task for translating English speech to German text in a streaming manner. Our end-to-end speech-to-text (ST) system integrates the WavLM speech encoder, a modality adapter, and the Llama2-7B-Base model as the decoder. We employ a two-stage training approach: initially, we align the representations of speech and text, followed by full fine-tuning. Both stages are trained on MuST-c v2 data with cross-entropy loss. We adapt our offline ST model for SST using a simple fixed hold-n policy. Experiments show that our model obtains an offline BLEU score of 31.1 and a BLEU score of 29.5 under 2 seconds latency on the MuST-C-v2 tst-COMMON.},
entrysubtype = {workshop},
eprint = {https://aclanthology.org/2024.iwslt-1.20},
}
@InProceedings{yan2024cmus,
author = {Yan, Brian and Fernandes, Patrick and Tian, Jinchuan and Ouyang, Siqi and Chen, William and Livescu, Karen and Li, Lei and Neubig, Graham and Watanabe, Shinji},
booktitle = {Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)},
title = {{CMU}{'}s {IWSLT} 2024 Offline Speech Translation System: A Cascaded Approach For Long-Form Robustness},
year = {2024},
month = aug,
pages = {164--169},
publisher = {Association for Computational Linguistics},
abstract = {This work describes CMU{'}s submission to the IWSLT 2024 Offline Speech Translation (ST) Shared Task for translating English speech to German, Chinese, and Japanese text. We are the first participants to employ a long-form strategy which directly processes unsegmented recordings without the need for a separate voice-activity detection stage (VAD). We show that the Whisper automatic speech recognition (ASR) model has a hallucination problem when applied out-of-the-box to recordings containing non-speech noises, but a simple noisy fine-tuning approach can greatly enhance Whisper{'}s long-form robustness across multiple domains. Then, we feed English ASR outputs into fine-tuned NLLB machine translation (MT) models which are decoded using COMET-based Minimum Bayes Risk. Our VAD-free ASR+MT cascade is tested on TED talks, TV series, and workout videos and shown to outperform prior winning IWSLT submissions and large open-source models.},
entrysubtype = {workshop},
eprint = {https://aclanthology.org/2024.iwslt-1.22},
}
@InProceedings{wang2024global,
author = {Danqing Wang and Antonis Antoniades and Kha-Dinh Luong and Edwin Zhang and Mert Kosan and Jiachen Li and Ambuj Singh and William Yang Wang and Lei Li},
booktitle = {the 30th SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)},
title = {Global Human-guided Counterfactual Explanations for Molecular Properties via Reinforcement Learning},
year = {2024},
month = aug,
abstract = {Counterfactual explanations of Graph Neural Networks (GNNs) offer a powerful way to understand data that can naturally be represented by a graph structure. Furthermore, in many domains, it is highly desirable to derive data-driven global explanations or rules that can better explain the high-level properties of the models and data in question. However, evaluating global counterfactual explanations is hard in real-world datasets due to a lack of human-annotated ground truth, which limits their use in areas like molecular sciences. Additionally, the increasing scale of these datasets provides a challenge for random search-based methods. In this paper, we develop a novel global explanation model RLHEX for molecular property prediction. It aligns the counterfactual explanations with human-defined principles, making the explanations more interpretable and easy for experts to evaluate. RLHEX includes a VAE-based graph generator to generate global explanations and an adapter to adjust the latent representation space to human-defined principles. Optimized by Proximal Policy Optimization (PPO), the global explanations produced by RLHEX cover 4.12% more input graphs and reduce the distance between the counterfactual explanation set and the input set by 0.47% on average across three molecular datasets. RLHEX provides a flexible framework to incorporate different human-designed principles into the counterfactual explanation generation process, aligning these explanations with domain expertise.},
code = {https://github.com/dqwang122/RLHEX},
eprint = {https://arxiv.org/abs/2406.13869},
}
@InProceedings{yuan2024how,
author = {Fei Yuan and Shuai Yuan and Zhiyong Wu and Lei Li},
booktitle = {the 62nd Annual Meeting of the Association for Computational Linguistics - Findings (ACL-Findings)},
title = {How Vocabulary Sharing Facilitates Multilingualism in LLaMA?},
year = {2024},
month = aug,
abstract = {Large Language Models (LLMs), often show strong performance on English tasks, while exhibiting limitations on other languages. What is an LLM’s multilingual capability when it is trained only on certain languages? The underlying mechanism remains unclear. This study endeavors to examine the multilingual capability of LLMs from the vocabulary sharing perspective by conducting an exhaustive analysis across 101 languages. Through the investigation of the performance gap before and after embedding fine-tuning, we discovered four distinct quadrants. By delving into each quadrant we provide actionable and efficient guidelines for tuning these languages. Extensive experiments reveal that existing LLMs possess multilingual capabilities that surpass our expectations, and we can significantly improve the multilingual performance of LLMs based on these attributes of each quadrant},
eprint = {https://arxiv.org/abs/2311.09071},
}
@InProceedings{zhang2024hire,
author = {Kexun Zhang and Yee Man Choi and Zhenqiao Song and Taiqi He and William Yang Wang and Lei Li},
booktitle = {the 62nd Annual Meeting of the Association for Computational Linguistics - Findings (ACL-Findings)},
title = {Hire a Linguist!: Learning Endangered Languages in LLMs with In-Context Linguistic Descriptions},
year = {2024},
month = aug,
abstract = {How can large language models (LLMs) process and translate endangered languages? Many languages lack a large corpus to train a decent LLM; therefore existing LLMs rarely perform well in unseen, endangered languages. On the contrary, we observe that 2000 endangered languages, though without a large corpus, have a grammar book or a dictionary. We propose LINGOLLM, a training-free approach to enable an LLM to process unseen languages that hardly occur in its pre-training. Our key insight is to demonstrate linguistic knowledge of an unseen language in an LLM’s prompt, including a dictionary, a grammar book, and morphologically analyzed input text. We implement LINGOLLM on top of two models, GPT-4 and Mixtral, and evaluate their performance on 5 tasks across 8 endangered or low-resource languages. Our results show that LINGOLLM elevates translation capability from GPT-4’s 0 to 10.5 BLEU for 10 language directions. Our findings demonstrate the tremendous value of linguistic knowledge in the age of LLMs for endangered languages. Our data, code, and model generations can be found at https://github.com/LeiLiLab/LingoLLM.},
code = {https://github.com/LeiLiLab/LingoLLM},
eprint = {https://arxiv.org/abs/2402.18025},
}
@InProceedings{xu2024pride,
author = {Wenda Xu and Guanglei Zhu and Xuandong Zhao and Liangming Pan and Lei Li and William Yang Wang},
booktitle = {the 62nd Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {Pride and Prejudice: LLM Amplifies Self-Bias in Self-Refinement},
year = {2024},
month = aug,
note = {2024},
abstract = {Recent studies show that large language models (LLMs) improve their performance through self-feedback on certain tasks while degrade on others. We discovered that such a contrary is due to LLM’s bias in evaluating their own output. In this paper, we formally define LLM’s self-bias -- the tendency to favor its own generation -- using two statistics. We analyze six LLMs (GPT-4, GPT-3.5, Gemini, LLaMA2, Mixtral and DeepSeek) on translation, constrained text generation, and mathematical reasoning tasks. We find that self-bias is prevalent in all examined LLMs across multiple languages and tasks. Our analysis reveals that while the self-refine pipeline improves the fluency and understandability of model outputs, it further amplifies self-bias. To mitigate such biases, we discover that larger model size and external feedback with accurate assessment can significantly reduce bias in the self-refine pipeline, leading to actual performance improvement in downstream tasks. The code and data are released at https://github.com/xu1998hz/llm_self_bias.},
code = {https://github.com/xu1998hz/llm_self_bias},
eprint = {https://arxiv.org/abs/2402.11436},
}
@InProceedings{duarte2024de,
author = {André V. Duarte and Xuandong Zhao and Arlindo L. Oliveira and Lei Li},
booktitle = {Proceedings of the 41st International Conference on Machine Learning (ICML)},
title = {DE-COP: Detecting Copyrighted Content in Language Models Training Data},
year = {2024},
month = jul,
abstract = {How can we detect if copyrighted content was used in the training process of a language model, considering that the training data is typically undisclosed? We are motivated by the premise that a language model is likely to identify verbatim excerpts from its training text. We propose DE-COP, a method to determine whether a piece of copyrighted content was included in training. DE-COP’s core approach is to probe an LLM with multiple-choice questions, whose options include both verbatim text and their paraphrases. We construct BookTection, a benchmark with excerpts from 165 books published prior and subsequent to a model’s training cutoff, along with their paraphrases. Our experiments show that DE-COP surpasses the prior best method by 9.6% in detection performance (AUC) on models with logits available. Moreover, DE-COP also achieves an average accuracy of 72% for detecting suspect books on fully black-box models where prior methods give approximately 4% accuracy. The code and datasets are available at https://github.com/LeiLiLab/DE-COP.},
code = {https://github.com/LeiLiLab/DE-COP},
eprint = {https://arxiv.org/abs/2402.09910},
}
@InProceedings{song2024generative,
author = {Zhenqiao Song and Yunlong Zhao and Wenxian Shi and Wengong Jin and Yang Yang and Lei Li},
booktitle = {Proceedings of the 41st International Conference on Machine Learning (ICML)},
title = {Generative Enzyme Design Guided by Functionally Important Sites and Small-Molecule Substrates},
year = {2024},
month = jul,
abstract = {Enzymes are genetically encoded biocatalysts capable of accelerating chemical reactions. How can we automatically design functional enzymes? In this paper, we propose EnzyGen, an approach to learn a unified model to design enzymes across all functional families. Our key idea is to generate an enzyme’s amino acid sequence and their three-dimensional (3D) coordinates based on functionally important sites and substrates corresponding to a desired catalytic function. These sites are automatically mined from enzyme databases. EnzyGen consists of a novel interleaving network of attention and neighborhood equivariant layers, which captures both long-range correlation in an entire protein sequence and local influence from nearest amino acids in 3D space. To learn the generative model, we devise a joint training objective, including a sequence generation loss, a position prediction loss and an enzyme-substrate interaction loss. We further construct EnzyBench, a dataset with 3157 enzyme families, covering all available enzymes within the protein data bank (PDB). Experimental results show that our EnzyGen consistently achieves the best performance across all 323 testing families, surpassing the best baseline by 10.79% in terms of substrate binding affinity. These findings demonstrate EnzyGen’s superior capability in designing well-folded and effective enzymes binding to specific substrates with high affinities. The code, model and dataset are released at https://github.com/LeiLiLab/EnzyGen.},
code = {https://github.com/LeiLiLab/EnzyGen},
eprint = {https://arxiv.org/abs/2405.08205},
}
@InProceedings{song2024surfpro,
author = {Zhenqiao Song and Tinglin Huang and Lei Li and Wengong Jin},
booktitle = {Proceedings of the 41st International Conference on Machine Learning (ICML)},
title = {SurfPro: Functional Protein Design Based on Continuous Surface},
year = {2024},
month = jul,
abstract = {How can we design proteins with desired functions? We are motivated by a chemical intuition that both geometric structure and biochemical properties are critical to a protein’s function. In this paper, we propose SurfPro, a new method to generate functional proteins given a desired surface and its associated biochemical properties. SurfPro comprises a hierarchical encoder that progressively models the geometric shape and biochemical features of a protein surface, and an autoregressive decoder to produce an amino acid sequence. We evaluate SurfPro on a standard inverse folding benchmark CATH 4.2 and two functional protein design tasks: protein binder design and enzyme design. Our SurfPro consistently surpasses previous state-of-the-art inverse folding methods, achieving a recovery rate of 57.78% on CATH 4.2 and higher success rates in terms of protein-protein binding and enzyme-substrate interaction scores.},
eprint = {https://arxiv.org/abs/2405.06693},
}
@InProceedings{xu2024llmrefine,
author = {Wenda Xu and Daniel Deutsch and Mara Finkelstein and Juraj Juraska and Biao Zhang and Zhongtao Liu and William Yang Wang and Lei Li and Markus Freitag},
booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics (NAACL) - Findings},
title = {LLMRefine: Pinpointing and Refining Large Language Models via Fine-Grained Actionable Feedback},
year = {2024},
month = jun,
abstract = {Recent large language models (LLM) are leveraging human feedback to improve their generation quality. However, human feedback is costly to obtain, especially during inference. In this work, we propose LLMRefine, an inference time optimization method to refine LLM's output. The core idea is to use a learned fine-grained feedback model to pinpoint defects and guide LLM to refine them iteratively. Using original LLM as a proposal of edits, LLMRefine searches for defect-less text via simulated annealing, trading off the exploration and exploitation. We conduct experiments on three text generation tasks, including machine translation, long-form question answering (QA), and topical summarization. LLMRefine consistently outperforms all baseline approaches, achieving improvements up to 1.7 MetricX points on translation tasks, 8.1 ROUGE-L on ASQA, 2.2 ROUGE-L on topical summarization.},
eprint = {https://arxiv.org/abs/2311.09336},
}
@InProceedings{zhu2024multilingual,
author = {Wenhao Zhu and Hongyi Liu and Qingxiu Dong and Jingjing Xu and Lingpeng Kong and Jiajun Chen and Lei Li and Shujian Huang},
booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics (NAACL) - Findings},
title = {Multilingual Machine Translation with Large Language Models: Empirical Results and Analysis},
year = {2024},
month = jun,
abstract = {Large language models (LLMs) have demonstrated remarkable potential in handling multilingual machine translation (MMT). In this paper, we systematically investigate the advantages and challenges of LLMs for MMT by answering two questions: 1) How well do LLMs perform in translating a massive number of languages? 2) Which factors affect LLMs' performance in translation? We evaluate popular LLMs, including XGLM, OPT, BLOOMZ, and ChatGPT, on 102 languages. Our empirical results show that even the best model ChatGPT still lags behind the supervised baseline NLLB in 83.33% of translation directions. Through further analysis, we discover that LLMs exhibit new working patterns when used for MMT. First, prompt semantics can surprisingly be ignored when given in-context exemplars, where LLMs still show strong performance even with unreasonable prompts. Second, cross-lingual exemplars can provide better task instruction for low-resource translation than exemplars in the same language pairs. Third, we observe the overestimated performance of BLOOMZ on dataset Flores-101, indicating the potential risk when using public datasets for evaluation.},
eprint = {https://arxiv.org/abs/2304.04675},
}
@InProceedings{zhao2024provable,
author = {Xuandong Zhao and Prabhanjan Vijendra Ananth and Lei Li and Yu-Xiang Wang},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {Provable Robust Watermarking for AI-Generated Text},
year = {2024},
month = may,
abstract = {We study the problem of watermarking large language models (LLMs) generated text -- one of the most promising approaches for addressing the safety challenges of LLM usage. In this paper, we propose a rigorous theoretical framework to quantify the effectiveness and robustness of LLM watermarks. We propose a robust and high-quality watermark method, Unigram-Watermark, by extending an existing approach with a simplified fixed grouping strategy. We prove that our watermark method enjoys guaranteed generation quality, correctness in watermark detection, and is robust against text editing and paraphrasing. Experiments on three varying LLMs and two datasets verify that our Unigram-Watermark achieves superior detection accuracy and comparable generation quality in perplexity, thus promoting the responsible use of LLMs.},
code = {https://github.com/XuandongZhao/Unigram-Watermark},
eprint = {https://arxiv.org/abs/2306.17439},
owner = {lilei.02},
}
@Patent{li2024method,
nationality = {US},
number = {11,954,455 B2},
year = {2024},
yearfiled = {2021},
assignee = {Beijing Bytedance Network Technology Co.},
author = {Lei Li and Jun Cao and Mingxuan Wang and Zhou Qian},
day = {9},
dayfiled = {26},
month = apr,
monthfiled = {#feb#},
title = {Method for translating words in a picture, electronic device, and storage medium},
type = {patentus},
}
@InProceedings{jain2024where,
author = {Sameer Jain and Sedrick Scott Keh and Shova Chhetri and Karun Dewan and Pablo Izquierdo and Johanna Prussmann and Pooja Shrestha and César Suárez and Zheyuan Ryan Shi and Lei Li and Fei Fang},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
title = {Where It Really Matters: Few-Shot Environmental Conservation Media Monitoring for Low-Resource Languages},
year = {2024},
month = feb,
}
@Patent{li2024media,
nationality = {US},
number = {11874869B2},
year = {2024},
yearfiled = {2018},
assignee = {Beijing Bytedance Network Tech Co.},
author = {Gen Li and Yi He and Lei Li and Yitan Li},
day = {16},
dayfiled = {29},
month = jan,
monthfiled = {#dec#},
title = {Media retrieval method and apparatus},
owner = {lilei.02},
}
@InProceedings{dong2023statistical,
author = {Qingxiu Dong and Jingjing Xu and Lingpeng Kong and Zhifang Sui and Lei Li},
booktitle = {the 37th Conference on Neural Information Processing Systems (NeurIPS)},
title = {Statistical Knowledge Assessment for Large Language Models},
year = {2023},
month = dec,
abstract = {Given varying prompts regarding a factoid question, can a large language model (LLM) reliably generate factually correct answers? Existing LLMs may generate distinct responses for different prompts. In this paper, we study the problem of quantifying knowledge contained in an LLM regarding a given set of facts. We propose KaRR, a statistical approach to assess factual knowledge for LLMs. The main idea is to estimate the ratio of LLM generating text corresponding to the answer entity given diverse prompts of the subject and the querying relation, versus it generating by random chances. Our assessment suite contains a comprehensive set of 994,123 entities and 600 relations, with 1,395,905 text aliases. We use our method to evaluate 20 LLMs of various sizes, including LLaMA, Alpaca, OPT, etc. Experiments show that our results have a strong correlation (0.43 Kendall's τ) with the results of human assessment on LLMs. Our results reveal that the knowledge in LLMs with the same backbone architecture adheres to the scaling law, while tuning on instruction-following data sometimes compromises the model's capability to generate factually correct text reliably.},
code = {https://github.com/dqxiu/KAssess},
eprint = {https://arxiv.org/abs/2305.10519},
owner = {lilei.02},
}
@InProceedings{zhang2023algo,
author = {Kexun Zhang and Danqing Wang and Jingtao Xia and William Yang Wang and Lei Li},
booktitle = {the 37th Conference on Neural Information Processing Systems (NeurIPS)},
title = {ALGO: Synthesizing Algorithmic Programs with Generated Oracle Verifiers},
year = {2023},
month = dec,
abstract = {Large language models (LLMs) excel at implementing code from functionality descriptions but struggle with algorithmic problems that require not only implementation but also identification of the suitable algorithm. Moreover, LLM-generated programs lack guaranteed correctness and require human verification. To address these challenges, we propose ALGO, a framework that synthesizes Algorithmic programs with LLM-Generated Oracles to guide the generation and verify their correctness. ALGO first generates a reference oracle by prompting an LLM to exhaustively enumerate all the combinations of relevant variables. This oracle is then utilized to guide an arbitrary search strategy in exploring the algorithm space and to verify the synthesized algorithms. Our study shows that the LLM-generated oracles are correct for 88% of the cases. With the oracles as verifiers, ALGO can be integrated with any existing code generation model in a model-agnostic manner to enhance its performance. Experiments show that when equipped with ALGO, we achieve an 8x better one-submission pass rate over the Codex model and a 2.6x better one-submission pass rate over CodeT, the current state-of-the-art model on CodeContests. We can also get 1.3x better pass rate over the ChatGPT Code Interpreter on unseen problems.},
code = {https://github.com/zkx06111/ALGO},
eprint = {https://arxiv.org/abs/2305.14591},
owner = {lilei.02},
}
@InProceedings{wang2023learning,
author = {Danqing Wang and Lei Li},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
title = {Learning from Mistakes via Cooperative Study Assistant for Large Language Models},
year = {2023},
month = dec,
abstract = {Large language models (LLMs) have demonstrated their potential to refine their generation based on their own feedback. However, the feedback from LLM itself is often inaccurate, thereby limiting its benefits. In this paper, we propose Study Assistant for Large LAnguage Model (SALAM), a novel framework with an auxiliary agent to assist the main LLM in learning from mistakes through interactive cooperation. In the gathering phase, the student assistant agent probes the main LLM, analyzes its errors, and collects the interaction in a mistake memory. During the examination phase, the study assistant provides guidelines by retrieving relevant cases to help the main LLM anticipate and avoid similar errors. We first investigate the effectiveness of a general study assistant and then customize it to provide LLM-specific guidance through imitation learning from successful guidance experiences. Our experiments on three LLMs using two challenging frameworks demonstrate that SALAM can significantly boost LLMs by an accuracy margin of up to 6.6 on BBH and 12.6 on BBQ.},
code = {https://dqwang122.github.io/projects/SALAM},
eprint = {https://arxiv.org/abs/2305.13829},
}
@InProceedings{xu2023instructscore,
author = {Wenda Xu and Danqing Wang and Liangming Pan and Zhenqiao Song and Markus Freitag and William Yang Wang and Lei Li},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
title = {INSTRUCTSCORE: Explainable Text Generation Evaluation with Finegrained Feedback},
year = {2023},
month = dec,
abstract = {Automatically evaluating the quality of language generation is critical. Although recent learned metrics show high correlation with human judgement, these metrics can not explain their verdict or associate the scores with defects in generated text. To address this limitation, we present InstructScore, an explainable evaluation metric for text generation. By harnessing both explicit human instruction and the implicit knowledge of GPT-4, we fine-tune a text evaluation metric based on LLaMA, producing both a score for generated text and a human readable diagnostic report. We evaluate InstructScore on a variety of generation tasks, including translation, captioning, data-to-text and commonsense generation. Experiments show that our 7B model surpasses all other unsupervised metrics, including those based on 175B GPT-3 and GPT-4. Surprisingly, our InstructScore, even without direct supervision from human-rated data, achieves performance levels on par with state-of-the-art metrics like COMET22, which were fine-tuned on human ratings.},
code = {https://github.com/xu1998hz/InstructScore_SEScore3},
eprint = {https://arxiv.org/abs/2305.14282},
}
@InProceedings{ouyang2023autoplan,
author = {Siqi Ouyang and Lei Li},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP) - Findings},
title = {AutoPlan: Automatic Planning of Interactive Decision-Making Tasks With Large Language Models},
year = {2023},
month = dec,
abstract = {Recent large language models (LLMs) are promising for making decisions in grounded environments. However, LLMs frequently fail in complex decision-making tasks due to the misalignment between the pre-trained knowledge in LLMs and the actual rules in the environment. Existing methods require either costly gradient computation or lengthy in-context demonstrations. In this paper, we propose AutoPlan, an approach to guide LLM-based agents to accomplish interactive decision-making tasks. AutoPlan augments the LLM prompt with a task-solving plan and optimizes it through iterative experience collection and reflection. Our experiments show that AutoPlan, though using no in-context demonstrations, achieves success rates on par with the baselines using human-written demonstrations on ALFWorld and even outperforms them by 8% on HotpotQA.},
code = {https://github.com/owaski/AutoPlan},
eprint = {https://arxiv.org/abs/2305.15064},
}
@InProceedings{wu2023extrapolating,
author = {Bohong Wu and Fei Yuan and Hai Zhao and Lei Li and Jingjing Xu},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP) - Findings},
title = {Extrapolating Multilingual Understanding Models as Multilingual Generators},
year = {2023},
month = dec,
abstract = {Multilingual understanding models (or encoder-based), pre-trained via masked language modeling, have achieved promising results on many language understanding tasks (e.g., mBERT). However, these non-autoregressive (NAR) models still struggle to generate high-quality texts compared with autoregressive (AR) models. Considering that encoder-based models have the advantage of efficient generation and self-correction abilities, this paper explores methods to empower multilingual understanding models the generation abilities to get a unified model. Specifically, we start from a multilingual encoder (XLM-R) and propose a Semantic-Guided Alignment-then-Denoising (SGA) approach to adapt an encoder to a multilingual generator with a small number of new parameters. Experiments show that the proposed approach is an effective adaption method, outperforming widely-used initialization-based methods with gains of 9.4 BLEU on machine translation, 8.1 Rouge-L on question generation, and 5.5 METEOR on story generation on XLM-Rlarge. On the other hand, we observe that XLM-R is still inferior to mBART in supervised settings despite better results on zero-shot settings, indicating that more exploration is required to make understanding models strong generators.},
eprint = {https://arxiv.org/abs/2305.13140},
}
@Patent{du2023video,
nationality = {US},
number = {11580314B2},
year = {2023},
yearfiled = {2022},
assignee = {Beijing Bytedance Network Tech Co.},
author = {Yuzhang Du and Peihao Zhu and Yiming Chen and Chongxing Zhou and Mingxuan Wang and Lei Li},
day = {19},
dayfiled = {10},
month = sep,
monthfiled = {#aug#},
title = {Video translation method and apparatus, storage medium, and electronic device},
comment = {视频翻译方法和装置、存储介质和电子设备},
owner = {lilei.02},
}
@InProceedings{wang2023accelerating,
author = {Danqing Wang and Zeyu Wen and Fei Ye and Lei Li and Hao Zhou},
booktitle = {the 29th SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)},
title = {Accelerating Antimicrobial Peptide Discovery with Latent Structure},
year = {2023},
month = aug,
abstract = {Antimicrobial peptides (AMPs) are promising therapeutic approaches against drug-resistant pathogens. Recently, deep generative models are used to discover new AMPs. However, previous studies mainly focus on peptide sequence attributes and do not consider crucial structure information. In this paper, we propose a latent sequence structure model for designing AMPs (LSSAMP). LSSAMP exploits multi-scale vector quantization in the latent space to represent secondary structures (e.g. alpha helix and beta sheet). By sampling in the latent space, LSSAMP can simultaneously generate peptides with ideal sequence attributes and secondary structures. Experimental results show that the peptides generated by LSSAMP have a high probability of antimicrobial activity. Our wet laboratory experiments verified that two of the 21 candidates exhibit strong antimicrobial activity. The code is released at https://github.com/dqwang122/LSSAMP.},
code = {https://github.com/dqwang122/LSSAMP},
eprint = {https://arxiv.org/abs/2212.09450},
}
@Patent{li2023interactive,
nationality = {US},
number = {11704504B2},
year = {2023},
yearfiled = {2021},
assignee = {Beijing Bytedance Network Tech Co.},
author = {Lei Li and Mingxuan Wang and Hao Zhou and Zewei Sun},
day = {18},
dayfiled = {16},
month = jul,
monthfiled = {#feb#},
title = {Interactive machine translation method, electronic device, and computer-readable storage medium},
owner = {lilei.02},
}
@InProceedings{song2023importance,
author = {Zhenqiao Song and Lei Li},
booktitle = {Proceedings of the 40th International Conference on Machine Learning (ICML)},
title = {Importance Weighted Expectation-Maximization for Protein Sequence Design},
year = {2023},
month = jul,
abstract = {Designing protein sequences with desired biological function is crucial in biology and chemistry. Recent machine learning methods use a surrogate sequence-function model to replace the expensive wet-lab validation. How can we efficiently generate diverse and novel protein sequences with high fitness? In this paper, we propose IsEM-Pro, an approach to generate protein sequences towards a given fitness criterion. At its core, IsEM-Pro is a latent generative model, augmented by combinatorial structure features from a separately learned Markov random fields (MRFs). We develop an Monte Carlo Expectation-Maximization method (MCEM) to learn the model. During inference, sampling from its latent space enhances diversity while its MRFs features guide the exploration in high fitness regions. Experiments on eight protein sequence design tasks show that our IsEM-Pro outperforms the previous best methods by at least 55% on average fitness score and generates more diverse and novel protein sequences.},
code = {https://github.com/JocelynSong/IsEM-Pro},
eprint = {https://arxiv.org/abs/2305.00386},
}
@InProceedings{zhang2023redi,
author = {Kexun Zhang and Xianjun Yang and William Yang Wang and Lei Li},
booktitle = {Proceedings of the 40th International Conference on Machine Learning (ICML)},
title = {{ReDi}: Efficient Learning-Free Diffusion Inference via Trajectory Retrieval},
year = {2023},
month = jul,
abstract = {Diffusion models show promising generation capability for a variety of data. Despite their high generation quality, the inference for diffusion models is still time-consuming due to the numerous sampling iterations required. To accelerate the inference, we propose ReDi, a simple yet learning-free Retrieval-based Diffusion sampling framework. From a precomputed knowledge base, ReDi retrieves a trajectory similar to the partially generated trajectory at an early stage of generation, skips a large portion of intermediate steps, and continues sampling from a later step in the retrieved trajectory. We theoretically prove that the generation performance of ReDi is guaranteed. Our experiments demonstrate that ReDi improves the model inference efficiency by 2x speedup. Furthermore, ReDi is able to generalize well in zero-shot cross-domain image generation such as image stylization.},
code = {https://github.com/zkx06111/ReDiffusion},
eprint = {https://arxiv.org/abs/2302.02285},
}
@InProceedings{zhao2023protecting,
author = {Xuandong Zhao and Yu-Xiang Wang and Lei Li},
booktitle = {Proceedings of the 40th International Conference on Machine Learning (ICML)},
title = {Protecting Language Generation Models via Invisible Watermarking},
year = {2023},
month = jul,
abstract = {Language generation models have been an increasingly powerful enabler for many applications. Many such models offer free or affordable API access, which makes them potentially vulnerable to model extraction attacks through distillation. To protect intellectual property (IP) and ensure fair use of these models, various techniques such as lexical watermarking and synonym replacement have been proposed. However, these methods can be nullified by obvious countermeasures such as "synonym randomization". To address this issue, we propose {GINSEW}, a novel method to protect text generation models from being stolen through distillation. The key idea of our method is to inject secret signals into the probability vector of the decoding steps for each target token. We can then detect the secret message by probing a suspect model to tell if it is distilled from the protected one. Experimental results show that GINSEW can effectively identify instances of IP infringement with minimal impact on the generation quality of protected APIs. Our method demonstrates an absolute improvement of 19 to 29 points on mean average precision (mAP) in detecting suspects compared to previous methods against watermark removal attacks.},
eprint = {https://arxiv.org/abs/2302.03162},
}
@InProceedings{gu2023playground,
author = {Gu, Tianrui and Chen, Kaie and Ouyang, Siqi and Li, Lei},
booktitle = {Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)},
title = {{P}lay{G}round Low Resource Machine Translation System for the 2023 {A}mericas{NLP} Shared Task},
year = {2023},
address = {Toronto, Canada},
editor = {Mager, Manuel and Ebrahimi, Abteen and Oncevay, Arturo and Rice, Enora and Rijhwani, Shruti and Palmer, Alexis and Kann, Katharina},
month = jul,
pages = {173--176},
publisher = {Association for Computational Linguistics},
abstract = {This paper presents PlayGround{'}s submission to the AmericasNLP 2023 shared task on machine translation (MT) into indigenous languages. We finetuned NLLB-600M, a multilingual MT model pre-trained on Flores-200, on 10 low-resource language directions and examined the effectiveness of weight averaging and back translation. Our experiments showed that weight averaging, on average, led to a 0.0169 improvement in the ChrF++ score. Additionally, we found that back translation resulted in a 0.008 improvement in the ChrF++ score.},
doi = {10.18653/v1/2023.americasnlp-1.19},
entrysubtype = {workshop},
url = {https://aclanthology.org/2023.americasnlp-1.19},
}
@InProceedings{yuan2023lego,
author = {Fei Yuan and Yinquan Lu and Wenhao Zhu and Lingpeng Kong and Lei Li and Yu Qiao and Jingjing Xu},
booktitle = {the 61st Annual Meeting of the Association for Computational Linguistics - Findings (ACL-Findings)},
title = {{Lego-MT}: Learning Detachable Models for Massively Multilingual Machine Translation},
year = {2023},
month = jul,
abstract = {Traditional multilingual neural machine translation (MNMT) uses a single model to translate all directions. However, with the increasing scale of language pairs, simply using a single model for massive MNMT brings new challenges: parameter tension and large computations. In this paper, we revisit multi-way structures by assigning an individual branch for each language (group). Despite being a simple architecture, it is challenging to train de-centralized models due to the lack of constraints to align representations from all languages. We propose a localized training recipe to map different branches into a unified space, resulting in an efficient detachable model, Lego-MT. For a fair comparison, we collect data from OPUS and build the first large-scale open-source translation benchmark covering 7 language-centric data, each containing 445 language pairs. Experiments show that Lego-MT (1.2B) brings gains of more than 4 BLEU while outperforming M2M-100 (12B).},
code = {https://github.com/CONE-MT/Lego-MT},
eprint = {https://arxiv.org/abs/2212.10551},
}
@InProceedings{chen2023say,
author = {Jiangjie Chen and Wei Shi and Ziquan Fu and Sijie Cheng and Lei Li and Yanghua Xiao},
booktitle = {the 61st Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {Say What You Mean! Large Language Models Speak Too Positively about Negative Commonsense Knowledge},
year = {2023},
month = jul,
abstract = {Large language models (LLMs) have been widely studied for their ability to store and utilize positive knowledge. However, negative knowledge, such as “lions don’t live in the ocean”, is also ubiquitous in the world but rarely mentioned explicitly in the text. What do LLMs know about negative knowledge? This work examines the ability of LLMs to negative commonsense knowledge. We design a constrained keywords-to-sentence generation task (CG) and a Boolean question-answering task (QA) to probe LLMs. Our experiments reveal that LLMs frequently fail to generate valid sentences grounded in negative commonsense knowledge, yet they can correctly answer polar yes-or-no questions. We term this phenomenon the belief conflict of LLMs. Our further analysis shows that statistical shortcuts and negation reporting bias from language modeling pre-training cause this conflict.},
code = {https://github.com/jiangjiechen/uncommongen},
eprint = {https://arxiv.org/abs/2305.05976},
}
@InProceedings{ouyang2023waco,
author = {Siqi Ouyang and Rong Ye and Lei Li},
booktitle = {the 61st Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {{WACO}: Word-Aligned Contrastive Learning for Speech Translation},
year = {2023},
month = jul,
abstract = {End-to-end Speech Translation (E2E ST) aims to translate source speech into target translation without generating the intermediate transcript. However, existing approaches for E2E ST degrade considerably when only limited ST data are available. We observe that an ST model's performance strongly correlates with its embedding similarity from speech and transcript. In this paper, we propose Word-Aligned COntrastive learning (WACO), a novel method for few-shot speech-to-text translation. Our key idea is bridging word-level representations for both modalities via contrastive learning. We evaluate WACO and other methods on the MuST-C dataset, a widely used ST benchmark. Our experiments demonstrate that WACO outperforms the best baseline methods by 0.7-8.5 BLEU points with only 1-hour parallel data.},
code = {https://github.com/owaski/WACO},
eprint = {https://arxiv.org/abs/2212.09359},
}
@InProceedings{xu2023sescore2,
author = {Wenda Xu and Xian Qian and Mingxuan Wang and Lei Li and William Yang Wang},
booktitle = {the 61st Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {{SESCORE2}: Learning Text Generation Evaluation via Synthesizing Realistic Mistakes},
year = {2023},
month = jul,
abstract = {Is it possible to leverage large scale raw and raw parallel corpora to build a general learned metric? Existing learned metrics have gaps to human judgements, are model-dependent or are limited to the domains or tasks where human ratings are available. In this paper, we propose SEScore2, a model-based metric pretrained over million-scale synthetic dataset constructed by our novel retrieval augmented data synthesis pipeline. SEScore2 achieves high correlation to human judgements without any human rating supervisions. Importantly, our unsupervised SEScore2 can outperform supervised metrics, which are trained on the News human ratings, at the TED domain. We evaluate SEScore2 over four text generation tasks across three languages. SEScore2 outperforms all prior unsupervised evaluation metrics in machine translation, speech translation, data-to-text and dialogue generation, with average Kendall improvements 0.158. SEScore2 even outperforms SOTA supervised BLEURT at data-to-text, dialogue generation and overall correlation.},
code = {https://github.com/xu1998hz/SEScore2},
eprint = {https://arxiv.org/abs/2212.09305},
}
@InProceedings{zhao2023pre,
author = {Xuandong Zhao and Siqi Ouyang and Zhiguo Yu and Ming Wu and Lei Li},
booktitle = {the 61st Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {Pre-trained Language Models can be Fully Zero-Shot Learners},
year = {2023},
month = jul,
abstract = {How can we extend a pre-trained model to many language understanding tasks, without labeled or additional unlabeled data? Pre-trained language models (PLMs) have been effective for a wide range of NLP tasks. However, existing approaches either require fine-tuning on downstream labeled datasets or manually constructing proper prompts. In this paper, we propose nonparametric prompting PLM (NPPrompt) for fully zero-shot language understanding. Unlike previous methods, NPPrompt uses only pre-trained language models and does not require any labeled data or additional raw corpus for further fine-tuning, nor does it rely on humans to construct a comprehensive set of prompt label words. We evaluate NPPrompt against previous major few-shot and zero-shot learning methods on diverse NLP tasks: including text classification, text entailment, similar text retrieval, and paraphrasing. Experimental results demonstrate that our NPPrompt outperforms the previous best fully zero-shot method by big margins, with absolute gains of 12.8% in accuracy on text classification and 18.9% on the GLUE benchmark.},
code = {https://github.com/XuandongZhao/NPPrompt},
eprint = {https://arxiv.org/abs/2212.06950},
}
@Patent{du2023document,
nationality = {US},
number = {11580314B2},
year = {2023},
yearfiled = {2022},
assignee = {Beijing Bytedance Network Tech Co.},
author = {Yuzhang Du and Peihao Zhu and Chongxing Zhou and Yiming Chen and Mingxuan Wang and Lei Li},
day = {14},
dayfiled = {25},
month = feb,
monthfiled = {#jul#},
title = {Document translation method and apparatus, storage medium, and electronic device},
owner = {lilei.02},
}
@Patent{he2023method,
nationality = {US},
number = {11593582B2},
year = {2023},
yearfiled = {2018},
assignee = {Beijing Bytedance Network Tech Co.},
author = {He, Yi and Li, Lei and Yang, Cheng and Li, Gen and Li, Yitan},
day = {28},
dayfiled = {29},
month = feb,
monthfiled = {#dec#},
title = {Method and device for comparing media features},
owner = {lilei.02},
}
@Patent{wang2023speech,
nationality = {US},
number = {11,586,831 B2},
year = {2023},
yearfiled = {2021},
assignee = {Beijing ByteDance Network Technology Co Ltd},
author = {Mingxuan Wang and Qianqian Dong and Lei Li},
day = {21},
dayfiled = {26},
month = feb,
monthfiled = {#feb#},
title = {Speech translation method electronic device and computer-readable storage medium using SEQ2SEQ for determining alternative translated speech segments},
owner = {lilei.02},
}
@InProceedings{chen2023converge,
author = {Jiangjie Chen and Rui Xu and Wenxuan Zeng and Changzhi Sun and Lei Li and Yanghua Xiao},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
title = {Converge to the Truth: Factual Error Correction via Iterative Constrained Editing},
year = {2023},
month = feb,
abstract = {Given a possibly false claim sentence, how can we automatically correct it with minimal editing? Existing methods either require a large number of pairs of false and corrected claims for supervised training or do not handle well errors spanning over multiple tokens within an utterance. In this paper, we propose VENCE, a novel method for factual error correction (FEC) with minimal edits. VENCE formulates the FEC problem as iterative sampling editing actions with respect to a target density function. We carefully design the target function with predicted truthfulness scores from an offline trained fact verification model. VENCE samples the most probable editing positions based on back-calculated gradients of the truthfulness score concerning input tokens and the editing actions using a distantly-supervised language model (T5). Experiments on a public dataset show that VENCE improves the well-adopted SARI metric by 5.3 (or a relative improvement of 11.8%) over the previous best distantly-supervised methods.},
code = {https://github.com/jiangjiechen/VENCE},
eprint = {https://arxiv.org/abs/2211.12130},
}
@InBook{li2023deep,
author = {Lei Li},
chapter = {3},
editor = {Honglin Li and Mingyue Zheng and Feng Zhu and Fang Bai},
pages = {78--94},
publisher = {Chemical Industry Press},
title = {Deep Generative Models},
year = {2023},
isbn = {978-7-122-42928-5},
note = {in Chinese},
booktitle = {Artificial Intelligence for Drug Discovery},
}
@InProceedings{dong2022calibrating,
author = {Qingxiu Dong and Damai Dai and Yifan Song and Jingjing Xu and Zhifang Sui and Lei Li},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP) - Findings},
title = {Calibrating Factual Knowledge in Pretrained Language Models},
year = {2022},
month = dec,
abstract = {Previous literature has proved that Pretrained Language Models (PLMs) can store factual knowledge. However, we find that facts stored in the PLMs are not always correct. It motivates us to explore a fundamental question: How do we calibrate factual knowledge in PLMs without re-training from scratch? In this work, we propose a simple and lightweight method CaliNet to achieve this goal. To be specific, we first detect whether PLMs can learn the right facts via a contrastive score between right and fake facts. If not, we then use a lightweight method to add and adapt new parameters to specific factual texts. Experiments on the knowledge probing task show the calibration effectiveness and efficiency. In addition, through closed-book question answering, we find that the calibrated PLM possesses knowledge generalization ability after fine-tuning. Beyond the calibration performance, we further investigate and visualize the knowledge calibration mechanism.},
code = {https://github.com/dqxiu/calinet},
eprint = {https://arxiv.org/abs/2210.03329},
}
@InProceedings{xu2022not,
author = {Wenda Xu and Yilin Tuan and Yujie Lu and Michael Saxon and Lei Li and William Yang Wang},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP) - Findings},
title = {Not All Errors are Equal: Learning Text Generation Metrics using Stratified Error Synthesis},
year = {2022},
month = dec,
abstract = {Is it possible to build a general and automatic natural language generation (NLG) evaluation metric? Existing learned metrics either perform unsatisfactorily or are restricted to tasks where large human rating data is already available. We introduce SESCORE, a model-based metric that is highly correlated with human judgements without requiring human annotation, by utilizing a novel, iterative error synthesis and severity scoring pipeline. This pipeline applies a series of plausible errors to raw text and assigns severity labels by simulating human judgements with entailment. We evaluate SESCORE against existing metrics by comparing how their scores correlate with human ratings. SESCORE outperforms all prior unsupervised metrics on multiple diverse NLG tasks including machine translation, image captioning, and WebNLG text generation. For WMT 20/21 En-De and Zh-En, SESCORE improve the average Kendall correlation with human judgement from 0.154 to 0.195. SESCORE even achieves comparable performance to the best supervised metric COMET, despite receiving no human-annotated training data.},
eprint = {https://arxiv.org/abs/2210.05035},
}
@InProceedings{zhao2022distillation,
author = {Xuandong Zhao and Lei Li and Yu-Xiang Wang},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP) - Findings},
title = {Distillation-Resistant Watermarking for Model Protection in NLP},
year = {2022},
month = dec,
abstract = {How can we protect the intellectual property of trained NLP models? Modern NLP models are prone to stealing by querying and distilling from their publicly exposed APIs. However, existing protection methods such as watermarking only work for images but are not applicable to text. We propose Distillation-Resistant Watermarking (DRW), a novel technique to protect NLP models from being stolen via distillation. DRW protects a model by injecting watermarks into the victim's prediction probability corresponding to a secret key and is able to detect such a key by probing a suspect model. We prove that a protected model still retains the original accuracy within a certain bound. We evaluate DRW on a diverse set of NLP tasks including text classification, part-of-speech tagging, and named entity recognition. Experiments show that DRW protects the original model and detects stealing suspects at 100% mean average precision for all four tasks while the prior method fails on two.},
code = {https://github.com/xuandongzhao/drw},
eprint = {https://arxiv.org/abs/2210.03312},
}
@Article{chu2022icm,
author = {Ruihang Chu and Yukang Chen and Tao Kong and Lu Qi and Lei Li},
journal = {IEEE Robotics and Automation Letters},
title = {{ICM-3D}: Instantiated Category Modeling for 3D Instance Segmentation},
year = {2022},
abstract = {Separating 3D point clouds into individual instances is an important task for 3D vision. It is challenging due to the unknown and varying number of instances in a scene. Existing deep learning based works focus on a two-step pipeline: first learn a feature embedding and then cluster the points. Such a two-step pipeline leads to disconnected intermediate objectives. In this paper, we propose an integrated reformulation of 3D instance segmentation as a per-point classification problem. We propose ICM-3D, a single-step method to segment 3D instances via instantiated categorization. The augmented category information is automatically constructed from 3D spatial positions. We conduct extensive experiments to verify the effectiveness of ICM-3D and show that it obtains inspiring performance across multiple frameworks, backbones and benchmarks.},
eprint = {https://arxiv.org/abs/2108.11771},
}
@Article{wang2022solo,
author = {Xinlong Wang and Rufeng Zhang and Chunhua Shen and Tao Kong and Lei Li},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
title = {SOLO: A Simple Framework for Instance Segmentation},
year = {2022},
month = nov,
number = {11},
pages = {8587--8601},
volume = {44},
code = {https://github.com/aim-uofa/AdelaiDet/},
eprint = {https://arxiv.org/abs/2106.15947},
}
@InProceedings{wang2022lightseq2,
author = {Xiaohui Wang and Yang Wei and Ying Xiong and Guyue Huang and Xian Qian and Yufei Ding and Mingxuan Wang and Lei Li},
booktitle = {Proceedings of The International Conference for High Performance Computing, Networking, Storage and Analysis ({SC}'22)},
title = {{LightSeq2}: Accelerated Training for Transformer-based Models on {GPUs}},
year = {2022},
month = nov,
code = {https://github.com/bytedance/lightseq},
eprint = {https://arxiv.org/abs/2110.05722},
}
@Patent{he2022video,
nationality = {US},
number = {11,455,802 B2},
year = {2022},
yearfiled = {2018},
assignee = {Beijing Bytedance Network Tech Co.},
author = {He, Yi and Li, Lei and Yang, Cheng and Li, Gen and Li, Yitan},
day = {27},
dayfiled = {29},
month = sep,
monthfiled = {#dec#},
title = {Video Feature Extraction Method and Device},
comment = {一种视频特征提取方法及装置 CN 201810271774.6 March 29, 2018},
owner = {lilei.02},
}
@Patent{yang2022method,
nationality = {US},
number = {11403835B2},
year = {2022},
yearfiled = {2018},
assignee = {Beijing Bytedance Network Tech Co.},
author = {Yang, Cheng and He, Yi and Li, Lei},
day = {2},
dayfiled = {12},
month = aug,
monthfiled = {#sep#},
title = {Method and device for processing feature point of image},
comment = {用于处理图像的特征点的方法和装置},
owner = {lilei.02},
}
@InProceedings{lu2022uncovering,
author = {Yunfei Lu and Peng Cui and Linyun Yu and Lei Li and Wenwu Zhu},
booktitle = {the 28th SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)},
title = {Uncovering the Heterogeneous Effects of Preference Diversity on User Activeness: A Dynamic Mixture Model},
year = {2022},
month = aug,
}
@Patent{cao2022method,
nationality = {US},
number = {11379664B2},
year = {2022},
yearfiled = {2020},
assignee = {Beijing Bytedance Network Tech Co.},
author = {Cao, Jun and Li, Lei and Wang, Mingxuan and Zhu, Peihao},
day = {5},
dayfiled = {28},
month = jul,
monthfiled = {#feb#},
title = {Method for acquiring a parallel corpus, electronic device, and storage medium},
comment = {平行语料获取方法、装置、电子设备、及存储介质},
owner = {lilei.02},
}
@InProceedings{chen2022mtg,
author = {Chen, Yiran and Song, Zhenqiao and Wu, Xianze and Wang, Danqing and Xu, Jingjing and Chen, Jiaze and Zhou, Hao and Li, Lei},
booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT Findings)},
title = {{MTG}: A Benchmark Suite for Multilingual Text Generation},
year = {2022},
month = jul,
publisher = {Association for Computational Linguistics},
abstract = {We introduce MTG, a new benchmark suite for training and evaluating multilingual text generation. It is the first and largest multilingual multiway text generation benchmark with 400k human-annotated data for four generation tasks (story generation, question generation, title generation and text summarization) across five languages (English, German, French, Spanish and Chinese). Its multiway characteristic makes it possible to achieve direct cross-lingual generation between any two languages, thus facilitating knowledge transfer. Based on MTG, we set various evaluation scenarios and conduct deep analyses of several popular multilingual generation models from different aspects. Our benchmark suite can foster model performance enhancement with more human-annotated parallel data and encourage model evaluation with more diverse generation scenarios.},
eprint = {https://arxiv.org/abs/2108.07140},
owner = {lilei.02},
url = {https://mtg-benchmark.netlify.app},
}
@InProceedings{ye2022cross,
author = {Ye, Rong and Wang, Mingxuan and Li, Lei},
booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)},
title = {Cross-modal Contrastive Learning for Speech Translation},
year = {2022},
month = jul,
publisher = {Association for Computational Linguistics},
abstract = {How to learn similar representations for spoken utterances and their written text? We believe a unified and aligned representation of speech and text will lead to improvement in speech translation. To this end, we propose ConST, a cross-modal contrastive learning method for end-to-end speech-to-text translation. We evaluate ConST and a variety of previous baselines on multiple language directions (En-De/Fr/Ru) of a popular benchmark MuST-C. Experiments show that the proposed ConST consistently outperforms all previous methods, and achieves the state-of-the-art average BLEU of 28.5. The analysis further verifies that ConST indeed closes the representation gap of different modalities --- its learned representation improves the accuracy of cross-modal text retrieval from 4% to 88%.},
code = {https://github.com/ReneeYe/ConST},
eprint = {https://arxiv.org/abs/2205.02444},
owner = {lilei.02},
}
@InProceedings{zhao2022provably,
author = {Zhao, Xuandong and Li, Lei and Wang, Yu-Xiang},
booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)},
title = {Provably Confidential Language Modelling},
year = {2022},
month = jul,
publisher = {Association for Computational Linguistics},
abstract = {Large language models are shown to memorize privacy information such as social security numbers in training data. Given the sheer scale of the training corpus, it is challenging to screen and filter these privacy data, either manually or automatically. In this paper, we propose Confidentially Redacted Training (CRT), a method to train language generation models while protecting the confidential segments. We borrow ideas from differential privacy (which solves a related but distinct problem) and show that our method is able to provably prevent unintended memorization by randomizing parts of the training process. Moreover, we show that redaction with an approximately correct screening policy amplifies the confidentiality guarantee. We implement the method for both LSTM and GPT language models. Our experimental results show that the models trained by CRT obtain almost the same perplexity while preserving strong confidentiality.},
eprint = {https://arxiv.org/abs/2205.01863},
owner = {lilei.02},
}
@InProceedings{huang2022learning,
author = {Fei Huang and Tianhua Tao and Hao Zhou and Lei Li and Minlie Huang},
booktitle = {Proceedings of the 39th International Conference on Machine Learning (ICML)},
title = {On the Learning of Non-autoregressive Transformers},
year = {2022},
month = jul,
}
@InProceedings{li2022learning,
author = {Yunfei Li and Tao Kong and Lei Li and Yi Wu},
booktitle = {{IEEE} International Conference on Robotics and Automation ({ICRA})},
title = {Learning Design and Construction with Varying-Sized Materials via Prioritized Memory Resets},
year = {2022},
month = may,
abstract = {Can a robot autonomously learn to design and construct a bridge from varying-sized blocks without a blueprint? It is a challenging task with long horizon and sparse reward – the robot has to figure out physically stable design schemes and feasible actions to manipulate and transport blocks. Due to diverse block sizes, the state space and action trajectories are vast to explore. In this paper, we propose a hierarchical approach for this problem. It consists of a reinforcement-learning designer to propose high-level building instructions and a motion-planning-based action generator to manipulate blocks at the low level. For high-level learning, we develop a novel technique, prioritized memory resetting (PMR) to improve exploration. PMR adaptively resets the state to those most critical configurations from a replay buffer so that the robot can resume training on partial architectures instead of from scratch. Furthermore, we augment PMR with auxiliary training objectives and fine-tune the designer with the locomotion generator. Our experiments in simulation and on a real deployed robotic system demonstrate that it is able to effectively construct bridges with blocks of varying sizes at a high success rate. Demos can be found at https://sites.google.com/view/bridge-pmr.},
code = {https://github.com/IrisLi17/bridge_construction},
eprint = {https://arxiv.org/abs/2204.05509},
thumbnail = {li2022bridge_robot.jpg},
url = {https://sites.google.com/view/bridge-pmr},
}
@InProceedings{ouyang2022impact,
author = {Ouyang, Siqi and Ye, Rong and Li, Lei},
booktitle = {Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)},
title = {On the Impact of Noises in Crowd-Sourced Data for Speech Translation},
year = {2022},
month = may,
pages = {92--97},
publisher = {Association for Computational Linguistics},
abstract = {Training speech translation (ST) models requires large and high-quality datasets. MuST-C is one of the most widely used ST benchmark datasets. It contains around 400 hours of speech-transcript-translation data for each of the eight translation directions. This dataset passes several quality-control filters during creation. However, we find that MuST-C still suffers from three major quality issues: audio-text misalignment, inaccurate translation, and unnecessary speaker{'}s name. What are the impacts of these data quality issues on model development and evaluation? In this paper, we propose an automatic method to fix or filter the above quality issues, using English-German (En-De) translation as an example. Our experiments show that ST models perform better on clean test sets, and the rank of proposed models remains consistent across different test sets. Besides, simply removing misaligned data points from the training set does not lead to a better ST model.},
entrysubtype = {workshop},
}
@InProceedings{bao2022latent,
author = {Yu Bao and Hao Zhou and Shujian Huang and Dongqi Wang and Lihua Qian and Xinyu Dai and Jiajun Chen and Lei Li},
booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {latent-{GLAT}: Glancing at Latent Variables for Parallel Text Generation},
year = {2022},
month = may,
abstract = {Recently, parallel text generation has received widespread attention due to its success in generation efficiency. Although many advanced techniques are proposed to improve its generation quality, they still need the help of an autoregressive model for training to overcome the one-to-many multi-modal phenomenon in the dataset, limiting their applications. In this paper, we propose latent-GLAT, which employs the discrete latent variables to capture word categorical information and invoke an advanced curriculum learning technique, alleviating the multi-modality problem. Experiment results show that our method outperforms strong baselines without the help of an autoregressive model, which further broadens the application scenarios of the parallel decoding paradigm.},
code = {https://github.com/baoy-nlp/Latent-GLAT},
eprint = {https://openreview.net/forum?id=y4xCe0MSoWx},
}
@InProceedings{dong2022learning,
author = {Qianqian Dong and Yaoming Zhu and Mingxuan Wang and Lei Li},
booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {Learning When to Translate for Streaming Speech},
year = {2022},
month = may,
abstract = {How to find proper moments to generate partial sentence translation given a streaming speech input? Existing approaches waiting-and-translating for a fixed duration often break the acoustic units in speech, since the boundaries between acoustic units in speech are not even. In this paper, we propose MoSST, a simple yet effective method for translating streaming speech content. Given a usually long speech sequence, we develop an efficient monotonic segmentation module inside an encoder-decoder model to accumulate acoustic information incrementally and detect proper speech unit boundaries for the input in speech translation task. Experiments on multiple translation directions of the MuST-C dataset show that MoSST outperforms existing methods and achieves the best trade-off between translation quality (BLEU) and latency.},
code = {https://github.com/dqqcasia/mosst},
eprint = {https://openreview.net/forum?id=mBz73IzOI6},
}
@InProceedings{fang2022stemm,
author = {Qingkai Fang and Rong Ye and Lei Li and Yang Feng and Mingxuan Wang},
booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {{STEMM}: Self-learning with Speech-text Manifold Mixup for Speech Translation},
year = {2022},
month = may,
abstract = {How to learn a better speech representation for end-to-end speech-to-text translation (ST) with limited labeled data? Existing techniques often attempt to transfer powerful machine translation (MT) capabilities to ST, but neglect the representation discrepancy across modalities. In this paper, we propose the Speech-TExt Manifold Mixup (STEMM) method to calibrate such discrepancy. Specifically, we mix up the representation sequences of different modalities, and take both unimodal speech sequences and multimodal mixed sequences as input to the translation model in parallel, and regularize their output predictions with a self-learning framework. Experiments on MuST-C speech translation benchmark and further analysis show that our method effectively alleviates the cross-modal representation discrepancy, and achieves significant improvements over a strong baseline on eight translation directions.},
code = {https://github.com/ictnlp/STEMM},
eprint = {https://openreview.net/forum?id=kazCgft9cCH},
}
@InProceedings{fu2022contextual,
author = {Zhiyi Fu and Wangchunshu Zhou and Jingjing Xu and Hao Zhou and Lei Li},
booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {Contextual Representation Learning beyond Masked Language Modeling},
year = {2022},
month = may,
abstract = {How do masked language models (MLMs) such as BERT learn contextual representations? In this work, we analyze the learning dynamics of MLMs. We find that MLMs adopt sampled embeddings as anchors to estimate and inject contextual semantics to representations, which limits the efficiency and effectiveness of MLMs. To address these issues, we propose TACO, a simple yet effective representation learning approach to directly model global semantics. TACO extracts and aligns contextual semantics hidden in contextualized representations to encourage models to attend global semantics when generating contextualized representations. Experiments on the GLUE benchmark show that TACO achieves up to 5x speedup and up to 1.2 points average improvement over existing MLMs.},
code = {https://github.com/FUZHIYI/TACO},
eprint = {https://openreview.net/forum?id=KWL_ElhUejN},
}
@InProceedings{chen2022e,
author = {Jiangjie Chen and Rui Xu and Ziquan Fu and Wei Shi and Zhongqiao Li and Xinbo Zhang and Changzhi Sun and Lei Li and Yanghua Xiao and Hao Zhou},
booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL) - Findings},
title = {{E-KAR}: A Benchmark for Rationalizing Natural Language Analogical Reasoning},
year = {2022},
month = may,
abstract = {The ability to recognize analogies is fundamental to human cognition. Existing benchmarks to test word analogy do not reveal the underneath process of analogical reasoning of neural models. Holding the belief that models capable of reasoning should be right for the right reasons, we propose a first-of-its-kind Explainable Knowledge-intensive Analogical Reasoning benchmark (E-KAR). Our benchmark consists of 1,655 (in Chinese) and 1,251 (in English) problems sourced from the Civil Service Exams, which require intensive background knowledge to solve. More importantly, we design a free-text explanation scheme to explain whether an analogy should be drawn, and manually annotate them for each and every question and candidate answer. Empirical results suggest that this benchmark is very challenging for some state-of-the-art models for both explanation generation and analogical question answering tasks, which invites further research in this area. Project page of E-KAR can be found at https://ekar-leaderboard.github.io.},
eprint = {https://openreview.net/forum?id=9kXOFRtrEj},
url = {https://ekar-leaderboard.github.io},
}
@InProceedings{sun2022rethinking,
author = {Zewei Sun and Mingxuan Wang and Hao Zhou and Chengqi Zhao and Shujian Huang and Jiajun Chen and Lei Li},
booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL) - Findings},
title = {Rethinking Document-level Neural Machine Translation},
year = {2022},
month = may,
abstract = {This paper does not aim at introducing a novel model for document-level neural machine translation. Instead, we head back to the original Transformer model and hope to answer the following question: Is the capacity of current models strong enough for document-level translation? Interestingly, we observe that the original Transformer with appropriate training techniques can achieve strong results for document translation, even with a length of 2000 words. We evaluate this model and several recent approaches on nine document-level datasets and two sentence-level datasets across six languages. Experiments show that document-level Transformer models outperform sentence-level ones and many previous methods in a comprehensive set of metrics, including BLEU, four lexical indices, three newly proposed assistant linguistic indicators, and human evaluation. Our new datasets and evaluation scripts are in https://github.com/sunzewei2715/Doc2Doc_NMT.},
code = {https://github.com/sunzewei2715/Doc2Doc_NMT},
eprint = {https://openreview.net/forum?id=sU9fYzNZ3xX},
}
@InProceedings{zhao2022compressing,
author = {Xuandong Zhao and Zhiguo Yu and Ming Wu and Lei Li},
booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL) - Findings},
title = {Compressing Sentence Representation via Homomorphic Projective Distillation},
year = {2022},
month = may,
code = {https://github.com/XuandongZhao/HPD},
eprint = {https://openreview.net/forum?id=n3cvM4Phez9},
}
@InProceedings{song2022switch,
author = {Zhenqiao Song and Hao Zhou and Lihua Qian and Jingjing Xu and Shanbo Cheng and Mingxuan Wang and Lei Li},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {{switch-GLAT}: Multilingual Parallel Machine Translation via Code-switch Decoder},
year = {2022},
month = apr,
abstract = {Multilingual machine translation aims to develop a single model for multiple language directions. However, existing multilingual models based on Transformer are limited in terms of both translation performance and inference speed. In this paper, we propose switch-GLAT, a non-autoregressive multilingual machine translation model with a code-switch decoder. It can generate contextual code-switched translations for a given source sentence, and perform code-switch back-translation, greatly boosting multilingual translation performance. In addition, its inference is highly efficient thanks to its parallel decoder. Experiments show that our proposed switch-GLAT outperforms the multilingual Transformer with as much as 1.16 BLEU improvement and 6.6x faster decoding speed in inference.},
eprint = {https://openreview.net/forum?id=5HvpvYd68b},
owner = {lilei.02},
}
@InProceedings{yang2022enhancing,
author = {Huiyun Yang and Huadong Chen and Hao Zhou and Lei Li},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {Enhancing Cross-lingual Transfer by Manifold Mixup},
year = {2022},
month = apr,
code = {https://github.com/yhy1117/X-Mixup},
eprint = {https://openreview.net/forum?id=OjPmfr9GkVv},
owner = {lilei.02},
}
@Patent{he2022method,
nationality = {US},
number = {11,265,598 B2},
year = {2022},
yearfiled = {2018},
assignee = {Beijing Bytedance Network Tech Co.},
author = {He, Yi and Li, Lei and Yang, Cheng and Li, Gen and Li, Yitan},
day = {1},
dayfiled = {29},
month = mar,
monthfiled = {#mar#},
title = {Method and device for determining duplicate video},
comment = {一种重复视频的判断方法及装置},
owner = {lilei.02},
}
@InProceedings{chen2022loren,
author = {Jiangjie Chen and Qiaoben Bao and Changzhi Sun and Xinbo Zhang and Jiaze Chen and Hao Zhou and Yanghua Xiao and Lei Li},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
title = {{LOREN}: Logic-Regularized Reasoning for Interpretable Fact Verification},
year = {2022},
month = feb,
abstract = {Given a natural language statement, how to verify its veracity against a large-scale textual knowledge source like Wikipedia? Most existing neural models make predictions without giving clues about which part of a false claim goes wrong. In this paper, we propose LOREN, an approach for interpretable fact verification. We decompose the verification of the whole claim at phrase-level, where the veracity of the phrases serves as explanations and can be aggregated into the final verdict according to logical rules. The key insight of LOREN is to represent claim phrase veracity as three-valued latent variables, which are regularized by aggregation logical rules. The final claim verification is based on all latent variables. Thus, LOREN enjoys the additional benefit of interpretability -- it is easy to explain how it reaches certain results with claim phrase veracity. Experiments on a public fact verification benchmark show that LOREN is competitive against previous approaches while enjoying the merit of faithful and accurate interpretability.},
code = {https://github.com/jiangjiechen/LOREN},
eprint = {https://arxiv.org/abs/2012.13577},
url = {https://huggingface.co/spaces/Jiangjie/loren-fact-checking},
}
@InProceedings{chen2022unsupervised,
author = {Jiangjie Chen and Chun Gan and Sijie Cheng and Hao Zhou and Yanghua Xiao and Lei Li},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
title = {Unsupervised Editing for Counterfactual Stories},
year = {2022},
month = feb,
abstract = {Creating what-if stories requires reasoning about prior statements and possible outcomes of the changed conditions. One can easily generate coherent endings under new conditions, but it would be challenging for current systems to do it with minimal changes to the original story. Therefore, one major challenge is the trade-off between generating a logical story and rewriting with minimal-edits. In this paper, we propose EDUCAT, an editing-based unsupervised approach for counterfactual story rewriting. EDUCAT includes a target position detection strategy based on estimating causal effects of the what-if conditions, which keeps the causal invariant parts of the story. EDUCAT then generates the stories under fluency, coherence and minimal-edits constraints. We also propose a new metric to alleviate the shortcomings of current automatic metrics and better evaluate the trade-off. We evaluate EDUCAT on a public counterfactual story rewriting benchmark. Experiments show that EDUCAT achieves the best trade-off over unsupervised SOTA methods according to both automatic and human evaluation.},
code = {https://github.com/jiangjiechen/EDUCAT},
eprint = {https://arxiv.org/abs/2112.05417},
}
@InProceedings{huang2022non,
author = {Chenyang Huang and Hao Zhou and Osmar Zaiane and Lili Mou and Lei Li},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
title = {Non-Autoregressive Translation with Layer-Wise Prediction and Deep Supervision},
year = {2022},
month = feb,
abstract = {How do we perform efficient inference while retaining high translation quality? Existing neural machine translation models, such as Transformer, achieve high performance, but they decode words one by one, which is inefficient. Recent non-autoregressive translation models speed up the inference, but their quality is still inferior. In this work, we propose DSLP, a highly efficient and high-performance model for machine translation. The key insight is to train a non-autoregressive Transformer with Deep Supervision and feed additional Layer-wise Predictions. We conducted extensive experiments on four translation tasks (both directions of WMT'14 EN-DE and WMT'16 EN-RO). Results show that our approach consistently improves the BLEU scores compared with respective base models. Specifically, our best variant outperforms the autoregressive model on three translation tasks, while being 14.8 times more efficient in inference.},
code = {https://github.com/chenyangh/DSLP},
eprint = {https://arxiv.org/abs/2110.07515},
}
@Article{chu2022icm,
author = {Ruihang Chu and Yukang Chen and Tao Kong and Lu Qi and Lei Li},
journal = {IEEE Robotics and Automation Letters (RA-L)},
title = {{ICM-3D}: Instantiated Category Modeling for 3D Instance Segmentation},
year = {2022},
month = jan,
number = {1},
pages = {57-64},
volume = {7},
abstract = {Separating 3D point clouds into individual instances is an important task for 3D vision. It is challenging due to the unknown and varying number of instances in a scene. Existing deep learning based works focus on a two-step pipeline: first learn a feature embedding and then cluster the points. Such a two-step pipeline leads to disconnected intermediate objectives. In this paper, we propose an integrated reformulation of 3D instance segmentation as a per-point classification problem. We propose ICM-3D, a single-step method to segment 3D instances via instantiated categorization. The augmented category information is automatically constructed from 3D spatial positions. We conduct extensive experiments to verify the effectiveness of ICM-3D and show that it obtains inspiring performance across multiple frameworks, backbones and benchmarks.},
doi = {10.1109/LRA.2021.3108483},
eprint = {https://arxiv.org/abs/2108.11771},
}
@InProceedings{zheng2021duplex,
author = {Zaixiang Zheng and Hao Zhou and Shujian Huang and Jiajun Chen and Jingjing Xu and Lei Li},
booktitle = {the 35th Conference on Neural Information Processing Systems (NeurIPS)},
title = {Duplex Sequence-to-Sequence Learning for Reversible Machine Translation},
year = {2021},
month = dec,
abstract = {Sequence-to-sequence learning naturally has two directions. How to effectively utilize supervision signals from both directions? Existing approaches either require two separate models, or a multitask-learned model but with inferior performance. In this paper, we propose REDER (REversible Duplex TransformER), a parameter-efficient model and apply it to machine translation. Either end of REDER can simultaneously input and output a distinct language. Thus REDER enables reversible machine translation by simply flipping the input and output ends. Experiments verify that REDER achieves the first success of reversible machine translation, which helps it outperform its multitask-trained baselines by up to 1.3 BLEU.},
code = {https://github.com/zhengzx-nlp/REDER},
eprint = {https://arxiv.org/abs/2105.03458},
owner = {lilei.02},
}
@Patent{li2021audioa,
nationality = {US},
number = {11,182,426 B2},
year = {2021},
yearfiled = {2018},
assignee = {Beijing Bytedance Network Tech Co.},
author = {Gen Li and Lei Li and Yi He},
day = {23},
dayfiled = {29},
month = nov,
monthfiled = {#dec#},
title = {Audio Retrieval and Identification Method and Device},
comment = {一种音频检索识别方法及装置},
owner = {lilei.02},
}
@InProceedings{qian2021volctrans,
author = {Lihua Qian and Yi Zhou and Zaixiang Zheng and Yaoming Zhu and Zehui Lin and Jiangtao Feng and Shanbo Cheng and Lei Li and Mingxuan Wang and Hao Zhou},
booktitle = {Sixth Conference on Machine Translation (WMT21)},
title = {The {Volctrans} {GLAT} System: Non-autoregressive Translation Meets {WMT21}},
year = {2021},
month = nov,
abstract = {This paper describes Volctrans' submission to the WMT21 news translation shared task for German->English translation. We build a parallel (i.e., non-autoregressive) translation system using the Glancing Transformer, which enables fast and accurate parallel decoding in contrast to the currently prevailing autoregressive models. To the best of our knowledge, this is the first parallel translation system that can be scaled to such a practical scenario as the WMT competition. More importantly, our parallel translation system achieves the best BLEU score (35.0) on the German->English translation task, outperforming all strong autoregressive counterparts.},
entrysubtype = {workshop},
eprint = {https://arxiv.org/abs/2109.11247},
}
@InProceedings{jiang2021learning,
author = {Qingnan Jiang and Mingxuan Wang and Jun Cao and Shanbo Cheng and Shujian Huang and Lei Li},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
title = {Learning Kernel-Smoothed Machine Translation with Retrieved Examples},
year = {2021},
month = nov,
abstract = {How to effectively adapt neural machine translation (NMT) models according to emerging cases without retraining? Despite the great success of neural machine translation, updating the deployed models online remains a challenge. Existing non-parametric approaches that retrieve similar examples from a database to guide the translation process are promising but are prone to overfit the retrieved examples. However, non-parametric methods are prone to overfit the retrieved examples. In this work, we propose to learn Kernel-Smoothed Translation with Example Retrieval (KSTER), an effective approach to adapt neural machine translation models online. Experiments on domain adaptation and multi-domain machine translation datasets show that even without expensive retraining, KSTER is able to achieve improvement of 1.1 to 1.5 BLEU scores over the best existing online adaptation methods. The code and trained models are released at https://github.com/jiangqn/KSTER.},
code = {https://github.com/jiangqn/KSTER},
eprint = {https://arxiv.org/abs/2109.09991},
video = {https://underline.io/lecture/38697-learning-kernel-smoothed-machine-translation-with-retrieved-examples},
}
@InProceedings{ru2021learning,
author = {Dongyu Ru and Changzhi Sun and Jiangtao Feng and Lin Qiu and Hao Zhou and Weinan Zhang and Yong Yu and Lei Li},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
title = {Learning Logic Rules for Document-level Relation Extraction},
year = {2021},
month = nov,
abstract = {Document-level relation extraction aims to identify relations between entities in a whole document. Prior efforts to capture long-range dependencies have relied heavily on implicitly powerful representations learned through (graph) neural networks, which makes the model less transparent. To tackle this challenge, in this paper, we propose LogiRE, a novel probabilistic model for document-level relation extraction by learning logic rules. LogiRE treats logic rules as latent variables and consists of two modules: a rule generator and a relation extractor. The rule generator is to generate logic rules potentially contributing to final predictions, and the relation extractor outputs final predictions based on the generated logic rules. Those two modules can be efficiently optimized with the expectation--maximization (EM) algorithm. By introducing logic rules into neural networks, LogiRE can explicitly capture long-range dependencies as well as enjoy better interpretation. Empirical results show that LogiRE significantly outperforms several strong baselines in terms of relation performance (∼1.8 F1 score) and logical consistency (over 3.3 logic score). Our code is available at https://github.com/rudongyu/LogiRE.},
code = {https://github.com/rudongyu/LogiRE},
eprint = {https://arxiv.org/abs/2111.05407},
video = {https://underline.io/lecture/38055-learning-logic-rules-for-document-level-relation-extraction},
}
@InProceedings{zeng2021gradient,
author = {Zhiyuan Zeng and Jiaze Chen and Weiran Xu and Lei Li},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
title = {Gradient-based Adversarial Factual Consistency Evaluation for Abstractive Summarization},
year = {2021},
month = nov,
abstract = {Neural abstractive summarization systems have made significant progress in recent years. However, abstractive summarization often produces inconsistent statements or false facts. How to automatically generate highly abstract yet factually correct summaries? In this paper, we propose an efficient weakly-supervised adversarial data augmentation approach to form the factual consistency dataset. Based on the artificial dataset, we train an evaluation model that can not only make accurate and robust factual consistency discrimination but is also capable of tracing interpretable factual errors via the backpropagated gradient distribution on token embeddings. Experiments and analysis conducted on public annotated summarization and factual consistency datasets demonstrate that our approach is effective and reasonable.},
code = {https://github.com/parZival27/GrAdualCC},
eprint = {https://aclanthology.org/2021.emnlp-main.337/},
}
@InProceedings{sun2021multilingual,
author = {Zewei Sun and Mingxuan Wang and Lei Li},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP) - Findings},
title = {Multilingual Translation via Grafting Pre-trained Language Models},
year = {2021},
month = nov,
abstract = {Can pre-trained BERT for one language and GPT for another be glued together to translate texts? Self-supervised training using only monolingual data has led to the success of pre-trained (masked) language models in many NLP tasks. However, directly connecting BERT as an encoder and GPT as a decoder can be challenging in machine translation, for GPT-like models lack a cross-attention component that is needed in seq2seq decoders. In this paper, we propose Graformer to graft separately pre-trained (masked) language models for machine translation. With monolingual data for pre-training and parallel data for grafting training, we maximally take advantage of the usage of both types of data. Experiments on 60 directions show that our method achieves average improvements of 5.8 BLEU in x2en and 2.9 BLEU in en2x directions compared with the multilingual Transformer of the same size.},
code = {https://github.com/sunzewei2715/Graformer},
eprint = {https://arxiv.org/abs/2109.05256},
}
@InProceedings{wang2021secoco,
author = {Tao Wang and Chengqi Zhao and Mingxuan Wang and Lei Li and Hang Li and Deyi Xiong},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP) - Findings},
title = {Secoco: Self-Correcting Encoding for Neural Machine Translation},
year = {2021},
month = nov,
abstract = {Different from previous robust approaches, Secoco enables NMT to explicitly correct noisy inputs and delete specific errors simultaneously with the translation decoding process. Secoco is able to achieve significant improvements of 1.6 BLEU points over strong baselines on two real-world test sets and a benchmark WMT dataset with good interpretability. The code and dataset are publicly available at \url{https://github.com/rgwt123/Secoco}.},
code = {https://github.com/rgwt123/Secoco},
eprint = {https://arxiv.org/abs/2108.12137},
}
@InProceedings{zhu2021counter,
author = {Yaoming Zhu and Jiangtao Feng and Chengqi Zhao and Mingxuan Wang and Lei Li},
booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP) - Findings},
title = {Counter-Interference Adapter for Multilingual Machine Translation},
year = {2021},
month = nov,
abstract = {Developing a unified multilingual model has long been a pursuit for machine translation. However, existing approaches suffer from performance degradation — a single multilingual model is inferior to separately trained bilingual ones on rich-resource languages. We conjecture that such a phenomenon is due to interference caused by joint training with multiple languages. To accommodate the issue, we propose CIAT, an adapted Transformer model with a small parameter overhead for multilingual machine translation. We evaluate CIAT on multiple benchmark datasets, including IWSLT, OPUS-100, and WMT. Experiments show that CIAT consistently outperforms strong multilingual baselines on 64 of total 66 language directions, 42 of which see above 0.5 BLEU improvement. Our code is available at https://github.com/Yaoming95/CIAT.},
code = {https://github.com/Yaoming95/CIAT},
eprint = {https://arxiv.org/abs/2104.08154},
}
@InProceedings{wang2021cnewsum,
author = {Danqing Wang and Jiaze Chen and Xianze Wu and Hao Zhou and Lei Li},
booktitle = {The 10th CCF International Conference on Natural Language Processing and Chinese Computing (NLPCC)},
title = {{CNewSum}: A Large-scale Chinese News Summarization Dataset with Human-annotated Adequacy and Deducibility Level},
year = {2021},
address = {Qingdao, China},
month = oct,
abstract = {Automatic text summarization aims to produce a brief but crucial summary for the input documents. Both extractive and abstractive methods have witnessed great success in English datasets in recent years. However, there has been a minimal exploration of text summarization in Chinese, limited by the lack of large-scale datasets. In this paper, we present a large-scale Chinese news summarization dataset CNewSum, which consists of 304,307 documents and human-written summaries for the news feed. It has long documents with high-abstractive summaries, which can encourage document-level understanding and generation for current summarization models. An additional distinguishing feature of CNewSum is that its test set contains adequacy and deducibility annotations for the summaries. The adequacy level measures the degree of summary information covered by the document, and the deducibility indicates the reasoning ability the model needs to generate the summary. These annotations can help researchers analyze and target their model performance bottleneck. We examine recent methods on CNewSum and release our dataset to provide a solid testbed for automatic Chinese summarization research.},
eprint = {https://arxiv.org/abs/2110.10874},
url = {https://dqwang122.github.io/projects/CNewSum/},
}
@InProceedings{li2021learning,
author = {Yunfei Li and Tao Kong and Lei Li and Yifeng Li and Yi Wu},
booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
title = {Learning to Design and Construct Bridge without Blueprint},
year = {2021},
month = sep,
abstract = {Autonomous assembly has been a desired functionality of many intelligent robot systems. We study a new challenging assembly task, designing and constructing a bridge without a blueprint. In this task, the robot needs to first design a feasible bridge architecture for arbitrarily wide cliffs and then manipulate the blocks reliably to construct a stable bridge according to the proposed design. In this paper, we propose a bi-level approach to tackle this task. At the high level, the system learns a bridge blueprint policy in a physical simulator using deep reinforcement learning and curriculum learning. A policy is represented as an attention-based neural network with object-centric input, which enables generalization to different numbers of blocks and cliff widths. For low-level control, we implement a motion-planning-based policy for real-robot motion control, which can be directly combined with a trained blueprint policy for real-world bridge construction without tuning. In our field study, our bi-level robot system demonstrates the capability of manipulating blocks to construct a diverse set of bridges with different architectures.},
eprint = {https://arxiv.org/abs/2108.02439},
}
@InProceedings{li2021simultaneous,
author = {Yiming Li and Tao Kong and Ruihang Chu and Yifeng Li and Peng Wang and Lei Li},
booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
title = {Simultaneous Semantic and Collision Learning for 6-DoF Grasp Pose Estimation},
year = {2021},
month = sep,
abstract = {Grasping in cluttered scenes has always been a great challenge for robots, due to the requirement of the ability to well understand the scene and object information. Previous works usually assume that the geometry information of the objects is available, or utilize a step-wise, multi-stage strategy to predict the feasible 6-DoF grasp poses. In this work, we propose to formalize the 6-DoF grasp pose estimation as a simultaneous multi-task learning problem. In a unified framework, we jointly predict the feasible 6-DoF grasp poses, instance semantic segmentation, and collision information. The whole framework is jointly optimized and end-to-end differentiable. Our model is evaluated on large-scale benchmarks as well as the real robot system. On the public dataset, our method outperforms prior state-of-the-art methods by a large margin (+4.08 AP). We also demonstrate the implementation of our model on a real robotic platform and show that the robot can accurately grasp target objects in cluttered scenarios with a high success rate.},
eprint = {https://arxiv.org/abs/2108.02425},
}
@InProceedings{shi2021follow,
author = {Wenxian Shi and Yuxuan Song and Hao Zhou and Bohan Li and Lei Li},
booktitle = {the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD)},
title = {Follow Your Path: a Progressive Method for Knowledge Distillation},
year = {2021},
month = sep,
abstract = {Deep neural networks often have a huge number of parameters, which poses challenges in deployment in application scenarios with limited memory and computation capacity. Knowledge distillation is one approach to derive compact models from bigger ones. However, it has been observed that a converged heavy teacher model is strongly constrained for learning a compact student network and could make the optimization subject to poor local optima. In this paper, we propose ProKT, a new model-agnostic method by projecting the supervision signals of a teacher model into the student's parameter space. Such projection is implemented by decomposing the training objective into local intermediate targets with an approximate mirror descent technique. The proposed method could be less sensitive to the quirks during optimization, which could result in a better local optimum. Experiments on both image and text datasets show that our proposed ProKT consistently achieves superior performance compared to other existing knowledge distillation methods.},
eprint = {https://arxiv.org/abs/2107.09305},
}
@InProceedings{ye2021end,
author = {Rong Ye and Mingxuan Wang and Lei Li},
booktitle = {the Annual Conference of the International Speech Communication Association (INTERSPEECH)},
title = {End-to-end Speech Translation via Cross-modal Progressive Training},
year = {2021},
month = aug,
abstract = {End-to-end speech translation models have become a new trend in research due to their potential of reducing error propagation. However, these models still suffer from the challenge of data scarcity. How to effectively use unlabeled or other parallel corpora from machine translation is promising but still an open problem. In this paper, we propose Cross Speech-Text Network (XSTNet), an end-to-end model for speech-to-text translation. XSTNet takes both speech and text as input and outputs both transcription and translation text. The model benefits from its three key design aspects: a self-supervised pre-trained sub-network as the audio encoder, a multi-task training objective to exploit additional parallel bilingual text, and a progressive training procedure. We evaluate the performance of XSTNet and baselines on the MuST-C En-X and LibriSpeech En-Fr datasets. In particular, XSTNet achieves state-of-the-art results on all language directions with an average BLEU of 28.8, outperforming the previous best method by 3.2 BLEU. Code, models, cases, and more detailed analysis are available at https://github.com/ReneeYe/XSTNet.},
code = {https://github.com/ReneeYe/XSTNet},
eprint = {https://arxiv.org/abs/2104.10380},
}
@InProceedings{lin2021learning,
author = {Zehui Lin and Liwei Wu and Mingxuan Wang and Lei Li},
booktitle = {the 59th Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {Learning Language Specific Sub-network for Multilingual Machine Translation},
year = {2021},
month = aug,
abstract = {Multilingual neural machine translation aims at learning a single translation model for multiple languages. These jointly trained models often suffer from performance degradation on rich-resource language pairs. We attribute this degeneration to parameter interference. In this paper, we propose LaSS to jointly train a single unified multilingual MT model. LaSS learns Language Specific Sub-network (LaSS) for each language pair to counter parameter interference. Comprehensive experiments on IWSLT and WMT datasets with various Transformer architectures show that LaSS obtains gains on 36 language pairs by up to 1.2 BLEU. Besides, LaSS shows its strong generalization performance at easy adaptation to new language pairs and zero-shot translation. LaSS boosts zero-shot translation with an average of 8.3 BLEU on 30 language pairs.},
code = {https://github.com/NLP-Playground/LaSS},
eprint = {https://arxiv.org/abs/2105.09259},
timestamp = {2020-05-01},
}
@InProceedings{pan2021contrastive,
author = {Xiao Pan and Liwei Wu and Mingxuan Wang and Lei Li},
booktitle = {the 59th Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {Contrastive Learning for Many-to-many Multilingual Neural Machine Translation},
year = {2021},
month = aug,
abstract = {Existing multilingual machine translation approaches mainly focus on English-centric directions, while the non-English directions still lag behind. In this work, we aim to build a many-to-many translation system with an emphasis on the quality of non-English language directions. Our intuition is based on the hypothesis that a universal cross-language representation leads to better multilingual translation performance. To this end, we propose mRASP2, a training method to obtain a single unified multilingual translation model. mRASP2 is empowered by two techniques: a) a contrastive learning scheme to close the gap among representations of different languages, and b) data augmentation on both multiple parallel and monolingual data to further align token representations. For English-centric directions, mRASP2 outperforms existing best unified model and achieves competitive or even better performance than the pre-trained and fine-tuned model mBART on tens of WMT's translation directions. For non-English directions, mRASP2 achieves an improvement of average 10+ BLEU compared with the multilingual Transformer baseline.},
code = {https://github.com/PANXiao1994/mRASP2},
eprint = {https://arxiv.org/abs/2105.09501},
slides = {pubs/mRASP2_ACL2021.pdf},
timestamp = {2020-05-01},
url = {https://medium.com/@panxiao1994/mrasp2-multilingual-nmt-advances-via-contrastive-learning-ac8c4c35d63},
video = {https://underline.io/lecture/25372-contrastive-learning-for-many-to-many-multilingual-neural-machine-translation},
}
@InProceedings{qian2021glancing,
author = {Lihua Qian and Hao Zhou and Yu Bao and Mingxuan Wang and Lin Qiu and Weinan Zhang and Yong Yu and Lei Li},
booktitle = {the 59th Annual Meeting of the Association for Computational Linguistics (ACL)},
title = {Glancing Transformer for Non-Autoregressive Neural Machine Translation},
year = {2021},
month = aug,
abstract = {Recent work on non-autoregressive neural machine translation (NAT) aims at improving the efficiency by parallel decoding without sacrificing the quality. However, existing NAT methods are either inferior to Transformer or require multiple decoding passes, leading to reduced speedup. We propose the Glancing Language Model (GLM), a method to learn word interdependency for single-pass parallel generation models. With GLM, we develop Glancing Transformer (GLAT) for machine translation. With only single-pass parallel decoding, GLAT is able to generate high-quality translation with 8-15 times speedup. Experiments on multiple WMT language directions show that GLAT outperforms all previous single pass non-autoregressive methods, and is nearly comparable to Transformer, reducing the gap to 0.25-0.9 BLEU points.},
code = {https://github.com/FLC777/GLAT},
comment = {The main algorithm that achieves top 1 BLEU scores in WMT21 En-De and De-En machine translation contest.},
eprint = {https://arxiv.org/abs/2008.07905},
timestamp = {2020-05-01},