diff --git a/docs/authors/zhigang-wang/index.xml b/docs/authors/zhigang-wang/index.xml
index 0a2b2360..59027e9d 100755
--- a/docs/authors/zhigang-wang/index.xml
+++ b/docs/authors/zhigang-wang/index.xml
@@ -28,6 +28,14 @@
+
+ KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+ /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+ Mon, 01 Jan 0001 00:00:00 +0000
+ /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+
+
+
Unsupervised Multi-Source Domain Adaptation for Person Re-Identification
/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/
diff --git a/docs/index.json b/docs/index.json
index b6750a25..58182903 100755
--- a/docs/index.json
+++ b/docs/index.json
@@ -1 +1 @@
-[{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"4e73f707a3c1da0c5d8d165361161c7b","permalink":"/authors/19_ruize/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/19_ruize/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Ruize Xu","type":"authors"},{"authors":null,"categories":null,"content":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He got his master degree at China Agricultural University in 2020 and got into GeWu-Lab since then. His recently research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and make warm artificial intelligence research! People who are interested in my research domain are very welcome and do not hesitate to contact me actively. For more information, please visit his personal homepage. Valar Morghulis!\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"537de72d4cb178cea6fbf2b2a92ea589","permalink":"/authors/20_guangyao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_guangyao/","section":"authors","summary":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He got his master degree at China Agricultural University in 2020 and got into GeWu-Lab since then. His recently research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and make warm artificial intelligence research! People who","tags":null,"title":"Guangyao Li","type":"authors"},{"authors":null,"categories":null,"content":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He got his undergraduate degree at School of Information, Renmin University of China in 2020 and got into GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. And he is also devoted to help these visually impaired with AI in both technology and practice.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"22debf3f166bda4bfb28c8317489f918","permalink":"/authors/20_xiaokang/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xiaokang/","section":"authors","summary":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He got his undergraduate degree at School of Information, Renmin University of China in 2020 and got into GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. And he is also devoted to help these visually impaired with AI in both technology and practice.","tags":null,"title":"Xiaokang Peng","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"55a49bcd8ae300a0362a45302ca97c26","permalink":"/authors/20_xuemin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xuemin/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Xuemin Liu","type":"authors"},{"authors":null,"categories":null,"content":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-surpervised Representation Learning. Now he is working on video understanding and speaker diarization task for complex speech scenario. He is also interested in Internet finance, and has got his Bachelor of Finance in Renmin University of China besides the Computer Science degree.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"033ae9c233d8ca15172e0f0eb482735e","permalink":"/authors/20_yixin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_yixin/","section":"authors","summary":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-surpervised Representation Learning. Now he is working on video understanding and speaker diarization task for complex speech scenario. He is also interested in Internet finance, and has got his Bachelor of Finance in Renmin University of China besides the Computer Science degree.","tags":null,"title":"Yixin Xu","type":"authors"},{"authors":null,"categories":null,"content":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate he works with Prof. Di Hu. Now Rui is a Ph.D. student in Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"9434b9dca31f1f23a676f2b869e0c881","permalink":"/authors/21_ruiqian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_ruiqian/","section":"authors","summary":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate he works with Prof. Di Hu. Now Rui is a Ph.D. student in Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.","tags":null,"title":"Rui Qian","type":"authors"},{"authors":null,"categories":null,"content":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"57b1d4e29185f3870d53fc65c766173e","permalink":"/authors/21_yake/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_yake/","section":"authors","summary":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.","tags":null,"title":"Yake Wei","type":"authors"},{"authors":null,"categories":null,"content":"Andong Deng spent a wonderful year at GeWu Lab doing research about multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an upcoming PhD student in 2022 Fall at Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c95476ad24cc214056b3d2c5e8c90f17","permalink":"/authors/22_andong/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_andong/","section":"authors","summary":"Andong Deng spent a wonderful year at GeWu Lab doing research about multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an upcoming PhD student in 2022 Fall at Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.","tags":null,"title":"Andong Deng","type":"authors"},{"authors":null,"categories":null,"content":"Wenke is a Ph.D student since 2022 Fall at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focus on building a generalizable manipulation policy with computer vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a2791369e75b13b52139d9860293bdd5","permalink":"/authors/22_wenke/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenke/","section":"authors","summary":"Wenke is a Ph.D student since 2022 Fall at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focus on building a generalizable manipulation policy with computer vision.","tags":null,"title":"Wenke Xia","type":"authors"},{"authors":null,"categories":null,"content":"Wenxuan is a second-year Ph.D student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He has got his bachelor\u0026rsquo;s degree and master\u0026rsquo;s degree in Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"cd37724dba9b446f1c1307e40cd45632","permalink":"/authors/22_wenxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenxuan/","section":"authors","summary":"Wenxuan is a second-year Ph.D student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He has got his bachelor\u0026rsquo;s degree and master\u0026rsquo;s degree in Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.","tags":null,"title":"Wenxuan Hou","type":"authors"},{"authors":null,"categories":null,"content":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on scene understanding in embodied ai with multi-modal.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a389590984a0c3fb50de499f8df2d4c0","permalink":"/authors/22_xincheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_xincheng/","section":"authors","summary":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on scene understanding in embodied ai with multi-modal.","tags":null,"title":"Xincheng Pang","type":"authors"},{"authors":null,"categories":null,"content":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"d884fc3eb1e2b2382def5073cec5e105","permalink":"/authors/22_zequn/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_zequn/","section":"authors","summary":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.","tags":null,"title":"Zequn Yang","type":"authors"},{"authors":null,"categories":null,"content":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Dalian University of Technology in 2023. Currently his research instrests focus on Large language Models and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"0f875044223f8afd458b089859ba38d8","permalink":"/authors/23_henghui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_henghui/","section":"authors","summary":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Dalian University of Technology in 2023. Currently his research instrests focus on Large language Models and cross-modal generation.","tags":null,"title":"Henghui Du","type":"authors"},{"authors":null,"categories":null,"content":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanism of multi-modal.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"8808a5aa1460c5cb4fad660d28f8520a","permalink":"/authors/23_jiahao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jiahao/","section":"authors","summary":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanism of multi-modal.","tags":null,"title":"Jiahao Li","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"1697602eb95e74d0fb1a9247c1f07489","permalink":"/authors/23_jianghan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jianghan/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Jianghan Chao","type":"authors"},{"authors":null,"categories":null,"content":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bdeafc1f9127d19078299ad17ddcf547","permalink":"/authors/23_jingxian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jingxian/","section":"authors","summary":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.","tags":null,"title":"Jingxian Lu","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"b1f3ebd7d0f58e6a501810a383c4a9ed","permalink":"/authors/23_jinlin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jinlin/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Jinlin Li","type":"authors"},{"authors":null,"categories":null,"content":"Juncheng is a third-year student of School of Artificial Intelligence, University of Chinese Academy of Sciences. His research interests include audio-visual localization and segmentation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"874c09024781e4fd5375423eaef9c9e8","permalink":"/authors/23_juncheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_juncheng/","section":"authors","summary":"Juncheng is a third-year student of School of Artificial Intelligence, University of Chinese Academy of Sciences. His research interests include audio-visual localization and segmentation.","tags":null,"title":"Juncheng Ma","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"834fe556c30cd4180a6dc4c692fd63d9","permalink":"/authors/23_liangce/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_liangce/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Ce Liang","type":"authors"},{"authors":null,"categories":null,"content":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning including sentiment, segmentation and foundation models.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bf84fe39ef0b614af0ae82d08359c784","permalink":"/authors/23_peiwen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_peiwen/","section":"authors","summary":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning including sentiment, segmentation and foundation models.","tags":null,"title":"Peiwen Sun","type":"authors"},{"authors":null,"categories":null,"content":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c29a63de0242659b43a43451fc077046","permalink":"/authors/23_ruoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_ruoxuan/","section":"authors","summary":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.","tags":null,"title":"Ruoxuan Feng","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"364786f50ed04bbfb2309f8069cdbe90","permalink":"/authors/23_shaoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_shaoxuan/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Shaoxuan Xu","type":"authors"},{"authors":null,"categories":null,"content":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. He is interested in image editing with generative diffusion models and image deblurring.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"062e874f9d4216ee7c15e6afe41e1631","permalink":"/authors/23_siwei/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_siwei/","section":"authors","summary":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. He is interested in image editing with generative diffusion models and image deblurring.","tags":null,"title":"Siwei Li","type":"authors"},{"authors":null,"categories":null,"content":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bda305ecfaa132f6e49d2dd2566d0f25","permalink":"/authors/23_yaoting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_yaoting/","section":"authors","summary":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.","tags":null,"title":"Yaoting Wang","type":"authors"},{"authors":null,"categories":null,"content":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"7a6ee1988cb2fa93bfeee88a094c7489","permalink":"/authors/24_jirui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_jirui/","section":"authors","summary":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.","tags":null,"title":"JiRui Huang","type":"authors"},{"authors":null,"categories":null,"content":"Yuchen is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Zhejiang University in 2024.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"186e15560cfa29bcd45c618efc625779","permalink":"/authors/24_yuchen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_yuchen/","section":"authors","summary":"Yuchen is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Zhejiang University in 2024.","tags":null,"title":"Yuchen Li","type":"authors"},{"authors":["dihu"],"categories":null,"content":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was previously a research scientist at Baidu Research. Di Hu obtained the Ph.D degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via the natural multimodal messages. He is an aficionado of cognitive neuroscience and has wrote one study note during his undergraduate. Inspired by what he learned from cognitive neuroscience, and what he observed and deliberated from the daily-life, he strongly convinced that the pervasive, free, natural multimodal messages can provide sufficient information for perceiving, learning and understanding environment, even the agent itself, which promisingly makes multimodal learning become one of the key to achieve machine intelligence.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"2525497d367e79493fd32b198b28f040","permalink":"/authors/admin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/admin/","section":"authors","summary":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was previously a research scientist at Baidu Research. Di Hu obtained the Ph.D degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via the natural multimodal messages. He is an aficionado of","tags":null,"title":"Di Hu","type":"authors"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"852b491b0dcadb44b8f099f931db74c4","permalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"A Two-Stage Framework for Multiple Sound-Source Localization","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"b21459d2cd2aa98d5a771a396df3c29e","permalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","section":"publication","summary":"","tags":null,"title":"Ambient Sound Helps: Audiovisual Crowd Counting in Extreme Conditions","type":"publication"},{"authors":["Wenke Xia*","Xu Zhao*","Xincheng Pang","Changqing Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"7a5ff9681de843469038165a230c4f87","permalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","section":"publication","summary":"","tags":null,"title":"Balanced Audiovisual Dataset for Imbalance Analysis","type":"publication"},{"authors":["Xiaokang Peng*","Yake Wei*","Andong Deng","Dong Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1cdda2159c4adeb4f31cb4e7f1a5ab8a","permalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","section":"publication","summary":"","tags":null,"title":"Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)","type":"publication"},{"authors":["Yaoting Wang*","Peiwen Sun*","Yuanchao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e2d14df72502e78a30f83d09310b98b6","permalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","section":"publication","summary":"","tags":null,"title":"Can Textual Semantics Mitigate Sounding Object SegmentationPreference?","type":"publication"},{"authors":["Di Hu","Yake Wei","Rui Qian","Weiyao Lin","Ruihua Song","Ji-Rong Wen"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"91e67073102678aec9799732ceef49f3","permalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","section":"publication","summary":"","tags":null,"title":"Class-aware Sounding Objects Localization via Audiovisual Correspondence","type":"publication"},{"authors":["Yapeng Tian*","Di Hu*","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c0d82a52007e4e9ab50a2cfafdc4ac17","permalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","section":"publication","summary":"","tags":null,"title":"Co-Learn Sounding Object Visual Grounding and Visually Indicated Sound Separation in A Cycle","type":"publication"},{"authors":["Di Hu","Xuhong Li","Lichao Mou","Pu Jin","Dong Chen","Liping Jing","Xiaoxiang Zhu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c7688dd14aa743d0b927f94d97854f27","permalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","section":"publication","summary":"","tags":null,"title":"Cross-Task Transfer for Geotagged Audiovisual Aerial Scene Recognition","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac02b15b850ff085e6c9ad497f3a130c","permalink":"/publication/curriculum-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/curriculum-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Curriculum Audiovisual Learning","type":"publication"},{"authors":["Yapeng Tian","Di Hu","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"cd0308a1bfb55705c394057955f2375d","permalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","section":"publication","summary":"","tags":null,"title":"Cyclic Co-Learning of Sounding Object Visual Grounding and Sound Separation","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"202776673a51788c119f1451c9e313c2","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"00f72a8fe1deeb265958a59b94c2cd33","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"f6c0a9a658cdceee78bd291860181d99","permalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","section":"publication","summary":"","tags":null,"title":"Deep Linear Discriminant Analysis Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d1466a6c42ba930502049d24243f8b62","permalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Deep Multimodal Clustering for Unsupervised Audiovisual Learning Representation","type":"publication"},{"authors":["Di Hu - Chengze Wang - Feiping Nie - Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9e4cd76d6b972d54b50c190779f639a5","permalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","section":"publication","summary":"","tags":null,"title":"Dense Multimodal Fusion for Hierarchically Joint Representation","type":"publication"},{"authors":["Xincheng Pang","Wenke Xia","Zhigang Wang","Bin Zhao","Di Hu","Dong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"832f5776c5daa77fa5df21ce843a3196","permalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","section":"publication","summary":"","tags":null,"title":"Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection","type":"publication"},{"authors":["Yake Wei","Siwei Li","Ruoxuan Feng","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"591c348a8e03f441318436eb005ae2cc","permalink":"/publication/diagnosing-and-re-learning-for-balanced-multimodal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/diagnosing-and-re-learning-for-balanced-multimodal-learning/","section":"publication","summary":"","tags":null,"title":"Diagnosing and Re-learning for Balanced Multimodal Learning","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"4b1e10b4327cca00dfd58162571a2f8c","permalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","section":"publication","summary":"","tags":null,"title":"Discrete Spectral Hashing for Efficient Similarity Retrieval","type":"publication"},{"authors":["Di Hu","Rui Qian","Minyue Jiang","Xiao Tan","Shilei Wen","Errui Ding","Weiyao Lin","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6953eeac03ee85322e85eece2eeeb84","permalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","section":"publication","summary":"","tags":null,"title":"Discriminative Sounding Objects Localization via Self-supervised Audiovisual Matching","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiaoxiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3016d01c7b86e792f8778f7aba6fc44d","permalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","section":"publication","summary":"","tags":null,"title":"Does Ambient Sound Help? - Audiovisual Crowd Counting","type":"publication"},{"authors":["Yake Wei","Ruoxuan Feng","Zihe Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"0278c6a7c52909fa5c55eaf522569e7f","permalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","section":"publication","summary":"","tags":null,"title":"Enhancing Multi-modal Cooperation via Fine-grained Modality Valuation","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Di Hu","Hang Zhou","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"28bd51450c42258842f48363910f83c8","permalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"Exploiting Visual Context Semantics for Sound Source Localization","type":"publication"},{"authors":["Sijia Yang","Haoyi Xiong","Di Hu","Kaibo Xu","Licheng Wang","Peizhen Zhu","Zeyi Sun"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ed52bf34eef1f16fc89a0fc5c32fa152","permalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","section":"publication","summary":"","tags":null,"title":"Generalising Combinatorial Discriminant Analysis through Conditioning Truncated Rayleigh Flow","type":"publication"},{"authors":["Zequn Yang","Han Zhang","Yake Wei","Zheng Wang","Feiping Nie","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"76c72a76e4cf8516d166a780e270c79b","permalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","section":"publication","summary":"","tags":null,"title":"Geometric-Inspired Graph-based Incomplete Multi-view Clustering","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8fe03bbbdab04c3ee4ecc7e01ecd723c","permalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Heterogeneous Scene Analysis via Self-supervised Audiovisual Learning","type":"publication"},{"authors":["Xuelong Li","Di Hu","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1850ab6a7473c571586aed28d796ac66","permalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","section":"publication","summary":"","tags":null,"title":"Image2song: Song Retrieval via Bridging Image Content and Lyric Words","type":"publication"},{"authors":["Wenke Xia","Dong Wang","Xincheng Pang","Zhigang Wang","Bin Zhao","Di Hu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"82a334df3b6181644b600e4679ce595c","permalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","section":"publication","summary":"","tags":null,"title":"Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs","type":"publication"},{"authors":["Xuelong Li","Di Hu","Feiping Nie"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"24881bb5f959ea9f061fb67469d72eb9","permalink":"/publication/large-graph-hashing-with-spectral-rotation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/large-graph-hashing-with-spectral-rotation/","section":"publication","summary":"","tags":null,"title":"Large Graph Hashing with Spectral Rotation","type":"publication"},{"authors":["Yake Wei","Di Hu","Yapeng Tian","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"67b2f40c745acaa698a385e2742a25bc","permalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","section":"publication","summary":"","tags":null,"title":"Learning in Audio-visual Context: A Review, Analysis, and New Perspective","type":"publication"},{"authors":["Guangyao Li*","Yake Wei*","Yapeng Tian*","Chenliang Xu","Ji-Rong Wen","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"125a97cdaa82fb5a0ec455cfd53c1b46","permalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","section":"publication","summary":"","tags":null,"title":"Learning to Answer Questions in Dynamic Audio-Visual Scenarios","type":"publication"},{"authors":["Di Hu","Dong Wang","Xuelong Li","Feiping Nie","Qi Wang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c716bb52e5e46a2dbaebc46fda1517d6","permalink":"/publication/listen-to-the-image/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/listen-to-the-image/","section":"publication","summary":"","tags":null,"title":"Listen to the Image","type":"publication"},{"authors":["Ruize Xu","Ruoxuan Feng","Shi-xiong Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8e1ed6fc418000d90eed8231ce30fa73","permalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","section":"publication","summary":"","tags":null,"title":"MMCosine: Multi-Modal Cosine Loss Towards Balanced Audio-Visual Fine-Grained Learning","type":"publication"},{"authors":["Yake Wei","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ad6411f0202e0562a67a75820ff098f","permalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","section":"publication","summary":"","tags":null,"title":"MMPareto: Innocent Uni-modal Assistance for Enhanced Multi-modal Learning","type":"publication"},{"authors":["Guangyao Li","Yixin Xu","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"38daed7d60d2831123ddca90ac47d9b7","permalink":"/publication/multi-scale-attention-for-audio-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multi-scale-attention-for-audio-question-answering/","section":"publication","summary":"","tags":null,"title":"Multi-Scale Attention for Audio Question Answering","type":"publication"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"88c9d48496c44a5980763aa946676e9e","permalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","section":"publication","summary":"","tags":null,"title":"Multiple Sound Sources Localization from Coarse to Fine","type":"publication"},{"authors":["Ziyun Li","Xinshao Wang","Haojin Yang","Di Hu","Neil M Robertson","David A Clifton","Christoph Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"a48ea4ca10463e6ef980903ef312977d","permalink":"/publication/not-all-knowledge-is-created-equal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/not-all-knowledge-is-created-equal/","section":"publication","summary":"","tags":null,"title":"Not All Knowledge Is Created Equal","type":"publication"},{"authors":["Guangyao Li","Wenxuan Hou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"545100c95da731d9faeb7037b5801449","permalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","section":"publication","summary":"","tags":null,"title":"Progressive Spatio-temporal Perception for Audio-Visual Question Answering","type":"publication"},{"authors":["Yaoting Wang*","Weisong Liu*","Guangyao Li","Jian Ding","Di Hu","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d414aab41857970b60155d360ceac88","permalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","section":"publication","summary":"","tags":null,"title":"Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer","type":"publication"},{"authors":["Zequn Yang","Yake Wei","Ce Liang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d069d78586930bf2dd726ae7c0b00c9b","permalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","section":"publication","summary":"","tags":null,"title":"Quantifying and Enhancing Multi-modal Robustness with Modality Preference","type":"publication"},{"authors":["Yaoting Wang*","Peiwen Sun*","Dongzhan Zhou","Guangyao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e787cc7b340511ed0ad617eaf61af942","permalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","section":"publication","summary":"","tags":null,"title":"Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes","type":"publication"},{"authors":["Ruoxuan Feng","Wenke Xia","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"450f11c7cb976aa1013ed40cd3963388","permalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","section":"publication","summary":"","tags":null,"title":"Revisiting Pre-training in Audio-Visual Learning","type":"publication"},{"authors":["Wenke Xia","Xingjian Li","Andong Deng","Haoyi Xiong","Dejing Dou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d68814ab18c4fd432535b2592c31988","permalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","section":"publication","summary":"","tags":null,"title":"Robust Cross-modal Knowledge Distillation for Unconstrained Videos","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Wanli Ouyang","Hang Zhou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"acd80d91071719018f44e8766871cb74","permalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","section":"publication","summary":"","tags":null,"title":"SeCo: Separating Unknown Musical Visual Sounds with Consistency Guidance","type":"publication"},{"authors":["Konrad Heidler","Lichao Mou","Di Hu","Pu Jin","Guangyao Li","Chuang Gan","Ji-Rong Wen","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"36c9fd21445495f69bad705471393094","permalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","section":"publication","summary":"","tags":null,"title":"Self-supervised Audiovisual Representation Learning for Remote Sensing Data","type":"publication"},{"authors":["Di Hu","Zheng Wang","Feiping Nie","Rong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac1ac86aa9c1772d446b7594a05d9100","permalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","section":"publication","summary":"","tags":null,"title":"Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis","type":"publication"},{"authors":["Dongzhan Zhou","Xinchi Zhou","Di Hu","Hang Zhou","Lei Bai","Ziwei Liu","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3f2c9d5779b3cec3c9b69a845335b218","permalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","section":"publication","summary":"","tags":null,"title":"SepFusion: Finding Optimal Fusion Structures for Visual Sound Separation","type":"publication"},{"authors":["Tao Wu","Xuewei Li","Zhongang Qi","Di Hu","Xintao Wang","Ying Shan","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"75b3553d0dff4fb43ea7284e9d6f8d1c","permalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","section":"publication","summary":"","tags":null,"title":"SphereDiffusion: Spherical Geometry-aware Distortion Resilient Diffusion Model","type":"publication"},{"authors":["Juncheng Ma","Peiwen Sun","Yaoting Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1fe41f212fd0141fdf179a000dd9df81","permalink":"/publication/stepping-stones-a-progressive-training-strategy-for-audio-visual-semantic-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/stepping-stones-a-progressive-training-strategy-for-audio-visual-semantic-segmentation/","section":"publication","summary":"","tags":null,"title":"Stepping Stones: A Progressive Training Strategy for Audio-Visual Semantic Segmentation","type":"publication"},{"authors":["ZiYun Li","Jona Otholt","Ben Dai","Di Hu","Christoph Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"50b53591fe6d761222acbe7d191d3e47","permalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","section":"publication","summary":"","tags":null,"title":"Supervised Knowledge May Hurt Novel Class Discovery Performance","type":"publication"},{"authors":["Di Hu","Xuelong Li","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6704b0eb55495bb979be6fcbb8243ae","permalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","section":"publication","summary":"","tags":null,"title":"Temporal Multimodal Learning in Audiovisual Speech Recognition","type":"publication"},{"authors":["Dong Wang","Di Hu","Xingjian Li","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"807bb234ac2724175550dbdf52f64d08","permalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","section":"publication","summary":"","tags":null,"title":"Temporal Relational Modeling with Self-Supervision for Action Segmentation","type":"publication"},{"authors":["Hongpeng Lin*","Ludan Ruan*","Wenke Xia*","Peiyu Liu","Jingyuan Wen","Yixin Xu","Di Hu","Ruihua Song","Wayne Xin Zhao","Qin Jin","Zhiwu Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"dd91d75ebb359650640b7b6c75634dff","permalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","section":"publication","summary":"","tags":null,"title":"TikTalk: A Video-Based Dialogue Dataset for Multi-Modal Chitchat in Real World","type":"publication"},{"authors":["Xingjian Li","Di Hu","Xuhong Li","Haoyi Xiong","Zhi Ye","Zhipeng Wang","Chengzhong Xu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"5b40a464bbfccb601c6d4c37e85cf81e","permalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","section":"publication","summary":"","tags":null,"title":"Towards Accurate Knowledge Transfer via Target-awareness Representation Disentanglement","type":"publication"},{"authors":["Andong Deng","Xingjian Li","Di Hu","Tianyang Wang","Haoyi Xiong","Chengzhong Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"113edd12d767a54c1fdd10685167cd5c","permalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","section":"publication","summary":"","tags":null,"title":"Towards Inadequately Pre-trained Models in Transfer Learning","type":"publication"},{"authors":["Wenxuan Hou*","Guangyao Li*","Yapeng Tian","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"56c2e256bf8d4a20cdffe034f430aaef","permalink":"/publication/towards-long-form-audio-visual-video-understanding/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-long-form-audio-visual-video-understanding/","section":"publication","summary":"","tags":null,"title":"Towards Long Form Audio-visual Video Understanding","type":"publication"},{"authors":["Zechen Bai","Zhigang Wang","Jian Wang","Di Hu","Errui Ding"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9905f139a565b4f5eabfc5902965f851","permalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","section":"publication","summary":"","tags":null,"title":"Unsupervised Multi-Source Domain Adaptation for Person Re-Identification","type":"publication"},{"authors":["Peiwen Sun","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ff959ec7e9a3da6203370e48a939fd1","permalink":"/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/","section":"publication","summary":"","tags":null,"title":"Unveiling and Mitigating Bias in Audio Visual Segmentation (ACM MM Oral)","type":"publication"},{"authors":["Xian Liu","Rui Qian","Hang Zhou","Di Hu","Weiyao Lin","Ziwei Liu","Bolei Zhou","Xiaowei Zhou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ca462fd19e2017e2ecb2b26a145ef250","permalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","section":"publication","summary":"","tags":null,"title":"Visual Sound Localization in-the-Wild by Cross-Modal Interference Erasing","type":"publication"}]
\ No newline at end of file
+[{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"4e73f707a3c1da0c5d8d165361161c7b","permalink":"/authors/19_ruize/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/19_ruize/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Ruize Xu","type":"authors"},{"authors":null,"categories":null,"content":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He got his master degree at China Agricultural University in 2020 and got into GeWu-Lab since then. His recently research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and make warm artificial intelligence research! People who are interested in my research domain are very welcome and do not hesitate to contact me actively. For more information, please visit his personal homepage. Valar Morghulis!\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"537de72d4cb178cea6fbf2b2a92ea589","permalink":"/authors/20_guangyao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_guangyao/","section":"authors","summary":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He got his master degree at China Agricultural University in 2020 and got into GeWu-Lab since then. His recently research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and make warm artificial intelligence research! People who","tags":null,"title":"Guangyao Li","type":"authors"},{"authors":null,"categories":null,"content":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He got his undergraduate degree at School of Information, Renmin University of China in 2020 and got into GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. And he is also devoted to help these visually impaired with AI in both technology and practice.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"22debf3f166bda4bfb28c8317489f918","permalink":"/authors/20_xiaokang/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xiaokang/","section":"authors","summary":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He got his undergraduate degree at School of Information, Renmin University of China in 2020 and got into GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. And he is also devoted to help these visually impaired with AI in both technology and practice.","tags":null,"title":"Xiaokang Peng","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"55a49bcd8ae300a0362a45302ca97c26","permalink":"/authors/20_xuemin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xuemin/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Xuemin Liu","type":"authors"},{"authors":null,"categories":null,"content":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-surpervised Representation Learning. Now he is working on video understanding and speaker diarization task for complex speech scenario. He is also interested in Internet finance, and has got his Bachelor of Finance in Renmin University of China besides the Computer Science degree.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"033ae9c233d8ca15172e0f0eb482735e","permalink":"/authors/20_yixin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_yixin/","section":"authors","summary":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-surpervised Representation Learning. Now he is working on video understanding and speaker diarization task for complex speech scenario. He is also interested in Internet finance, and has got his Bachelor of Finance in Renmin University of China besides the Computer Science degree.","tags":null,"title":"Yixin Xu","type":"authors"},{"authors":null,"categories":null,"content":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate he works with Prof. Di Hu. Now Rui is a Ph.D. student in Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"9434b9dca31f1f23a676f2b869e0c881","permalink":"/authors/21_ruiqian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_ruiqian/","section":"authors","summary":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate he works with Prof. Di Hu. Now Rui is a Ph.D. student in Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.","tags":null,"title":"Rui Qian","type":"authors"},{"authors":null,"categories":null,"content":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"57b1d4e29185f3870d53fc65c766173e","permalink":"/authors/21_yake/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_yake/","section":"authors","summary":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.","tags":null,"title":"Yake Wei","type":"authors"},{"authors":null,"categories":null,"content":"Andong Deng spent a wonderful year at GeWu Lab doing research about multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an upcoming PhD student in 2022 Fall at Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c95476ad24cc214056b3d2c5e8c90f17","permalink":"/authors/22_andong/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_andong/","section":"authors","summary":"Andong Deng spent a wonderful year at GeWu Lab doing research about multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an upcoming PhD student in 2022 Fall at Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.","tags":null,"title":"Andong Deng","type":"authors"},{"authors":null,"categories":null,"content":"Wenke is a Ph.D student since 2022 Fall at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focus on building a generalizable manipulation policy with computer vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a2791369e75b13b52139d9860293bdd5","permalink":"/authors/22_wenke/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenke/","section":"authors","summary":"Wenke is a Ph.D student since 2022 Fall at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focus on building a generalizable manipulation policy with computer vision.","tags":null,"title":"Wenke Xia","type":"authors"},{"authors":null,"categories":null,"content":"Wenxuan is a second-year Ph.D student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He has got his bachelor\u0026rsquo;s degree and master\u0026rsquo;s degree in Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"cd37724dba9b446f1c1307e40cd45632","permalink":"/authors/22_wenxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenxuan/","section":"authors","summary":"Wenxuan is a second-year Ph.D student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He has got his bachelor\u0026rsquo;s degree and master\u0026rsquo;s degree in Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.","tags":null,"title":"Wenxuan Hou","type":"authors"},{"authors":null,"categories":null,"content":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on scene understanding in embodied ai with multi-modal.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a389590984a0c3fb50de499f8df2d4c0","permalink":"/authors/22_xincheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_xincheng/","section":"authors","summary":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on scene understanding in embodied ai with multi-modal.","tags":null,"title":"Xincheng Pang","type":"authors"},{"authors":null,"categories":null,"content":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"d884fc3eb1e2b2382def5073cec5e105","permalink":"/authors/22_zequn/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_zequn/","section":"authors","summary":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.","tags":null,"title":"Zequn Yang","type":"authors"},{"authors":null,"categories":null,"content":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Dalian University of Technology in 2023. Currently his research instrests focus on Large language Models and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"0f875044223f8afd458b089859ba38d8","permalink":"/authors/23_henghui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_henghui/","section":"authors","summary":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Dalian University of Technology in 2023. Currently his research instrests focus on Large language Models and cross-modal generation.","tags":null,"title":"Henghui Du","type":"authors"},{"authors":null,"categories":null,"content":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanism of multi-modal.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"8808a5aa1460c5cb4fad660d28f8520a","permalink":"/authors/23_jiahao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jiahao/","section":"authors","summary":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanism of multi-modal.","tags":null,"title":"Jiahao Li","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"1697602eb95e74d0fb1a9247c1f07489","permalink":"/authors/23_jianghan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jianghan/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Jianghan Chao","type":"authors"},{"authors":null,"categories":null,"content":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bdeafc1f9127d19078299ad17ddcf547","permalink":"/authors/23_jingxian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jingxian/","section":"authors","summary":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.","tags":null,"title":"Jingxian Lu","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"b1f3ebd7d0f58e6a501810a383c4a9ed","permalink":"/authors/23_jinlin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jinlin/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Jinlin Li","type":"authors"},{"authors":null,"categories":null,"content":"Juncheng is a third-year student of School of Artificial Intelligence, University of Chinese Academy of Sciences. His research interests include audio-visual localization and segmentation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"874c09024781e4fd5375423eaef9c9e8","permalink":"/authors/23_juncheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_juncheng/","section":"authors","summary":"Juncheng is a third-year student of School of Artificial Intelligence, University of Chinese Academy of Sciences. His research interests include audio-visual localization and segmentation.","tags":null,"title":"Juncheng Ma","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"834fe556c30cd4180a6dc4c692fd63d9","permalink":"/authors/23_liangce/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_liangce/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Ce Liang","type":"authors"},{"authors":null,"categories":null,"content":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning including sentiment, segmentation and foundation models.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bf84fe39ef0b614af0ae82d08359c784","permalink":"/authors/23_peiwen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_peiwen/","section":"authors","summary":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning including sentiment, segmentation and foundation models.","tags":null,"title":"Peiwen Sun","type":"authors"},{"authors":null,"categories":null,"content":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c29a63de0242659b43a43451fc077046","permalink":"/authors/23_ruoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_ruoxuan/","section":"authors","summary":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.","tags":null,"title":"Ruoxuan Feng","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"364786f50ed04bbfb2309f8069cdbe90","permalink":"/authors/23_shaoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_shaoxuan/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Shaoxuan Xu","type":"authors"},{"authors":null,"categories":null,"content":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. He is interested in image editing with generative diffusion models and image deblurring.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"062e874f9d4216ee7c15e6afe41e1631","permalink":"/authors/23_siwei/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_siwei/","section":"authors","summary":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. He is interested in image editing with generative diffusion models and image deblurring.","tags":null,"title":"Siwei Li","type":"authors"},{"authors":null,"categories":null,"content":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bda305ecfaa132f6e49d2dd2566d0f25","permalink":"/authors/23_yaoting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_yaoting/","section":"authors","summary":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.","tags":null,"title":"Yaoting Wang","type":"authors"},{"authors":null,"categories":null,"content":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"7a6ee1988cb2fa93bfeee88a094c7489","permalink":"/authors/24_jirui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_jirui/","section":"authors","summary":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.","tags":null,"title":"JiRui Huang","type":"authors"},{"authors":null,"categories":null,"content":"Yuchen is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Zhejiang University in 2024.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"186e15560cfa29bcd45c618efc625779","permalink":"/authors/24_yuchen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_yuchen/","section":"authors","summary":"Yuchen is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Zhejiang University in 2024.","tags":null,"title":"Yuchen Li","type":"authors"},{"authors":["dihu"],"categories":null,"content":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was previously a research scientist at Baidu Research. Di Hu obtained the Ph.D degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via the natural multimodal messages. He is an aficionado of cognitive neuroscience and has wrote one study note during his undergraduate. Inspired by what he learned from cognitive neuroscience, and what he observed and deliberated from the daily-life, he strongly convinced that the pervasive, free, natural multimodal messages can provide sufficient information for perceiving, learning and understanding environment, even the agent itself, which promisingly makes multimodal learning become one of the key to achieve machine intelligence.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"2525497d367e79493fd32b198b28f040","permalink":"/authors/admin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/admin/","section":"authors","summary":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was previously a research scientist at Baidu Research. Di Hu obtained the Ph.D degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via the natural multimodal messages. He is an aficionado of","tags":null,"title":"Di Hu","type":"authors"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"852b491b0dcadb44b8f099f931db74c4","permalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"A Two-Stage Framework for Multiple Sound-Source Localization","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"b21459d2cd2aa98d5a771a396df3c29e","permalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","section":"publication","summary":"","tags":null,"title":"Ambient Sound Helps: Audiovisual Crowd Counting in Extreme Conditions","type":"publication"},{"authors":["Wenke Xia*","Xu Zhao*","Xincheng Pang","Changqing Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"7a5ff9681de843469038165a230c4f87","permalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","section":"publication","summary":"","tags":null,"title":"Balanced Audiovisual Dataset for Imbalance Analysis","type":"publication"},{"authors":["Xiaokang Peng*","Yake Wei*","Andong Deng","Dong Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1cdda2159c4adeb4f31cb4e7f1a5ab8a","permalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","section":"publication","summary":"","tags":null,"title":"Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)","type":"publication"},{"authors":["Yaoting Wang*","Peiwen Sun*","Yuanchao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e2d14df72502e78a30f83d09310b98b6","permalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","section":"publication","summary":"","tags":null,"title":"Can Textual Semantics Mitigate Sounding Object SegmentationPreference?","type":"publication"},{"authors":["Di Hu","Yake Wei","Rui Qian","Weiyao Lin","Ruihua Song","Ji-Rong Wen"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"91e67073102678aec9799732ceef49f3","permalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","section":"publication","summary":"","tags":null,"title":"Class-aware Sounding Objects Localization via Audiovisual Correspondence","type":"publication"},{"authors":["Yapeng Tian*","Di Hu*","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c0d82a52007e4e9ab50a2cfafdc4ac17","permalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","section":"publication","summary":"","tags":null,"title":"Co-Learn Sounding Object Visual Grounding and Visually Indicated Sound Separation in A Cycle","type":"publication"},{"authors":["Di Hu","Xuhong Li","Lichao Mou","Pu Jin","Dong Chen","Liping Jing","Xiaoxiang Zhu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c7688dd14aa743d0b927f94d97854f27","permalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","section":"publication","summary":"","tags":null,"title":"Cross-Task Transfer for Geotagged Audiovisual Aerial Scene Recognition","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac02b15b850ff085e6c9ad497f3a130c","permalink":"/publication/curriculum-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/curriculum-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Curriculum Audiovisual Learning","type":"publication"},{"authors":["Yapeng Tian","Di Hu","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"cd0308a1bfb55705c394057955f2375d","permalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","section":"publication","summary":"","tags":null,"title":"Cyclic Co-Learning of Sounding Object Visual Grounding and Sound Separation","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"202776673a51788c119f1451c9e313c2","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"00f72a8fe1deeb265958a59b94c2cd33","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"f6c0a9a658cdceee78bd291860181d99","permalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","section":"publication","summary":"","tags":null,"title":"Deep Linear Discriminant Analysis Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d1466a6c42ba930502049d24243f8b62","permalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Deep Multimodal Clustering for Unsupervised Audiovisual Learning Representation","type":"publication"},{"authors":["Di Hu - Chengze Wang - Feiping Nie - Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9e4cd76d6b972d54b50c190779f639a5","permalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","section":"publication","summary":"","tags":null,"title":"Dense Multimodal Fusion for Hierarchically Joint Representation","type":"publication"},{"authors":["Xincheng Pang","Wenke Xia","Zhigang Wang","Bin Zhao","Di Hu","Dong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"832f5776c5daa77fa5df21ce843a3196","permalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","section":"publication","summary":"","tags":null,"title":"Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection","type":"publication"},{"authors":["Yake Wei","Siwei Li","Ruoxuan Feng","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"591c348a8e03f441318436eb005ae2cc","permalink":"/publication/diagnosing-and-re-learning-for-balanced-multimodal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/diagnosing-and-re-learning-for-balanced-multimodal-learning/","section":"publication","summary":"","tags":null,"title":"Diagnosing and Re-learning for Balanced Multimodal Learning","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"4b1e10b4327cca00dfd58162571a2f8c","permalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","section":"publication","summary":"","tags":null,"title":"Discrete Spectral Hashing for Efficient Similarity Retrieval","type":"publication"},{"authors":["Di Hu","Rui Qian","Minyue Jiang","Xiao Tan","Shilei Wen","Errui Ding","Weiyao Lin","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6953eeac03ee85322e85eece2eeeb84","permalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","section":"publication","summary":"","tags":null,"title":"Discriminative Sounding Objects Localization via Self-supervised Audiovisual Matching","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiaoxiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3016d01c7b86e792f8778f7aba6fc44d","permalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","section":"publication","summary":"","tags":null,"title":"Does Ambient Sound Help? - Audiovisual Crowd Counting","type":"publication"},{"authors":["Yake Wei","Ruoxuan Feng","Zihe Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"0278c6a7c52909fa5c55eaf522569e7f","permalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","section":"publication","summary":"","tags":null,"title":"Enhancing Multi-modal Cooperation via Fine-grained Modality Valuation","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Di Hu","Hang Zhou","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"28bd51450c42258842f48363910f83c8","permalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"Exploiting Visual Context Semantics for Sound Source Localization","type":"publication"},{"authors":["Sijia Yang","Haoyi Xiong","Di Hu","Kaibo Xu","Licheng Wang","Peizhen Zhu","Zeyi Sun"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ed52bf34eef1f16fc89a0fc5c32fa152","permalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","section":"publication","summary":"","tags":null,"title":"Generalising Combinatorial Discriminant Analysis through Conditioning Truncated Rayleigh Flow","type":"publication"},{"authors":["Zequn Yang","Han Zhang","Yake Wei","Zheng Wang","Feiping Nie","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"76c72a76e4cf8516d166a780e270c79b","permalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","section":"publication","summary":"","tags":null,"title":"Geometric-Inspired Graph-based Incomplete Multi-view Clustering","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8fe03bbbdab04c3ee4ecc7e01ecd723c","permalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Heterogeneous Scene Analysis via Self-supervised Audiovisual Learning","type":"publication"},{"authors":["Xuelong Li","Di Hu","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1850ab6a7473c571586aed28d796ac66","permalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","section":"publication","summary":"","tags":null,"title":"Image2song: Song Retrieval via Bridging Image Content and Lyric Words","type":"publication"},{"authors":["Wenke Xia","Dong Wang","Xincheng Pang","Zhigang Wang","Bin Zhao","Di Hu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"82a334df3b6181644b600e4679ce595c","permalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","section":"publication","summary":"","tags":null,"title":"Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs","type":"publication"},{"authors":["Jingxian Lu","Wenke Xia","Dong Wang","Zhigang Wang","Bin Zhao","Di Hu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"2cb0cd3b7dd67caebf2eae2ac616b156","permalink":"/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/","section":"publication","summary":"","tags":null,"title":"KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance","type":"publication"},{"authors":["Xuelong Li","Di Hu","Feiping Nie"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"24881bb5f959ea9f061fb67469d72eb9","permalink":"/publication/large-graph-hashing-with-spectral-rotation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/large-graph-hashing-with-spectral-rotation/","section":"publication","summary":"","tags":null,"title":"Large Graph Hashing with Spectral Rotation","type":"publication"},{"authors":["Yake Wei","Di Hu","Yapeng Tian","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"67b2f40c745acaa698a385e2742a25bc","permalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","section":"publication","summary":"","tags":null,"title":"Learning in Audio-visual Context: A Review, Analysis, and New Perspective","type":"publication"},{"authors":["Guangyao Li*","Yake Wei*","Yapeng Tian*","Chenliang Xu","Ji-Rong Wen","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"125a97cdaa82fb5a0ec455cfd53c1b46","permalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","section":"publication","summary":"","tags":null,"title":"Learning to Answer Questions in Dynamic Audio-Visual Scenarios","type":"publication"},{"authors":["Di Hu","Dong Wang","Xuelong Li","Feiping Nie","Qi Wang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c716bb52e5e46a2dbaebc46fda1517d6","permalink":"/publication/listen-to-the-image/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/listen-to-the-image/","section":"publication","summary":"","tags":null,"title":"Listen to the Image","type":"publication"},{"authors":["Ruize Xu","Ruoxuan Feng","Shi-xiong Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8e1ed6fc418000d90eed8231ce30fa73","permalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","section":"publication","summary":"","tags":null,"title":"MMCosine: Multi-Modal Cosine Loss Towards Balanced Audio-Visual Fine-Grained Learning","type":"publication"},{"authors":["Yake Wei","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ad6411f0202e0562a67a75820ff098f","permalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","section":"publication","summary":"","tags":null,"title":"MMPareto: Innocent Uni-modal Assistance for Enhanced Multi-modal Learning","type":"publication"},{"authors":["Guangyao Li","Yixin Xu","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"38daed7d60d2831123ddca90ac47d9b7","permalink":"/publication/multi-scale-attention-for-audio-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multi-scale-attention-for-audio-question-answering/","section":"publication","summary":"","tags":null,"title":"Multi-Scale Attention for Audio Question Answering","type":"publication"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"88c9d48496c44a5980763aa946676e9e","permalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","section":"publication","summary":"","tags":null,"title":"Multiple Sound Sources Localization from Coarse to Fine","type":"publication"},{"authors":["Ziyun Li","Xinshao Wang","Haojin Yang","Di Hu","Neil M Robertson","David A Clifton","Christoph Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"a48ea4ca10463e6ef980903ef312977d","permalink":"/publication/not-all-knowledge-is-created-equal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/not-all-knowledge-is-created-equal/","section":"publication","summary":"","tags":null,"title":"Not All Knowledge Is Created Equal","type":"publication"},{"authors":["Ruoxuan Feng","Di Hu","Wenke Ma","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"891cffdde1feb31f3dc52292231f2969","permalink":"/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/","section":"publication","summary":"","tags":null,"title":"Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation","type":"publication"},{"authors":["Guangyao Li","Wenxuan Hou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"545100c95da731d9faeb7037b5801449","permalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","section":"publication","summary":"","tags":null,"title":"Progressive Spatio-temporal Perception for Audio-Visual Question Answering","type":"publication"},{"authors":["Yaoting Wang*","Weisong Liu*","Guangyao Li","Jian Ding","Di Hu","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d414aab41857970b60155d360ceac88","permalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","section":"publication","summary":"","tags":null,"title":"Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer","type":"publication"},{"authors":["Zequn Yang","Yake Wei","Ce Liang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d069d78586930bf2dd726ae7c0b00c9b","permalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","section":"publication","summary":"","tags":null,"title":"Quantifying and Enhancing Multi-modal Robustness with Modality Preference","type":"publication"},{"authors":["Yaoting Wang*","Peiwen Sun*","Dongzhan Zhou","Guangyao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e787cc7b340511ed0ad617eaf61af942","permalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","section":"publication","summary":"","tags":null,"title":"Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes","type":"publication"},{"authors":["Ruoxuan Feng","Wenke Xia","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"450f11c7cb976aa1013ed40cd3963388","permalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","section":"publication","summary":"","tags":null,"title":"Revisiting Pre-training in Audio-Visual Learning","type":"publication"},{"authors":["Wenke Xia","Xingjian Li","Andong Deng","Haoyi Xiong","Dejing Dou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d68814ab18c4fd432535b2592c31988","permalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","section":"publication","summary":"","tags":null,"title":"Robust Cross-modal Knowledge Distillation for Unconstrained Videos","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Wanli Ouyang","Hang Zhou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"acd80d91071719018f44e8766871cb74","permalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","section":"publication","summary":"","tags":null,"title":"SeCo: Separating Unknown Musical Visual Sounds with Consistency Guidance","type":"publication"},{"authors":["Konrad Heidler","Lichao Mou","Di Hu","Pu Jin","Guangyao Li","Chuang Gan","Ji-Rong Wen","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"36c9fd21445495f69bad705471393094","permalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","section":"publication","summary":"","tags":null,"title":"Self-supervised Audiovisual Representation Learning for Remote Sensing Data","type":"publication"},{"authors":["Di Hu","Zheng Wang","Feiping Nie","Rong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac1ac86aa9c1772d446b7594a05d9100","permalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","section":"publication","summary":"","tags":null,"title":"Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis","type":"publication"},{"authors":["Dongzhan Zhou","Xinchi Zhou","Di Hu","Hang Zhou","Lei Bai","Ziwei Liu","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3f2c9d5779b3cec3c9b69a845335b218","permalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","section":"publication","summary":"","tags":null,"title":"SepFusion: Finding Optimal Fusion Structures for Visual Sound Separation","type":"publication"},{"authors":["Tao Wu","Xuewei Li","Zhongang Qi","Di Hu","Xintao Wang","Ying Shan","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"75b3553d0dff4fb43ea7284e9d6f8d1c","permalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","section":"publication","summary":"","tags":null,"title":"SphereDiffusion: Spherical Geometry-aware Distortion Resilient Diffusion Model","type":"publication"},{"authors":["Juncheng Ma","Peiwen Sun","Yaoting Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1fe41f212fd0141fdf179a000dd9df81","permalink":"/publication/stepping-stones-a-progressive-training-strategy-for-audio-visual-semantic-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/stepping-stones-a-progressive-training-strategy-for-audio-visual-semantic-segmentation/","section":"publication","summary":"","tags":null,"title":"Stepping Stones: A Progressive Training Strategy for Audio-Visual Semantic Segmentation","type":"publication"},{"authors":["ZiYun Li","Jona Otholt","Ben Dai","Di Hu","Christoph Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"50b53591fe6d761222acbe7d191d3e47","permalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","section":"publication","summary":"","tags":null,"title":"Supervised Knowledge May Hurt Novel Class Discovery Performance","type":"publication"},{"authors":["Di Hu","Xuelong Li","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6704b0eb55495bb979be6fcbb8243ae","permalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","section":"publication","summary":"","tags":null,"title":"Temporal Multimodal Learning in Audiovisual Speech Recognition","type":"publication"},{"authors":["Dong Wang","Di Hu","Xingjian Li","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"807bb234ac2724175550dbdf52f64d08","permalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","section":"publication","summary":"","tags":null,"title":"Temporal Relational Modeling with Self-Supervision for Action Segmentation","type":"publication"},{"authors":["Hongpeng Lin*","Ludan Ruan*","Wenke Xia*","Peiyu Liu","Jingyuan Wen","Yixin Xu","Di Hu","Ruihua Song","Wayne Xin Zhao","Qin Jin","Zhiwu Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"dd91d75ebb359650640b7b6c75634dff","permalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","section":"publication","summary":"","tags":null,"title":"TikTalk: A Video-Based Dialogue Dataset for Multi-Modal Chitchat in Real World","type":"publication"},{"authors":["Xingjian Li","Di Hu","Xuhong Li","Haoyi Xiong","Zhi Ye","Zhipeng Wang","Chengzhong Xu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"5b40a464bbfccb601c6d4c37e85cf81e","permalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","section":"publication","summary":"","tags":null,"title":"Towards Accurate Knowledge Transfer via Target-awareness Representation Disentanglement","type":"publication"},{"authors":["Andong Deng","Xingjian Li","Di Hu","Tianyang Wang","Haoyi Xiong","Chengzhong Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"113edd12d767a54c1fdd10685167cd5c","permalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","section":"publication","summary":"","tags":null,"title":"Towards Inadequately Pre-trained Models in Transfer Learning","type":"publication"},{"authors":["Wenxuan Hou*","Guangyao Li*","Yapeng Tian","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"56c2e256bf8d4a20cdffe034f430aaef","permalink":"/publication/towards-long-form-audio-visual-video-understanding/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-long-form-audio-visual-video-understanding/","section":"publication","summary":"","tags":null,"title":"Towards Long Form Audio-visual Video Understanding","type":"publication"},{"authors":["Zechen Bai","Zhigang Wang","Jian Wang","Di Hu","Errui Ding"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9905f139a565b4f5eabfc5902965f851","permalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","section":"publication","summary":"","tags":null,"title":"Unsupervised Multi-Source Domain Adaptation for Person Re-Identification","type":"publication"},{"authors":["Peiwen Sun","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ff959ec7e9a3da6203370e48a939fd1","permalink":"/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/","section":"publication","summary":"","tags":null,"title":"Unveiling and Mitigating Bias in Audio Visual Segmentation (ACM MM Oral)","type":"publication"},{"authors":["Xian Liu","Rui Qian","Hang Zhou","Di Hu","Weiyao Lin","Ziwei Liu","Bolei Zhou","Xiaowei Zhou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ca462fd19e2017e2ecb2b26a145ef250","permalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","section":"publication","summary":"","tags":null,"title":"Visual Sound Localization in-the-Wild by Cross-Modal Interference Erasing","type":"publication"}]
\ No newline at end of file
diff --git a/docs/index.xml b/docs/index.xml
index b84a4267..33b5787e 100755
--- a/docs/index.xml
+++ b/docs/index.xml
@@ -228,6 +228,14 @@
+
+ KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+ /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+ Mon, 01 Jan 0001 00:00:00 +0000
+ /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+
+
+
Large Graph Hashing with Spectral Rotation
/publication/large-graph-hashing-with-spectral-rotation/
@@ -300,6 +308,14 @@
+
+ Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation
+ /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
+ Mon, 01 Jan 0001 00:00:00 +0000
+ /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
+
+
+
Progressive Spatio-temporal Perception for Audio-Visual Question Answering
/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/
diff --git a/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html b/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html
index 6f5aed89..07bbdd46 100755
--- a/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html
+++ b/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html
@@ -154,7 +154,7 @@
"author": {
"@type": "Person",
- "name": "Wenke Xia"
+ "name": "Wenke Xia*"
},
"publisher": {
diff --git a/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html b/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html
index bc80c9b9..b4d02177 100755
--- a/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html
+++ b/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html
@@ -154,7 +154,7 @@
"author": {
"@type": "Person",
- "name": "Yapeng Tian*"
+ "name": "Yapeng Tian"
},
"publisher": {
diff --git a/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html b/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html
index 8be06258..9286afb7 100755
--- a/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html
+++ b/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html
@@ -154,7 +154,7 @@
"author": {
"@type": "Person",
- "name": "Yapeng Tian*"
+ "name": "Yapeng Tian"
},
"publisher": {
diff --git a/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/cite.bib b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/cite.bib
new file mode 100644
index 00000000..b95f5fdb
--- /dev/null
+++ b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/cite.bib
@@ -0,0 +1,6 @@
+@article{pang2024depth,
+ title={Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection},
+ author={Pang, Xincheng and Xia, Wenke and Wang, Zhigang and Zhao, Bin and Hu, Di and Wang, Dong and Li, Xuelong},
+ journal={arXiv preprint arXiv:2408.05107},
+ year={2024}
+}
\ No newline at end of file
diff --git a/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html
index fda94d04..cde64d4c 100644
--- a/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html
+++ b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html
@@ -509,9 +509,71 @@
Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information I
+
+
+
+
+