diff --git a/content/authors/24_yufan/_index.md b/content/authors/24_yufan/_index.md deleted file mode 100755 index 9d47872e..00000000 --- a/content/authors/24_yufan/_index.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -name: Yufan Wu - -superuser: true - -role: Visiting Student, 2024 - - -# website_url: # a personal homepage URL can be configured here, if available - -user_groups: -- Research Assistant ---- - -Yufan is a 2024 master's graduate from Zhejiang University. She is passionate about image generation, cross-modal generation and multimodal learning. \ No newline at end of file diff --git a/content/publication/Can Textual Semantics Mitigate Sounding Object SegmentationPreference?/featured.jpg b/content/publication/Can Textual Semantics Mitigate Sounding Object SegmentationPreference?/featured.jpg new file mode 100644 index 00000000..0e252023 Binary files /dev/null and b/content/publication/Can Textual Semantics Mitigate Sounding Object SegmentationPreference?/featured.jpg differ diff --git a/content/publication/Can Textual Semantics Mitigate Sounding Object SegmentationPreference?/index.md b/content/publication/Can Textual Semantics Mitigate Sounding Object SegmentationPreference?/index.md new file mode 100755 index 00000000..d970c2b0 --- /dev/null +++ b/content/publication/Can Textual Semantics Mitigate Sounding Object SegmentationPreference?/index.md @@ -0,0 +1,17 @@ +--- +title: "Can Textual Semantics Mitigate Sounding Object Segmentation Preference?" +authors: + - Yaoting Wang + - Peiwen Sun + - Yuanchao Li + - Honggang Zhang + - Di Hu +publication_types: ["1"] +publication: European Conference on Computer Vision (ECCV) 2024 +publication_types_name: Conference Paper +url_pdf: +url_code: +topic_types: ["3"] +# topic_types_name: topic_hash +rating: 2024_06_30 +--- diff --git a/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/featured.jpg b/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/featured.jpg new file mode 100644 index 00000000..1d62c3af Binary files /dev/null and b/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/featured.jpg differ diff --git a/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/index.md b/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/index.md new file mode 100755 index 00000000..a2bc34b6 --- /dev/null +++ b/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/index.md @@ -0,0 +1,19 @@ +--- +title: "Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection" +authors: + - Xincheng Pang + - Wenke Xia + - Zhigang Wang + - Bin Zhao + - Di Hu + - Dong Wang + - Xuelong Li +publication_types: ["1"] +publication: IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) 2024 +publication_types_name: Conference Paper +url_pdf: +url_code: +topic_types: ["3"] +# topic_types_name: topic_hash +rating: 2024_06_28 +--- diff --git a/content/publication/Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes/featured.jpg b/content/publication/Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes/featured.jpg new file mode 100644
index 00000000..aacd0e03 Binary files /dev/null and b/content/publication/Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes/featured.jpg differ diff --git a/content/publication/Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes/index.md b/content/publication/Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes/index.md new file mode 100755 index 00000000..f6db8c17 --- /dev/null +++ b/content/publication/Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes/index.md @@ -0,0 +1,18 @@ +--- +title: "Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes" +authors: + - Yaoting Wang + - Peiwen Sun + - Dongzhan Zhou + - Guangyao Li + - Honggang Zhang + - Di Hu +publication_types: ["1"] +publication: European Conference on Computer Vision (ECCV) 2024 +publication_types_name: Conference Paper +url_pdf: +url_code: +topic_types: ["3"] +# topic_types_name: topic_hash +rating: 2024_06_30 +--- diff --git a/docs/authors/bin-zhao/index.html index 2b6f9a44..bdae136c 100644 --- a/docs/authors/bin-zhao/index.html +++ b/docs/authors/bin-zhao/index.html @@ -386,6 +386,10 @@

Bin Zhao

Latest

diff --git a/docs/authors/page/11/index.html b/docs/authors/page/11/index.html index b7c88e45..f20d69d4 100644 --- a/docs/authors/page/11/index.html +++ b/docs/authors/page/11/index.html @@ -406,6 +406,9 @@

Authors

+
  • Xiaoxiang Zhu
  • + +
  • Xincheng Pang
  • @@ -432,9 +435,6 @@

    Authors

  • Xuhong Li
  • - -
  • Yake Wei*
  • - diff --git a/docs/authors/page/12/index.html b/docs/authors/page/12/index.html index b9adbbbe..a9e3117b 100644 --- a/docs/authors/page/12/index.html +++ b/docs/authors/page/12/index.html @@ -406,10 +406,13 @@

    Authors

    -
  • Yaoting Wang*
  • +
  • Yake Wei
  • -
  • Yapeng Tian*
  • +
  • Yaoting Wang
  • + + +
  • Yapeng Tian
  • Ying Shan
  • @@ -418,6 +421,9 @@

    Authors

  • Yixin Xu
  • +
  • Yuanchao Li
  • + +
  • Yuansheng Hua
  • @@ -429,12 +435,6 @@

    Authors

  • Zeyi Sun
  • - -
  • Zheng Wang
  • - - -
  • Zhi Ye
  • - diff --git a/docs/authors/page/13/index.html b/docs/authors/page/13/index.html index 93a9f9e4..e9fc199f 100644 --- a/docs/authors/page/13/index.html +++ b/docs/authors/page/13/index.html @@ -406,6 +406,12 @@

    Authors

    +
  • Zheng Wang
  • + + +
  • Zhi Ye
  • + +
  • Zhigang Wang
  • diff --git a/docs/authors/page/3/index.html b/docs/authors/page/3/index.html index aa3fbce7..18329de8 100755 --- a/docs/authors/page/3/index.html +++ b/docs/authors/page/3/index.html @@ -421,9 +421,6 @@

    Authors

  • JiRui Huang
  • -
  • Yufan Wu
  • - -
  • Di Hu
  • @@ -435,6 +432,9 @@

    Authors

  • Bin Zhao
  • + +
  • Bolei Zhou
  • + diff --git a/docs/authors/page/4/index.html b/docs/authors/page/4/index.html index 9228e920..c6e4c941 100755 --- a/docs/authors/page/4/index.html +++ b/docs/authors/page/4/index.html @@ -406,9 +406,6 @@

    Authors

    -
  • Bolei Zhou
  • - -
  • Ce Liang
  • @@ -435,6 +432,9 @@

    Authors

  • Di Hu
  • + +
  • Di Hu - Chengze Wang - Feiping Nie - Xuelong Li
  • + diff --git a/docs/authors/page/5/index.html b/docs/authors/page/5/index.html index d228974f..db99deac 100755 --- a/docs/authors/page/5/index.html +++ b/docs/authors/page/5/index.html @@ -406,9 +406,6 @@

    Authors

    -
  • Di Hu - Chengze Wang - Feiping Nie - Xuelong Li
  • - -
  • Dong Chen
  • @@ -424,7 +421,7 @@

    Authors

  • Feiping Nie
  • -
  • Guangyao Li*
  • +
  • Guangyao Li
  • Han Zhang
  • @@ -435,6 +432,9 @@

    Authors

  • Haojin Yang
  • + +
  • Haoyi Xiong
  • + diff --git a/docs/authors/page/6/index.html b/docs/authors/page/6/index.html index 8677790e..14fb5896 100755 --- a/docs/authors/page/6/index.html +++ b/docs/authors/page/6/index.html @@ -406,10 +406,10 @@

    Authors

    -
  • Haoyi Xiong
  • +
  • Heinrich Dinkel
  • -
  • Heinrich Dinkel
  • +
  • Honggang Zhang
  • Hongpeng Lin*
  • diff --git a/docs/authors/page/8/index.html b/docs/authors/page/8/index.html index 73c90b99..2ff920e4 100755 --- a/docs/authors/page/8/index.html +++ b/docs/authors/page/8/index.html @@ -406,6 +406,9 @@

    Authors

    +
  • Peiwen Sun
  • + +
  • Peiyu Liu
  • @@ -432,9 +435,6 @@

    Authors

  • Ruihua Song
  • - -
  • Ruize Xu
  • - diff --git a/docs/authors/page/9/index.html b/docs/authors/page/9/index.html index c49ddfe9..ac2b76fc 100755 --- a/docs/authors/page/9/index.html +++ b/docs/authors/page/9/index.html @@ -406,6 +406,9 @@

    Authors

    +
  • Ruize Xu
  • + +
  • Ruoxuan Feng
  • @@ -432,9 +435,6 @@

    Authors

  • Weisong Liu*
  • - -
  • Weiyao Lin
• diff --git a/docs/authors/peiwen-sun/index.html new file mode 100644 index 00000000..528f59e7 --- /dev/null +++ b/docs/authors/peiwen-sun/index.html @@ -0,0 +1,520 @@ GeWu-Lab

    Peiwen Sun


    Latest

diff --git a/docs/authors/peiwen-sun/index.xml new file mode 100644 index 00000000..6fbdbeaf --- /dev/null +++ b/docs/authors/peiwen-sun/index.xml @@ -0,0 +1,32 @@ + + + + Peiwen Sun | GeWu-Lab + /authors/peiwen-sun/ + + Peiwen Sun + Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab + + /img/logo.png + Peiwen Sun + /authors/peiwen-sun/ + + + + Can Textual Semantics Mitigate Sounding Object Segmentation Preference? + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + + + + + Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + + + + + diff --git a/docs/authors/wenke-xia/index.html index d773ac1e..30abbe2d 100755 --- a/docs/authors/wenke-xia/index.html +++ b/docs/authors/wenke-xia/index.html @@ -115,7 +115,7 @@ - + @@ -370,7 +370,7 @@

    Search

    -

    Wenke Xia*

    +

    Wenke Xia

    @@ -390,6 +390,10 @@

    Latest

    Balanced Audiovisual Dataset for Imbalance Analysis +
  • + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection +
  • +
  • Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs
  • diff --git a/docs/authors/wenke-xia/index.xml b/docs/authors/wenke-xia/index.xml index 0ff987e0..5a8bdb20 100755 --- a/docs/authors/wenke-xia/index.xml +++ b/docs/authors/wenke-xia/index.xml @@ -1,14 +1,14 @@ - Wenke Xia* | GeWu-Lab + Wenke Xia | GeWu-Lab /authors/wenke-xia/ - Wenke Xia* + Wenke Xia Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab /img/logo.png - Wenke Xia* + Wenke Xia /authors/wenke-xia/ @@ -20,6 +20,14 @@ + + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + + + Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs /publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/ diff --git a/docs/authors/xincheng-pang/index.html b/docs/authors/xincheng-pang/index.html index 2e61bf9f..a808b69a 100755 --- a/docs/authors/xincheng-pang/index.html +++ b/docs/authors/xincheng-pang/index.html @@ -390,6 +390,10 @@

    Latest

    Balanced Audiovisual Dataset for Imbalance Analysis +
  • + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection +
  • +
  • Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs
  • diff --git a/docs/authors/xincheng-pang/index.xml b/docs/authors/xincheng-pang/index.xml index 91711c92..612fd697 100755 --- a/docs/authors/xincheng-pang/index.xml +++ b/docs/authors/xincheng-pang/index.xml @@ -20,6 +20,14 @@
    + + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + + + Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs /publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/ diff --git a/docs/authors/xuelong-li/index.html b/docs/authors/xuelong-li/index.html index 0e08227d..bbb9e81d 100755 --- a/docs/authors/xuelong-li/index.html +++ b/docs/authors/xuelong-li/index.html @@ -402,6 +402,10 @@

    Latest

    Deep Multimodal Clustering for Unsupervised Audiovisual Learning Representation +
  • + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection +
  • +
  • Discrete Spectral Hashing for Efficient Similarity Retrieval
  • diff --git a/docs/authors/xuelong-li/index.xml b/docs/authors/xuelong-li/index.xml index 08f8591d..cf1f3b77 100755 --- a/docs/authors/xuelong-li/index.xml +++ b/docs/authors/xuelong-li/index.xml @@ -44,6 +44,14 @@
    + + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + + + Discrete Spectral Hashing for Efficient Similarity Retrieval /publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/ diff --git a/docs/authors/yake-wei/index.html b/docs/authors/yake-wei/index.html index e5c26626..2c7a24b6 100755 --- a/docs/authors/yake-wei/index.html +++ b/docs/authors/yake-wei/index.html @@ -115,7 +115,7 @@ - + @@ -370,7 +370,7 @@

    Search

    -

    Yake Wei*

    +

    Yake Wei

    diff --git a/docs/authors/yake-wei/index.xml b/docs/authors/yake-wei/index.xml index d91a20a8..da2527f7 100755 --- a/docs/authors/yake-wei/index.xml +++ b/docs/authors/yake-wei/index.xml @@ -1,14 +1,14 @@ - Yake Wei* | GeWu-Lab + Yake Wei | GeWu-Lab /authors/yake-wei/ - Yake Wei* + Yake Wei Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab /img/logo.png - Yake Wei* + Yake Wei /authors/yake-wei/ diff --git a/docs/authors/yaoting-wang/index.html b/docs/authors/yaoting-wang/index.html index bb73e519..47224d94 100644 --- a/docs/authors/yaoting-wang/index.html +++ b/docs/authors/yaoting-wang/index.html @@ -115,7 +115,7 @@ - + @@ -370,7 +370,7 @@

    Search

    -

    Yaoting Wang*

    +

    Yaoting Wang

    @@ -386,10 +386,18 @@

    Yaoting Wang*

    Latest

diff --git a/docs/authors/yaoting-wang/index.xml index 6e7eb454..669c5fcc 100644 --- a/docs/authors/yaoting-wang/index.xml +++ b/docs/authors/yaoting-wang/index.xml @@ -1,17 +1,25 @@ - Yaoting Wang* | GeWu-Lab + Yaoting Wang | GeWu-Lab /authors/yaoting-wang/ - Yaoting Wang* + Yaoting Wang Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab /img/logo.png - Yaoting Wang* + Yaoting Wang /authors/yaoting-wang/ + + Can Textual Semantics Mitigate Sounding Object Segmentation Preference? + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + + + Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer /publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/ @@ -20,5 +28,13 @@ + + Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + + + diff --git a/docs/authors/yapeng-tian/index.html index 93aaf475..6a55ebe2 100755 --- a/docs/authors/yapeng-tian/index.html +++ b/docs/authors/yapeng-tian/index.html @@ -115,7 +115,7 @@ - + @@ -370,7 +370,7 @@

    Search

    -

    Yapeng Tian*

    +

    Yapeng Tian

diff --git a/docs/authors/yapeng-tian/index.xml index 7c92e1b7..451a1056 100755 --- a/docs/authors/yapeng-tian/index.xml +++ b/docs/authors/yapeng-tian/index.xml @@ -1,14 +1,14 @@ - Yapeng Tian* | GeWu-Lab + Yapeng Tian | GeWu-Lab /authors/yapeng-tian/ - Yapeng Tian* + Yapeng Tian Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab /img/logo.png - Yapeng Tian* + Yapeng Tian /authors/yapeng-tian/ diff --git a/docs/authors/yuanchao-li/index.html new file mode 100644 index 00000000..2a81fad5 --- /dev/null +++ b/docs/authors/yuanchao-li/index.html @@ -0,0 +1,516 @@ GeWu-Lab

    Yuanchao Li


    + copyright © 2024 GeWu-Lab +
    + + Gaoling School of Artificial Intelligence, Renmin University of China, Beijing 100872 +

diff --git a/docs/authors/yuanchao-li/index.xml new file mode 100644 index 00000000..d3f48e81 --- /dev/null +++ b/docs/authors/yuanchao-li/index.xml @@ -0,0 +1,24 @@ + + + + Yuanchao Li | GeWu-Lab + /authors/yuanchao-li/ + + Yuanchao Li + Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab + + /img/logo.png + Yuanchao Li + /authors/yuanchao-li/ + + + + Can Textual Semantics Mitigate Sounding Object Segmentation Preference? + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + + + + + diff --git a/docs/authors/zhigang-wang/index.html index 361cf543..56cdc96b 100755 --- a/docs/authors/zhigang-wang/index.html +++ b/docs/authors/zhigang-wang/index.html @@ -386,6 +386,10 @@

    Zhigang Wang

    Latest

      +
    • + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection +
    • +
    • Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs
• diff --git a/docs/authors/zhigang-wang/index.xml index 0121255a..0a2b2360 100755 --- a/docs/authors/zhigang-wang/index.xml +++ b/docs/authors/zhigang-wang/index.xml @@ -12,6 +12,14 @@ /authors/zhigang-wang/ + + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + + + Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs /publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/ diff --git a/docs/index.json index 9752f59b..b51dbc65 100755 --- a/docs/index.json +++ b/docs/index.json @@ -1 +1 @@ -[{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"4e73f707a3c1da0c5d8d165361161c7b","permalink":"/authors/19_ruize/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/19_ruize/","section":"authors","summary":"Personal bio, limited to 600-800 English characters","tags":null,"title":"Ruize Xu","type":"authors"},{"authors":null,"categories":null,"content":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He received his master\u0026rsquo;s degree from China Agricultural University in 2020 and has been with GeWu-Lab since then. His recent research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and to pursue warm artificial intelligence research! People who are interested in my research domain are very welcome and should not hesitate to contact me. For more information, please visit his personal homepage. Valar Morghulis!\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"537de72d4cb178cea6fbf2b2a92ea589","permalink":"/authors/20_guangyao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_guangyao/","section":"authors","summary":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He received his master\u0026rsquo;s degree from China Agricultural University in 2020 and has been with GeWu-Lab since then. His recent research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and to pursue warm artificial intelligence research! People who","tags":null,"title":"Guangyao Li","type":"authors"},{"authors":null,"categories":null,"content":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He received his undergraduate degree from the School of Information, Renmin University of China in 2020 and has been with GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. 
He is also devoted to helping the visually impaired with AI in both technology and practice.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"22debf3f166bda4bfb28c8317489f918","permalink":"/authors/20_xiaokang/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xiaokang/","section":"authors","summary":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He received his undergraduate degree from the School of Information, Renmin University of China in 2020 and has been with GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. He is also devoted to helping the visually impaired with AI in both technology and practice.","tags":null,"title":"Xiaokang Peng","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"55a49bcd8ae300a0362a45302ca97c26","permalink":"/authors/20_xuemin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xuemin/","section":"authors","summary":"Personal bio, limited to 600-800 English characters","tags":null,"title":"Xuemin Liu","type":"authors"},{"authors":null,"categories":null,"content":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-supervised Representation Learning. Now he is working on video understanding and speaker diarization tasks for complex speech scenarios. He is also interested in Internet finance, and received his Bachelor of Finance from Renmin University of China in addition to his Computer Science degree.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"033ae9c233d8ca15172e0f0eb482735e","permalink":"/authors/20_yixin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_yixin/","section":"authors","summary":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-supervised Representation Learning. Now he is working on video understanding and speaker diarization tasks for complex speech scenarios. He is also interested in Internet finance, and received his Bachelor of Finance from Renmin University of China in addition to his Computer Science degree.","tags":null,"title":"Yixin Xu","type":"authors"},{"authors":null,"categories":null,"content":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate studies he worked with Prof. Di Hu. Now Rui is a Ph.D. student in the Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"9434b9dca31f1f23a676f2b869e0c881","permalink":"/authors/21_ruiqian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_ruiqian/","section":"authors","summary":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate studies he worked with Prof. Di Hu. Now Rui is a Ph.D. student in the Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof.
Dahua Lin.","tags":null,"title":"Rui Qian","type":"authors"},{"authors":null,"categories":null,"content":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"57b1d4e29185f3870d53fc65c766173e","permalink":"/authors/21_yake/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_yake/","section":"authors","summary":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.","tags":null,"title":"Yake Wei","type":"authors"},{"authors":null,"categories":null,"content":"Andong Deng spent a wonderful year at GeWu Lab doing research on multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an upcoming PhD student in 2022 Fall at the Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c95476ad24cc214056b3d2c5e8c90f17","permalink":"/authors/22_andong/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_andong/","section":"authors","summary":"Andong Deng spent a wonderful year at GeWu Lab doing research on multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an upcoming PhD student in 2022 Fall at the Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.","tags":null,"title":"Andong Deng","type":"authors"},{"authors":null,"categories":null,"content":"Wenke is a Ph.D. student since 2022 Fall at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focuses on building a generalizable manipulation policy with computer vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a2791369e75b13b52139d9860293bdd5","permalink":"/authors/22_wenke/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenke/","section":"authors","summary":"Wenke is a Ph.D. student since 2022 Fall at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focuses on building a generalizable manipulation policy with computer vision.","tags":null,"title":"Wenke Xia","type":"authors"},{"authors":null,"categories":null,"content":"Wenxuan is a second-year Ph.D. student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He received his bachelor\u0026rsquo;s and master\u0026rsquo;s degrees from Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. 
Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"cd37724dba9b446f1c1307e40cd45632","permalink":"/authors/22_wenxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenxuan/","section":"authors","summary":"Wenxuan is a second-year Ph.D. student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He received his bachelor\u0026rsquo;s and master\u0026rsquo;s degrees from Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.","tags":null,"title":"Wenxuan Hou","type":"authors"},{"authors":null,"categories":null,"content":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on scene understanding in embodied AI with multi-modal information.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a389590984a0c3fb50de499f8df2d4c0","permalink":"/authors/22_xincheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_xincheng/","section":"authors","summary":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on scene understanding in embodied AI with multi-modal information.","tags":null,"title":"Xincheng Pang","type":"authors"},{"authors":null,"categories":null,"content":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"d884fc3eb1e2b2382def5073cec5e105","permalink":"/authors/22_zequn/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_zequn/","section":"authors","summary":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.","tags":null,"title":"Zequn Yang","type":"authors"},{"authors":null,"categories":null,"content":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He received his bachelor\u0026rsquo;s degree from Dalian University of Technology in 2023. 
Currently his research interests focus on Large Language Models and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"0f875044223f8afd458b089859ba38d8","permalink":"/authors/23_henghui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_henghui/","section":"authors","summary":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He received his bachelor\u0026rsquo;s degree from Dalian University of Technology in 2023. Currently his research interests focus on Large Language Models and cross-modal generation.","tags":null,"title":"Henghui Du","type":"authors"},{"authors":null,"categories":null,"content":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanism of multi-modal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"8808a5aa1460c5cb4fad660d28f8520a","permalink":"/authors/23_jiahao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jiahao/","section":"authors","summary":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanism of multi-modal learning.","tags":null,"title":"Jiahao Li","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"1697602eb95e74d0fb1a9247c1f07489","permalink":"/authors/23_jianghan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jianghan/","section":"authors","summary":"Personal bio, limited to 600-800 English characters","tags":null,"title":"Jianghan Chao","type":"authors"},{"authors":null,"categories":null,"content":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bdeafc1f9127d19078299ad17ddcf547","permalink":"/authors/23_jingxian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jingxian/","section":"authors","summary":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.","tags":null,"title":"Jingxian Lu","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"b1f3ebd7d0f58e6a501810a383c4a9ed","permalink":"/authors/23_jinlin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jinlin/","section":"authors","summary":"Personal bio, limited to 600-800 English characters","tags":null,"title":"Jinlin Li","type":"authors"},{"authors":null,"categories":null,"content":"Juncheng is a third-year student of School of Artificial Intelligence, University of Chinese Academy of Sciences. 
His research interests include audio-visual localization and segmentation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"874c09024781e4fd5375423eaef9c9e8","permalink":"/authors/23_juncheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_juncheng/","section":"authors","summary":"Juncheng is a third-year student of School of Artificial Intelligence, University of Chinese Academy of Sciences. His research interests include audio-visual localization and segmentation.","tags":null,"title":"Juncheng Ma","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"834fe556c30cd4180a6dc4c692fd63d9","permalink":"/authors/23_liangce/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_liangce/","section":"authors","summary":"Personal bio, limited to 600-800 English characters","tags":null,"title":"Ce Liang","type":"authors"},{"authors":null,"categories":null,"content":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning, including sentiment, segmentation and foundation models.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bf84fe39ef0b614af0ae82d08359c784","permalink":"/authors/23_peiwen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_peiwen/","section":"authors","summary":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning, including sentiment, segmentation and foundation models.","tags":null,"title":"Peiwen Sun","type":"authors"},{"authors":null,"categories":null,"content":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c29a63de0242659b43a43451fc077046","permalink":"/authors/23_ruoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_ruoxuan/","section":"authors","summary":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.","tags":null,"title":"Ruoxuan Feng","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"364786f50ed04bbfb2309f8069cdbe90","permalink":"/authors/23_shaoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_shaoxuan/","section":"authors","summary":"Personal bio, limited to 600-800 English characters","tags":null,"title":"Shaoxuan Xu","type":"authors"},{"authors":null,"categories":null,"content":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. 
He is interested in image editing with generative diffusion models and image deblurring.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"062e874f9d4216ee7c15e6afe41e1631","permalink":"/authors/23_siwei/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_siwei/","section":"authors","summary":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. He is interested in image editing with generative diffusion models and image deblurring.","tags":null,"title":"Siwei Li","type":"authors"},{"authors":null,"categories":null,"content":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bda305ecfaa132f6e49d2dd2566d0f25","permalink":"/authors/23_yaoting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_yaoting/","section":"authors","summary":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.","tags":null,"title":"Yaoting Wang","type":"authors"},{"authors":null,"categories":null,"content":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"7a6ee1988cb2fa93bfeee88a094c7489","permalink":"/authors/24_jirui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_jirui/","section":"authors","summary":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.","tags":null,"title":"JiRui Huang","type":"authors"},{"authors":null,"categories":null,"content":"Yufan is a 2024 master\u0026rsquo;s graduate from Zhejiang University. She is passionate about image generation, cross-modal generation and multimodal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c14f420ee1c389fe8957ee3db85085ee","permalink":"/authors/24_yufan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_yufan/","section":"authors","summary":"Yufan is a 2024 master\u0026rsquo;s graduate from Zhejiang University. She is passionate about image generation, cross-modal generation and multimodal learning.","tags":null,"title":"Yufan Wu","type":"authors"},{"authors":["dihu"],"categories":null,"content":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was a research scientist at Baidu Research. Di Hu obtained his Ph.D. degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via natural multimodal messages. 
He is an aficionado of cognitive neuroscience and wrote a study note on it during his undergraduate years. Inspired by what he learned from cognitive neuroscience, and what he observed and deliberated on in daily life, he is strongly convinced that pervasive, free, natural multimodal messages can provide sufficient information for perceiving, learning and understanding the environment, and even the agent itself, which promisingly makes multimodal learning one of the keys to achieving machine intelligence.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"2525497d367e79493fd32b198b28f040","permalink":"/authors/admin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/admin/","section":"authors","summary":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was a research scientist at Baidu Research. Di Hu obtained his Ph.D. degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via natural multimodal messages. He is an aficionado of","tags":null,"title":"Di Hu","type":"authors"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"852b491b0dcadb44b8f099f931db74c4","permalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"A Two-Stage Framework for Multiple Sound-Source Localization","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"b21459d2cd2aa98d5a771a396df3c29e","permalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","section":"publication","summary":"","tags":null,"title":"Ambient Sound Helps: Audiovisual Crowd Counting in Extreme Conditions","type":"publication"},{"authors":["Wenke Xia*","Xu Zhao*","Xincheng Pang","Changqing Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"7a5ff9681de843469038165a230c4f87","permalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","section":"publication","summary":"","tags":null,"title":"Balanced Audiovisual Dataset for Imbalance Analysis","type":"publication"},{"authors":["Xiaokang Peng*","Yake Wei*","Andong Deng","Dong Wang","Di
Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1cdda2159c4adeb4f31cb4e7f1a5ab8a","permalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","section":"publication","summary":"","tags":null,"title":"Balanced Multimodal Learning via On-the-fly Gradient Modulation","type":"publication"},{"authors":["Di Hu","Yake Wei","Rui Qian","Weiyao Lin","Ruihua Song","Ji-Rong Wen"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"91e67073102678aec9799732ceef49f3","permalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","section":"publication","summary":"","tags":null,"title":"Class-aware Sounding Objects Localization via Audiovisual Correspondence","type":"publication"},{"authors":["Yapeng Tian*","Di Hu*","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c0d82a52007e4e9ab50a2cfafdc4ac17","permalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","section":"publication","summary":"","tags":null,"title":"Co-Learn Sounding Object Visual Grounding and Visually Indicated Sound Separation in A Cycle","type":"publication"},{"authors":["Di Hu","Xuhong Li","Lichao Mou","Pu Jin","Dong Chen","Liping Jing","Xiaoxiang Zhu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c7688dd14aa743d0b927f94d97854f27","permalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","section":"publication","summary":"","tags":null,"title":"Cross-Task Transfer for Geotagged Audiovisual Aerial Scene Recognition","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac02b15b850ff085e6c9ad497f3a130c","permalink":"/publication/curriculum-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/curriculum-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Curriculum Audiovisual Learning","type":"publication"},{"authors":["Yapeng Tian","Di Hu","Chenliang 
Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"cd0308a1bfb55705c394057955f2375d","permalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","section":"publication","summary":"","tags":null,"title":"Cyclic Co-Learning of Sounding Object Visual Grounding and Sound Separation","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"202776673a51788c119f1451c9e313c2","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"00f72a8fe1deeb265958a59b94c2cd33","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"f6c0a9a658cdceee78bd291860181d99","permalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","section":"publication","summary":"","tags":null,"title":"Deep Linear Discriminant Analysis Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d1466a6c42ba930502049d24243f8b62","permalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Deep Multimodal Clustering for Unsupervised Audiovisual Learning Representation","type":"publication"},{"authors":["Di Hu - Chengze Wang - Feiping Nie - Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9e4cd76d6b972d54b50c190779f639a5","permalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","section":"publication","summary":"","tags":null,"title":"Dense Multimodal Fusion for Hierarchically Joint Representation","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong 
Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"4b1e10b4327cca00dfd58162571a2f8c","permalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","section":"publication","summary":"","tags":null,"title":"Discrete Spectral Hashing for Efficient Similarity Retrieval","type":"publication"},{"authors":["Di Hu","Rui Qian","Minyue Jiang","Xiao Tan","Shilei Wen","Errui Ding","Weiyao Lin","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6953eeac03ee85322e85eece2eeeb84","permalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","section":"publication","summary":"","tags":null,"title":"Discriminative Sounding Objects Localization via Self-supervised Audiovisual Matching","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiaoxiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3016d01c7b86e792f8778f7aba6fc44d","permalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","section":"publication","summary":"","tags":null,"title":"Does Ambient Sound Help? 
- Audiovisual Crowd Counting","type":"publication"},{"authors":["Yake Wei","Ruoxuan Feng","Zihe Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"0278c6a7c52909fa5c55eaf522569e7f","permalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","section":"publication","summary":"","tags":null,"title":"Enhancing Multi-modal Cooperation via Fine-grained Modality Valuation","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Di Hu","Hang Zhou","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"28bd51450c42258842f48363910f83c8","permalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"Exploiting Visual Context Semantics for Sound Source Localization","type":"publication"},{"authors":["Sijia Yang","Haoyi Xiong","Di Hu","Kaibo Xu","Licheng Wang","Peizhen Zhu","Zeyi Sun"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ed52bf34eef1f16fc89a0fc5c32fa152","permalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","section":"publication","summary":"","tags":null,"title":"Generalising Combinatorial Discriminant Analysis through Conditioning Truncated Rayleigh Flow","type":"publication"},{"authors":["Zequn Yang","Han Zhang","Yake Wei","Zheng Wang","Feiping Nie","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"76c72a76e4cf8516d166a780e270c79b","permalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","section":"publication","summary":"","tags":null,"title":"Geometric-Inspired Graph-based Incomplete Multi-view Clustering","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8fe03bbbdab04c3ee4ecc7e01ecd723c","permalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Heterogeneous Scene Analysis via Self-supervised Audiovisual Learning","type":"publication"},{"authors":["Xuelong Li","Di Hu","Xiaoqiang 
Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1850ab6a7473c571586aed28d796ac66","permalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","section":"publication","summary":"","tags":null,"title":"Image2song: Song Retrieval via Bridging Image Content and Lyric Words","type":"publication"},{"authors":["Wenke Xia","Dong Wang","Xincheng Pang","Zhigang Wang","Bin Zhao","Di Hu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"82a334df3b6181644b600e4679ce595c","permalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","section":"publication","summary":"","tags":null,"title":"Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs","type":"publication"},{"authors":["Xuelong Li","Di Hu","Feiping Nie"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"24881bb5f959ea9f061fb67469d72eb9","permalink":"/publication/large-graph-hashing-with-spectral-rotation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/large-graph-hashing-with-spectral-rotation/","section":"publication","summary":"","tags":null,"title":"Large Graph Hashing with Spectral Rotation","type":"publication"},{"authors":["Yake Wei","Di Hu","Yapeng Tian","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"67b2f40c745acaa698a385e2742a25bc","permalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","section":"publication","summary":"","tags":null,"title":"Learning in Audio-visual Context: A Review, Analysis, and New Perspective","type":"publication"},{"authors":["Guangyao Li*","Yake Wei*","Yapeng Tian*","Chenliang Xu","Ji-Rong Wen","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"125a97cdaa82fb5a0ec455cfd53c1b46","permalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","section":"publication","summary":"","tags":null,"title":"Learning to Answer Questions in Dynamic Audio-Visual Scenarios","type":"publication"},{"authors":["Di Hu","Dong Wang","Xuelong Li","Feiping Nie","Qi Wang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c716bb52e5e46a2dbaebc46fda1517d6","permalink":"/publication/listen-to-the-image/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/listen-to-the-image/","section":"publication","summary":"","tags":null,"title":"Listen to the Image","type":"publication"},{"authors":["Ruize 
Xu","Ruoxuan Feng","Shi-xiong Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8e1ed6fc418000d90eed8231ce30fa73","permalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","section":"publication","summary":"","tags":null,"title":"MMCosine: Multi-Modal Cosine Loss Towards Balanced Audio-Visual Fine-Grained Learning","type":"publication"},{"authors":["Yake Wei","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ad6411f0202e0562a67a75820ff098f","permalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","section":"publication","summary":"","tags":null,"title":"MMPareto: Innocent Uni-modal Assistance for Enhanced Multi-modal Learning","type":"publication"},{"authors":["Guangyao Li","Yixin Xu","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"38daed7d60d2831123ddca90ac47d9b7","permalink":"/publication/multi-scale-attention-for-audio-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multi-scale-attention-for-audio-question-answering/","section":"publication","summary":"","tags":null,"title":"Multi-Scale Attention for Audio Question Answering","type":"publication"},{"authors":["Di Hu","Xiaoqiang Lu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"09affd8c2ded11a4005e40db4e1b960d","permalink":"/publication/multimodal-learning-via-exploring-deep-semantic-similarity/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multimodal-learning-via-exploring-deep-semantic-similarity/","section":"publication","summary":"","tags":null,"title":"Multimodal Learning via Exploring Deep Semantic Similarity","type":"publication"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"88c9d48496c44a5980763aa946676e9e","permalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","section":"publication","summary":"","tags":null,"title":"Multiple Sound Sources Localization from Coarse to Fine","type":"publication"},{"authors":["Ziyun Li","Xinshao Wang","Haojin Yang","Di Hu","Neil M Robertson","David A Clifton","Christoph Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"a48ea4ca10463e6ef980903ef312977d","permalink":"/publication/not-all-knowledge-is-created-equal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/not-all-knowledge-is-created-equal/","section":"publication","summary":"","tags":null,"title":"Not All Knowledge Is Created 
Equal","type":"publication"},{"authors":["Guangyao Li","Wenxuan Hou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"545100c95da731d9faeb7037b5801449","permalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","section":"publication","summary":"","tags":null,"title":"Progressive Spatio-temporal Perception for Audio-Visual Question Answering","type":"publication"},{"authors":["Yaoting Wang*","Weisong Liu*","Guangyao Li","Jian Ding","Di Hu","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d414aab41857970b60155d360ceac88","permalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","section":"publication","summary":"","tags":null,"title":"Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer","type":"publication"},{"authors":["Zequn Yang","Yake Wei","Ce Liang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d069d78586930bf2dd726ae7c0b00c9b","permalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","section":"publication","summary":"","tags":null,"title":"Quantifying and Enhancing Multi-modal Robustness with Modality Preference","type":"publication"},{"authors":["Ruoxuan Feng","Wenke Xia","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"450f11c7cb976aa1013ed40cd3963388","permalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","section":"publication","summary":"","tags":null,"title":"Revisiting Pre-training in Audio-Visual Learning","type":"publication"},{"authors":["Wenke Xia","Xingjian Li","Andong Deng","Haoyi Xiong","Dejing Dou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d68814ab18c4fd432535b2592c31988","permalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","section":"publication","summary":"","tags":null,"title":"Robust Cross-modal Knowledge Distillation for Unconstrained Videos","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Wanli Ouyang","Hang Zhou","Di 
Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"acd80d91071719018f44e8766871cb74","permalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","section":"publication","summary":"","tags":null,"title":"SeCo: Separating Unknown Musical Visual Sounds with Consistency Guidance","type":"publication"},{"authors":["Konrad Heidler","Lichao Mou","Di Hu","Pu Jin","Guangyao Li","Chuang Gan","Ji-Rong Wen","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"36c9fd21445495f69bad705471393094","permalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","section":"publication","summary":"","tags":null,"title":"Self-supervised Audiovisual Representation Learning for Remote Sensing Data","type":"publication"},{"authors":["Di Hu","Zheng Wang","Feiping Nie","Rong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac1ac86aa9c1772d446b7594a05d9100","permalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","section":"publication","summary":"","tags":null,"title":"Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis","type":"publication"},{"authors":["Dongzhan Zhou","Xinchi Zhou","Di Hu","Hang Zhou","Lei Bai","Ziwei Liu","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3f2c9d5779b3cec3c9b69a845335b218","permalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","section":"publication","summary":"","tags":null,"title":"SepFusion: Finding Optimal Fusion Structures for Visual Sound Separation","type":"publication"},{"authors":["Tao Wu","Xuewei Li","Zhongang Qi","Di Hu","Xintao Wang","Ying Shan","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"75b3553d0dff4fb43ea7284e9d6f8d1c","permalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","section":"publication","summary":"","tags":null,"title":"SphereDiffusion: Spherical Geometry-aware Distortion Resilient Diffusion Model","type":"publication"},{"authors":["ZiYun Li","Jona Otholt","Ben Dai","Di Hu","Christoph Meinel","Haojin 
Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"50b53591fe6d761222acbe7d191d3e47","permalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","section":"publication","summary":"","tags":null,"title":"Supervised Knowledge May Hurt Novel Class Discovery Performance","type":"publication"},{"authors":["Di Hu","Xuelong Li","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6704b0eb55495bb979be6fcbb8243ae","permalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","section":"publication","summary":"","tags":null,"title":"Temporal Multimodal Learning in Audiovisual Speech Recognition","type":"publication"},{"authors":["Dong Wang","Di Hu","Xingjian Li","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"807bb234ac2724175550dbdf52f64d08","permalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","section":"publication","summary":"","tags":null,"title":"Temporal Relational Modeling with Self-Supervision for Action Segmentation","type":"publication"},{"authors":["Hongpeng Lin*","Ludan Ruan*","Wenke Xia*","Peiyu Liu","Jingyuan Wen","Yixin Xu","Di Hu","Ruihua Song","Wayne Xin Zhao","Qin Jin","Zhiwu Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"dd91d75ebb359650640b7b6c75634dff","permalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","section":"publication","summary":"","tags":null,"title":"TikTalk: A Video-Based Dialogue Dataset for Multi-Modal Chitchat in Real World","type":"publication"},{"authors":["Xingjian Li","Di Hu","Xuhong Li","Haoyi Xiong","Zhi Ye","Zhipeng Wang","Chengzhong Xu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"5b40a464bbfccb601c6d4c37e85cf81e","permalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","section":"publication","summary":"","tags":null,"title":"Towards Accurate Knowledge Transfer via Target-awareness Representation Disentanglement","type":"publication"},{"authors":["Andong Deng","Xingjian Li","Di Hu","Tianyang Wang","Haoyi Xiong","Chengzhong 
Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"113edd12d767a54c1fdd10685167cd5c","permalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","section":"publication","summary":"","tags":null,"title":"Towards Inadequately Pre-trained Models in Transfer Learning","type":"publication"},{"authors":["Wenxuan Hou*","Guangyao Li*","Yapeng Tian","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"56c2e256bf8d4a20cdffe034f430aaef","permalink":"/publication/towards-long-form-audio-visual-video-understanding/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-long-form-audio-visual-video-understanding/","section":"publication","summary":"","tags":null,"title":"Towards Long Form Audio-visual Video Understanding","type":"publication"},{"authors":["Zechen Bai","Zhigang Wang","Jian Wang","Di Hu","Errui Ding"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9905f139a565b4f5eabfc5902965f851","permalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","section":"publication","summary":"","tags":null,"title":"Unsupervised Multi-Source Domain Adaptation for Person Re-Identification","type":"publication"},{"authors":["Xian Liu","Rui Qian","Hang Zhou","Di Hu","Weiyao Lin","Ziwei Liu","Bolei Zhou","Xiaowei Zhou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ca462fd19e2017e2ecb2b26a145ef250","permalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","section":"publication","summary":"","tags":null,"title":"Visual Sound Localization in-the-Wild by Cross-Modal Interference Erasing","type":"publication"}] \ No newline at end of file +[{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"4e73f707a3c1da0c5d8d165361161c7b","permalink":"/authors/19_ruize/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/19_ruize/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Ruize Xu","type":"authors"},{"authors":null,"categories":null,"content":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He got his master degree at China Agricultural University in 2020 and got into GeWu-Lab since then. His recently research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and make warm artificial intelligence research! People who are interested in my research domain are very welcome and do not hesitate to contact me actively. For more information, please visit his personal homepage. 
Valar Morghulis!\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"537de72d4cb178cea6fbf2b2a92ea589","permalink":"/authors/20_guangyao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_guangyao/","section":"authors","summary":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He received his master\u0026rsquo;s degree from China Agricultural University in 2020 and has been with GeWu-Lab since then. His recent research interests include audio-visual learning and scene understanding. He hopes to brave the no-man\u0026rsquo;s land of scientific research and build warm artificial intelligence! People who","tags":null,"title":"Guangyao Li","type":"authors"},{"authors":null,"categories":null,"content":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He received his undergraduate degree from the School of Information, Renmin University of China in 2020 and has been with GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. He is also devoted to helping the visually impaired with AI in both technology and practice.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"22debf3f166bda4bfb28c8317489f918","permalink":"/authors/20_xiaokang/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xiaokang/","section":"authors","summary":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He received his undergraduate degree from the School of Information, Renmin University of China in 2020 and has been with GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. He is also devoted to helping the visually impaired with AI in both technology and practice.","tags":null,"title":"Xiaokang Peng","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"55a49bcd8ae300a0362a45302ca97c26","permalink":"/authors/20_xuemin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xuemin/","section":"authors","summary":"Personal bio, limited to 600-800 English characters.","tags":null,"title":"Xuemin Liu","type":"authors"},{"authors":null,"categories":null,"content":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-supervised Representation Learning. Now he is working on video understanding and speaker diarization tasks for complex speech scenarios. He is also interested in Internet finance, and received his Bachelor of Finance from Renmin University of China in addition to his Computer Science degree.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"033ae9c233d8ca15172e0f0eb482735e","permalink":"/authors/20_yixin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_yixin/","section":"authors","summary":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-supervised Representation Learning. 
Now he is working on video understanding and speaker diarization tasks for complex speech scenarios. He is also interested in Internet finance, and received his Bachelor of Finance from Renmin University of China in addition to his Computer Science degree.","tags":null,"title":"Yixin Xu","type":"authors"},{"authors":null,"categories":null,"content":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate studies, he worked with Prof. Di Hu. Now Rui is a Ph.D. student in the Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"9434b9dca31f1f23a676f2b869e0c881","permalink":"/authors/21_ruiqian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_ruiqian/","section":"authors","summary":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate studies, he worked with Prof. Di Hu. Now Rui is a Ph.D. student in the Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.","tags":null,"title":"Rui Qian","type":"authors"},{"authors":null,"categories":null,"content":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"57b1d4e29185f3870d53fc65c766173e","permalink":"/authors/21_yake/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_yake/","section":"authors","summary":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.","tags":null,"title":"Yake Wei","type":"authors"},{"authors":null,"categories":null,"content":"Andong Deng spent a wonderful year at GeWu Lab doing research on multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an incoming PhD student in Fall 2022 at the Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c95476ad24cc214056b3d2c5e8c90f17","permalink":"/authors/22_andong/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_andong/","section":"authors","summary":"Andong Deng spent a wonderful year at GeWu Lab doing research on multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an incoming PhD student in Fall 2022 at the Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. 
His research interests include multi-modal learning, video understanding and 3D vision.","tags":null,"title":"Andong Deng","type":"authors"},{"authors":null,"categories":null,"content":"Wenke has been a Ph.D. student at Gaoling School of Artificial Intelligence, Renmin University of China since Fall 2022, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focuses on building a generalizable manipulation policy with computer vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a2791369e75b13b52139d9860293bdd5","permalink":"/authors/22_wenke/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenke/","section":"authors","summary":"Wenke has been a Ph.D. student at Gaoling School of Artificial Intelligence, Renmin University of China since Fall 2022, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focuses on building a generalizable manipulation policy with computer vision.","tags":null,"title":"Wenke Xia","type":"authors"},{"authors":null,"categories":null,"content":"Wenxuan is a second-year Ph.D. student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He received his bachelor\u0026rsquo;s degree and master\u0026rsquo;s degree from Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"cd37724dba9b446f1c1307e40cd45632","permalink":"/authors/22_wenxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenxuan/","section":"authors","summary":"Wenxuan is a second-year Ph.D. student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He received his bachelor\u0026rsquo;s degree and master\u0026rsquo;s degree from Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.","tags":null,"title":"Wenxuan Hou","type":"authors"},{"authors":null,"categories":null,"content":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on multi-modal scene understanding in embodied AI.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a389590984a0c3fb50de499f8df2d4c0","permalink":"/authors/22_xincheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_xincheng/","section":"authors","summary":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on multi-modal scene understanding in embodied AI.","tags":null,"title":"Xincheng Pang","type":"authors"},{"authors":null,"categories":null,"content":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. 
He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"d884fc3eb1e2b2382def5073cec5e105","permalink":"/authors/22_zequn/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_zequn/","section":"authors","summary":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.","tags":null,"title":"Zequn Yang","type":"authors"},{"authors":null,"categories":null,"content":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He received his bachelor\u0026rsquo;s degree from Dalian University of Technology in 2023. Currently his research interests focus on Large Language Models and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"0f875044223f8afd458b089859ba38d8","permalink":"/authors/23_henghui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_henghui/","section":"authors","summary":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He received his bachelor\u0026rsquo;s degree from Dalian University of Technology in 2023. Currently his research interests focus on Large Language Models and cross-modal generation.","tags":null,"title":"Henghui Du","type":"authors"},{"authors":null,"categories":null,"content":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanisms of multi-modal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"8808a5aa1460c5cb4fad660d28f8520a","permalink":"/authors/23_jiahao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jiahao/","section":"authors","summary":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanisms of multi-modal learning.","tags":null,"title":"Jiahao Li","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"1697602eb95e74d0fb1a9247c1f07489","permalink":"/authors/23_jianghan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jianghan/","section":"authors","summary":"Personal bio, limited to 600-800 English characters.","tags":null,"title":"Jianghan Chao","type":"authors"},{"authors":null,"categories":null,"content":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. 
He is interested in robot manipulation and perception from interaction.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bdeafc1f9127d19078299ad17ddcf547","permalink":"/authors/23_jingxian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jingxian/","section":"authors","summary":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.","tags":null,"title":"Jingxian Lu","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"b1f3ebd7d0f58e6a501810a383c4a9ed","permalink":"/authors/23_jinlin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jinlin/","section":"authors","summary":"Personal bio, limited to 600-800 English characters.","tags":null,"title":"Jinlin Li","type":"authors"},{"authors":null,"categories":null,"content":"Juncheng is a third-year student of the School of Artificial Intelligence, University of Chinese Academy of Sciences. His research interests include audio-visual localization and segmentation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"874c09024781e4fd5375423eaef9c9e8","permalink":"/authors/23_juncheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_juncheng/","section":"authors","summary":"Juncheng is a third-year student of the School of Artificial Intelligence, University of Chinese Academy of Sciences. His research interests include audio-visual localization and segmentation.","tags":null,"title":"Juncheng Ma","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"834fe556c30cd4180a6dc4c692fd63d9","permalink":"/authors/23_liangce/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_liangce/","section":"authors","summary":"Personal bio, limited to 600-800 English characters.","tags":null,"title":"Ce Liang","type":"authors"},{"authors":null,"categories":null,"content":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning, including sentiment, segmentation and foundation models.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bf84fe39ef0b614af0ae82d08359c784","permalink":"/authors/23_peiwen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_peiwen/","section":"authors","summary":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning, including sentiment, segmentation and foundation models.","tags":null,"title":"Peiwen Sun","type":"authors"},{"authors":null,"categories":null,"content":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. 
He is interested in multi-modal learning and embodied AI.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c29a63de0242659b43a43451fc077046","permalink":"/authors/23_ruoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_ruoxuan/","section":"authors","summary":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.","tags":null,"title":"Ruoxuan Feng","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, limited to 600-800 English characters.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"364786f50ed04bbfb2309f8069cdbe90","permalink":"/authors/23_shaoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_shaoxuan/","section":"authors","summary":"Personal bio, limited to 600-800 English characters.","tags":null,"title":"Shaoxuan Xu","type":"authors"},{"authors":null,"categories":null,"content":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. He is interested in image editing with generative diffusion models and image deblurring.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"062e874f9d4216ee7c15e6afe41e1631","permalink":"/authors/23_siwei/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_siwei/","section":"authors","summary":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. He is interested in image editing with generative diffusion models and image deblurring.","tags":null,"title":"Siwei Li","type":"authors"},{"authors":null,"categories":null,"content":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bda305ecfaa132f6e49d2dd2566d0f25","permalink":"/authors/23_yaoting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_yaoting/","section":"authors","summary":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.","tags":null,"title":"Yaoting Wang","type":"authors"},{"authors":null,"categories":null,"content":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"7a6ee1988cb2fa93bfeee88a094c7489","permalink":"/authors/24_jirui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_jirui/","section":"authors","summary":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. 
She is interested in multimodal understanding and cross-modal generation.","tags":null,"title":"JiRui Huang","type":"authors"},{"authors":["dihu"],"categories":null,"content":"Di Hu is a tenure-track faculty member at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was a research scientist at Baidu Research. Di Hu obtained his Ph.D. degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via natural multimodal messages. He is an aficionado of cognitive neuroscience and wrote a study note during his undergraduate studies. Inspired by what he has learned from cognitive neuroscience, and by what he has observed and deliberated on in daily life, he is strongly convinced that pervasive, free, natural multimodal messages can provide sufficient information for perceiving, learning and understanding the environment, and even the agent itself, which promisingly makes multimodal learning one of the keys to achieving machine intelligence.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"2525497d367e79493fd32b198b28f040","permalink":"/authors/admin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/admin/","section":"authors","summary":"Di Hu is a tenure-track faculty member at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was a research scientist at Baidu Research. Di Hu obtained his Ph.D. degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via natural multimodal messages. 
He is an aficionado of","tags":null,"title":"Di Hu","type":"authors"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"852b491b0dcadb44b8f099f931db74c4","permalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"A Two-Stage Framework for Multiple Sound-Source Localization","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"b21459d2cd2aa98d5a771a396df3c29e","permalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","section":"publication","summary":"","tags":null,"title":"Ambient Sound Helps: Audiovisual Crowd Counting in Extreme Conditions","type":"publication"},{"authors":["Wenke Xia*","Xu Zhao*","Xincheng Pang","Changqing Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"7a5ff9681de843469038165a230c4f87","permalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","section":"publication","summary":"","tags":null,"title":"Balanced Audiovisual Dataset for Imbalance Analysis","type":"publication"},{"authors":["Xiaokang Peng*","Yake Wei*","Andong Deng","Dong Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1cdda2159c4adeb4f31cb4e7f1a5ab8a","permalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","section":"publication","summary":"","tags":null,"title":"Balanced Multimodal Learning via On-the-fly Gradient Modulation","type":"publication"},{"authors":["Yaoting Wang","Peiwen Sun","Yuanchao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e2d14df72502e78a30f83d09310b98b6","permalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","section":"publication","summary":"","tags":null,"title":"Can Textual Semantics Mitigate Sounding Object SegmentationPreference?","type":"publication"},{"authors":["Di Hu","Yake Wei","Rui Qian","Weiyao Lin","Ruihua Song","Ji-Rong 
Wen"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"91e67073102678aec9799732ceef49f3","permalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","section":"publication","summary":"","tags":null,"title":"Class-aware Sounding Objects Localization via Audiovisual Correspondence","type":"publication"},{"authors":["Yapeng Tian*","Di Hu*","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c0d82a52007e4e9ab50a2cfafdc4ac17","permalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","section":"publication","summary":"","tags":null,"title":"Co-Learn Sounding Object Visual Grounding and Visually Indicated Sound Separation in A Cycle","type":"publication"},{"authors":["Di Hu","Xuhong Li","Lichao Mou","Pu Jin","Dong Chen","Liping Jing","Xiaoxiang Zhu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c7688dd14aa743d0b927f94d97854f27","permalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","section":"publication","summary":"","tags":null,"title":"Cross-Task Transfer for Geotagged Audiovisual Aerial Scene Recognition","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac02b15b850ff085e6c9ad497f3a130c","permalink":"/publication/curriculum-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/curriculum-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Curriculum Audiovisual Learning","type":"publication"},{"authors":["Yapeng Tian","Di Hu","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"cd0308a1bfb55705c394057955f2375d","permalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","section":"publication","summary":"","tags":null,"title":"Cyclic Co-Learning of Sounding Object Visual Grounding and Sound Separation","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong 
Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"202776673a51788c119f1451c9e313c2","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"00f72a8fe1deeb265958a59b94c2cd33","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"f6c0a9a658cdceee78bd291860181d99","permalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","section":"publication","summary":"","tags":null,"title":"Deep Linear Discriminant Analysis Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d1466a6c42ba930502049d24243f8b62","permalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Deep Multimodal Clustering for Unsupervised Audiovisual Learning Representation","type":"publication"},{"authors":["Di Hu - Chengze Wang - Feiping Nie - Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9e4cd76d6b972d54b50c190779f639a5","permalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","section":"publication","summary":"","tags":null,"title":"Dense Multimodal Fusion for Hierarchically Joint Representation","type":"publication"},{"authors":["Xincheng Pang","Wenke Xia","Zhigang Wang","Bin Zhao","Di Hu","Dong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"832f5776c5daa77fa5df21ce843a3196","permalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","section":"publication","summary":"","tags":null,"title":"Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information 
Injection","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"4b1e10b4327cca00dfd58162571a2f8c","permalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","section":"publication","summary":"","tags":null,"title":"Discrete Spectral Hashing for Efficient Similarity Retrieval","type":"publication"},{"authors":["Di Hu","Rui Qian","Minyue Jiang","Xiao Tan","Shilei Wen","Errui Ding","Weiyao Lin","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6953eeac03ee85322e85eece2eeeb84","permalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","section":"publication","summary":"","tags":null,"title":"Discriminative Sounding Objects Localization via Self-supervised Audiovisual Matching","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiaoxiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3016d01c7b86e792f8778f7aba6fc44d","permalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","section":"publication","summary":"","tags":null,"title":"Does Ambient Sound Help? 
- Audiovisual Crowd Counting","type":"publication"},{"authors":["Yake Wei","Ruoxuan Feng","Zihe Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"0278c6a7c52909fa5c55eaf522569e7f","permalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","section":"publication","summary":"","tags":null,"title":"Enhancing Multi-modal Cooperation via Fine-grained Modality Valuation","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Di Hu","Hang Zhou","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"28bd51450c42258842f48363910f83c8","permalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"Exploiting Visual Context Semantics for Sound Source Localization","type":"publication"},{"authors":["Sijia Yang","Haoyi Xiong","Di Hu","Kaibo Xu","Licheng Wang","Peizhen Zhu","Zeyi Sun"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ed52bf34eef1f16fc89a0fc5c32fa152","permalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","section":"publication","summary":"","tags":null,"title":"Generalising Combinatorial Discriminant Analysis through Conditioning Truncated Rayleigh Flow","type":"publication"},{"authors":["Zequn Yang","Han Zhang","Yake Wei","Zheng Wang","Feiping Nie","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"76c72a76e4cf8516d166a780e270c79b","permalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","section":"publication","summary":"","tags":null,"title":"Geometric-Inspired Graph-based Incomplete Multi-view Clustering","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8fe03bbbdab04c3ee4ecc7e01ecd723c","permalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Heterogeneous Scene Analysis via Self-supervised Audiovisual Learning","type":"publication"},{"authors":["Xuelong Li","Di Hu","Xiaoqiang 
Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1850ab6a7473c571586aed28d796ac66","permalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","section":"publication","summary":"","tags":null,"title":"Image2song: Song Retrieval via Bridging Image Content and Lyric Words","type":"publication"},{"authors":["Wenke Xia","Dong Wang","Xincheng Pang","Zhigang Wang","Bin Zhao","Di Hu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"82a334df3b6181644b600e4679ce595c","permalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","section":"publication","summary":"","tags":null,"title":"Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs","type":"publication"},{"authors":["Xuelong Li","Di Hu","Feiping Nie"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"24881bb5f959ea9f061fb67469d72eb9","permalink":"/publication/large-graph-hashing-with-spectral-rotation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/large-graph-hashing-with-spectral-rotation/","section":"publication","summary":"","tags":null,"title":"Large Graph Hashing with Spectral Rotation","type":"publication"},{"authors":["Yake Wei","Di Hu","Yapeng Tian","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"67b2f40c745acaa698a385e2742a25bc","permalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","section":"publication","summary":"","tags":null,"title":"Learning in Audio-visual Context: A Review, Analysis, and New Perspective","type":"publication"},{"authors":["Guangyao Li*","Yake Wei*","Yapeng Tian*","Chenliang Xu","Ji-Rong Wen","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"125a97cdaa82fb5a0ec455cfd53c1b46","permalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","section":"publication","summary":"","tags":null,"title":"Learning to Answer Questions in Dynamic Audio-Visual Scenarios","type":"publication"},{"authors":["Di Hu","Dong Wang","Xuelong Li","Feiping Nie","Qi Wang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c716bb52e5e46a2dbaebc46fda1517d6","permalink":"/publication/listen-to-the-image/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/listen-to-the-image/","section":"publication","summary":"","tags":null,"title":"Listen to the Image","type":"publication"},{"authors":["Ruize 
Xu","Ruoxuan Feng","Shi-xiong Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8e1ed6fc418000d90eed8231ce30fa73","permalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","section":"publication","summary":"","tags":null,"title":"MMCosine: Multi-Modal Cosine Loss Towards Balanced Audio-Visual Fine-Grained Learning","type":"publication"},{"authors":["Yake Wei","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ad6411f0202e0562a67a75820ff098f","permalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","section":"publication","summary":"","tags":null,"title":"MMPareto: Innocent Uni-modal Assistance for Enhanced Multi-modal Learning","type":"publication"},{"authors":["Guangyao Li","Yixin Xu","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"38daed7d60d2831123ddca90ac47d9b7","permalink":"/publication/multi-scale-attention-for-audio-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multi-scale-attention-for-audio-question-answering/","section":"publication","summary":"","tags":null,"title":"Multi-Scale Attention for Audio Question Answering","type":"publication"},{"authors":["Di Hu","Xiaoqiang Lu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"09affd8c2ded11a4005e40db4e1b960d","permalink":"/publication/multimodal-learning-via-exploring-deep-semantic-similarity/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multimodal-learning-via-exploring-deep-semantic-similarity/","section":"publication","summary":"","tags":null,"title":"Multimodal Learning via Exploring Deep Semantic Similarity","type":"publication"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"88c9d48496c44a5980763aa946676e9e","permalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","section":"publication","summary":"","tags":null,"title":"Multiple Sound Sources Localization from Coarse to Fine","type":"publication"},{"authors":["Ziyun Li","Xinshao Wang","Haojin Yang","Di Hu","Neil M Robertson","David A Clifton","Christoph Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"a48ea4ca10463e6ef980903ef312977d","permalink":"/publication/not-all-knowledge-is-created-equal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/not-all-knowledge-is-created-equal/","section":"publication","summary":"","tags":null,"title":"Not All Knowledge Is Created 
Equal","type":"publication"},{"authors":["Guangyao Li","Wenxuan Hou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"545100c95da731d9faeb7037b5801449","permalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","section":"publication","summary":"","tags":null,"title":"Progressive Spatio-temporal Perception for Audio-Visual Question Answering","type":"publication"},{"authors":["Yaoting Wang*","Weisong Liu*","Guangyao Li","Jian Ding","Di Hu","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d414aab41857970b60155d360ceac88","permalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","section":"publication","summary":"","tags":null,"title":"Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer","type":"publication"},{"authors":["Zequn Yang","Yake Wei","Ce Liang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d069d78586930bf2dd726ae7c0b00c9b","permalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","section":"publication","summary":"","tags":null,"title":"Quantifying and Enhancing Multi-modal Robustness with Modality Preference","type":"publication"},{"authors":["Yaoting Wang","Peiwen Sun","Dongzhan Zhou","Guangyao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e787cc7b340511ed0ad617eaf61af942","permalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","section":"publication","summary":"","tags":null,"title":"Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes","type":"publication"},{"authors":["Ruoxuan Feng","Wenke Xia","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"450f11c7cb976aa1013ed40cd3963388","permalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","section":"publication","summary":"","tags":null,"title":"Revisiting Pre-training in Audio-Visual Learning","type":"publication"},{"authors":["Wenke Xia","Xingjian Li","Andong Deng","Haoyi Xiong","Dejing Dou","Di 
Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d68814ab18c4fd432535b2592c31988","permalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","section":"publication","summary":"","tags":null,"title":"Robust Cross-modal Knowledge Distillation for Unconstrained Videos","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Wanli Ouyang","Hang Zhou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"acd80d91071719018f44e8766871cb74","permalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","section":"publication","summary":"","tags":null,"title":"SeCo: Separating Unknown Musical Visual Sounds with Consistency Guidance","type":"publication"},{"authors":["Konrad Heidler","Lichao Mou","Di Hu","Pu Jin","Guangyao Li","Chuang Gan","Ji-Rong Wen","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"36c9fd21445495f69bad705471393094","permalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","section":"publication","summary":"","tags":null,"title":"Self-supervised Audiovisual Representation Learning for Remote Sensing Data","type":"publication"},{"authors":["Di Hu","Zheng Wang","Feiping Nie","Rong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac1ac86aa9c1772d446b7594a05d9100","permalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","section":"publication","summary":"","tags":null,"title":"Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis","type":"publication"},{"authors":["Dongzhan Zhou","Xinchi Zhou","Di Hu","Hang Zhou","Lei Bai","Ziwei Liu","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3f2c9d5779b3cec3c9b69a845335b218","permalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","section":"publication","summary":"","tags":null,"title":"SepFusion: Finding Optimal Fusion Structures for Visual Sound Separation","type":"publication"},{"authors":["Tao Wu","Xuewei Li","Zhongang Qi","Di Hu","Xintao Wang","Ying Shan","Xi 
Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"75b3553d0dff4fb43ea7284e9d6f8d1c","permalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","section":"publication","summary":"","tags":null,"title":"SphereDiffusion: Spherical Geometry-aware Distortion Resilient Diffusion Model","type":"publication"},{"authors":["ZiYun Li","Jona Otholt","Ben Dai","Di Hu","Christoph Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"50b53591fe6d761222acbe7d191d3e47","permalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","section":"publication","summary":"","tags":null,"title":"Supervised Knowledge May Hurt Novel Class Discovery Performance","type":"publication"},{"authors":["Di Hu","Xuelong Li","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6704b0eb55495bb979be6fcbb8243ae","permalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","section":"publication","summary":"","tags":null,"title":"Temporal Multimodal Learning in Audiovisual Speech Recognition","type":"publication"},{"authors":["Dong Wang","Di Hu","Xingjian Li","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"807bb234ac2724175550dbdf52f64d08","permalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","section":"publication","summary":"","tags":null,"title":"Temporal Relational Modeling with Self-Supervision for Action Segmentation","type":"publication"},{"authors":["Hongpeng Lin*","Ludan Ruan*","Wenke Xia*","Peiyu Liu","Jingyuan Wen","Yixin Xu","Di Hu","Ruihua Song","Wayne Xin Zhao","Qin Jin","Zhiwu Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"dd91d75ebb359650640b7b6c75634dff","permalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","section":"publication","summary":"","tags":null,"title":"TikTalk: A Video-Based Dialogue Dataset for Multi-Modal Chitchat in Real World","type":"publication"},{"authors":["Xingjian Li","Di Hu","Xuhong Li","Haoyi Xiong","Zhi Ye","Zhipeng Wang","Chengzhong Xu","Dejing 
Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"5b40a464bbfccb601c6d4c37e85cf81e","permalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","section":"publication","summary":"","tags":null,"title":"Towards Accurate Knowledge Transfer via Target-awareness Representation Disentanglement","type":"publication"},{"authors":["Andong Deng","Xingjian Li","Di Hu","Tianyang Wang","Haoyi Xiong","Chengzhong Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"113edd12d767a54c1fdd10685167cd5c","permalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","section":"publication","summary":"","tags":null,"title":"Towards Inadequately Pre-trained Models in Transfer Learning","type":"publication"},{"authors":["Wenxuan Hou*","Guangyao Li*","Yapeng Tian","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"56c2e256bf8d4a20cdffe034f430aaef","permalink":"/publication/towards-long-form-audio-visual-video-understanding/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-long-form-audio-visual-video-understanding/","section":"publication","summary":"","tags":null,"title":"Towards Long Form Audio-visual Video Understanding","type":"publication"},{"authors":["Zechen Bai","Zhigang Wang","Jian Wang","Di Hu","Errui Ding"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9905f139a565b4f5eabfc5902965f851","permalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","section":"publication","summary":"","tags":null,"title":"Unsupervised Multi-Source Domain Adaptation for Person Re-Identification","type":"publication"},{"authors":["Xian Liu","Rui Qian","Hang Zhou","Di Hu","Weiyao Lin","Ziwei Liu","Bolei Zhou","Xiaowei Zhou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ca462fd19e2017e2ecb2b26a145ef250","permalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","section":"publication","summary":"","tags":null,"title":"Visual Sound Localization in-the-Wild by Cross-Modal Interference Erasing","type":"publication"}] \ No newline at end of file diff --git a/docs/index.xml b/docs/index.xml index 5f12e291..f91c9686 100755 --- a/docs/index.xml +++ b/docs/index.xml @@ -44,6 +44,14 @@ + + Can Textual Semantics Mitigate Sounding Object SegmentationPreference? 
+ /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + + + Class-aware Sounding Objects Localization via Audiovisual Correspondence /publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/ @@ -124,6 +132,14 @@ + + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + + + Discrete Spectral Hashing for Efficient Similarity Retrieval /publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/ @@ -308,6 +324,14 @@ + + Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + + + Revisiting Pre-training in Audio-Visual Learning /publication/revisiting-pre-training-in-audio-visual-learning/ diff --git a/docs/member/index.html b/docs/member/index.html index 583df2fa..401e98d2 100755 --- a/docs/member/index.html +++ b/docs/member/index.html @@ -850,40 +850,6 @@

      Research Assistant

      - - - - - - -
      -
      -
      - - - - - - -
      - - Avatar - -
      - - -
      -

      Yufan Wu

      -

      Visiting Student, 2024

      -
      -
      -
      -
      -

      Yufan is a 2024 master’s graduate from Zhejiang University. She is passionate about image generation, cross-modal generation and multimodal learning.

      -

      -
      -
      - diff --git a/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html b/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html index 07bbdd46..6f5aed89 100755 --- a/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html +++ b/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Wenke Xia*" + "name": "Wenke Xia" }, "publisher": { diff --git a/docs/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/featured.jpg b/docs/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/featured.jpg new file mode 100644 index 00000000..0e252023 Binary files /dev/null and b/docs/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/featured.jpg differ diff --git a/docs/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/featured_hufd4a65e8fdf109f57110242e18504833_651369_720x0_resize_q90_lanczos.jpg b/docs/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/featured_hufd4a65e8fdf109f57110242e18504833_651369_720x0_resize_q90_lanczos.jpg new file mode 100644 index 00000000..56142f6a Binary files /dev/null and b/docs/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/featured_hufd4a65e8fdf109f57110242e18504833_651369_720x0_resize_q90_lanczos.jpg differ diff --git a/docs/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/index.html b/docs/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/index.html new file mode 100644 index 00000000..1576faa6 --- /dev/null +++ b/docs/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/index.html @@ -0,0 +1,699 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GeWu-Lab + + + + + + + + + + + + + +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      +

Can Textual Semantics Mitigate Sounding Object Segmentation Preference?

      + + + + + + + + + + + + + + + + + + + + + + + +
      + + + + + +
      +
      + + +
      +
      + + + + + +
      + + + + +
      +
      +
      +
      +
      All
      + +
      +
      +
      +
      +
      + + + +
      +
      +
      +
      +
      Publication
      +
European Conference on Computer Vision (ECCV) 2024
      +
      +
      +
      +
      +
      + + +
      + +
      + + + + + + + + + + + +
      +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      +
      + +

      + copyright © 2024 GeWu-Lab +
      + + Gaoling School of Artificial Intelligence, Renmin University of China, Beijing 100872 +

      +
      + +
      + + + + + + + diff --git a/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html b/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html index bc80c9b9..b4d02177 100755 --- a/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html +++ b/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Yapeng Tian*" + "name": "Yapeng Tian" }, "publisher": { diff --git a/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html b/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html index 8be06258..9286afb7 100755 --- a/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html +++ b/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Yapeng Tian*" + "name": "Yapeng Tian" }, "publisher": { diff --git a/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/featured.jpg b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/featured.jpg new file mode 100644 index 00000000..1d62c3af Binary files /dev/null and b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/featured.jpg differ diff --git a/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/featured_hud61968a1a1f311915190fe8da37d7b04_488685_720x0_resize_q90_lanczos.jpg b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/featured_hud61968a1a1f311915190fe8da37d7b04_488685_720x0_resize_q90_lanczos.jpg new file mode 100644 index 00000000..6eb4aee2 Binary files /dev/null and b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/featured_hud61968a1a1f311915190fe8da37d7b04_488685_720x0_resize_q90_lanczos.jpg differ diff --git a/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html new file mode 100644 index 00000000..fda94d04 --- /dev/null +++ b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html @@ -0,0 +1,705 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GeWu-Lab + + + + + + + + + + + + + +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      +

      Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection

      + + + + + + + + + + + + + + + + + + + + + + + +
      + + + + + +
      +
      + + +
      +
      + + + + + +
      + + + + +
      +
      +
      +
      +
      All
      + +
      +
      +
      +
      +
      + + + +
      +
      +
      +
      +
      Publication
      +
IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) 2024
      +
      +
      +
      +
      +
      + + +
      + +
      + + + + + + + + + + + +
      +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      +
      + +

      + copyright © 2024 GeWu-Lab +
      + + Gaoling School of Artificial Intelligence, Renmin University of China, Beijing 100872 +

      +
      + +
      + + + + + + + diff --git a/docs/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/index.html b/docs/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/index.html index a1490ede..49fa56d7 100644 --- a/docs/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/index.html +++ b/docs/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Yake Wei*" + "name": "Yake Wei" }, "publisher": { diff --git a/docs/publication/index.html b/docs/publication/index.html index e32d4c1c..07838c0f 100755 --- a/docs/publication/index.html +++ b/docs/publication/index.html @@ -556,6 +556,15 @@

      Publications

      + + + + + + + + + @@ -616,6 +625,444 @@

      Publications

      +
      + + + + + + + + + + + + + + + + + + +
      +
      + + + + + +
      +
      + +

      + Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes +

      + + + + + + + + + +
      + +
      + + +
      + + + + + + + + +
      + + + + + + + + + + + + + + + + + + +
      +
      + + + + + +
      +
      + +

+ Can Textual Semantics Mitigate Sounding Object Segmentation Preference? +

      + + + + + + + + + +
      + +
      + + +
      + + + + + + + + +
      + + + + + + + + + + + + + + + + + + +
      +
      + + + + + +
      +
      + +

      + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection +

      + + + + + + + + + +
      + +
      + + +
      + + + + + + + +
      diff --git a/docs/publication/index.xml b/docs/publication/index.xml index f2623080..ea58cc31 100755 --- a/docs/publication/index.xml +++ b/docs/publication/index.xml @@ -44,6 +44,14 @@ + + Can Textual Semantics Mitigate Sounding Object SegmentationPreference? + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + + + Class-aware Sounding Objects Localization via Audiovisual Correspondence /publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/ @@ -124,6 +132,14 @@ + + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + + + Discrete Spectral Hashing for Efficient Similarity Retrieval /publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/ @@ -308,6 +324,14 @@ + + Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + + + Revisiting Pre-training in Audio-Visual Learning /publication/revisiting-pre-training-in-audio-visual-learning/ diff --git a/docs/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/index.html b/docs/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/index.html index 5fc59214..3fabd17a 100644 --- a/docs/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/index.html +++ b/docs/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Wenke Xia*" + "name": "Wenke Xia" }, "publisher": { diff --git a/docs/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/index.html b/docs/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/index.html index a86fda7c..b144a1b9 100755 --- a/docs/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/index.html +++ b/docs/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Yake Wei*" + "name": "Yake Wei" }, "publisher": { diff --git a/docs/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/index.html b/docs/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/index.html index aaa61c90..a873a7ab 100755 --- a/docs/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/index.html +++ b/docs/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Guangyao Li*" + "name": "Guangyao Li" }, "publisher": { diff --git a/docs/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/index.html b/docs/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/index.html index 1117cc0a..70ad99ab 100644 --- 
a/docs/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/index.html +++ b/docs/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Yake Wei*" + "name": "Yake Wei" }, "publisher": { diff --git a/docs/publication/multi-scale-attention-for-audio-question-answering/index.html b/docs/publication/multi-scale-attention-for-audio-question-answering/index.html index 91f945ea..e3fac170 100644 --- a/docs/publication/multi-scale-attention-for-audio-question-answering/index.html +++ b/docs/publication/multi-scale-attention-for-audio-question-answering/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Guangyao Li*" + "name": "Guangyao Li" }, "publisher": { diff --git a/docs/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/index.html b/docs/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/index.html index 6df36ea8..606c7a8b 100644 --- a/docs/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/index.html +++ b/docs/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Guangyao Li*" + "name": "Guangyao Li" }, "publisher": { diff --git a/docs/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/index.html b/docs/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/index.html index d1db0059..b29a2688 100644 --- a/docs/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/index.html +++ b/docs/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Yaoting Wang*" + "name": "Yaoting Wang" }, "publisher": { diff --git a/docs/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/featured.jpg b/docs/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/featured.jpg new file mode 100644 index 00000000..aacd0e03 Binary files /dev/null and b/docs/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/featured.jpg differ diff --git a/docs/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/featured_huf3db66af0d88d2dd71ae8956a5a70414_440955_720x0_resize_q90_lanczos.jpg b/docs/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/featured_huf3db66af0d88d2dd71ae8956a5a70414_440955_720x0_resize_q90_lanczos.jpg new file mode 100644 index 00000000..c28989b7 Binary files /dev/null and b/docs/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/featured_huf3db66af0d88d2dd71ae8956a5a70414_440955_720x0_resize_q90_lanczos.jpg differ diff --git a/docs/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/index.html b/docs/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/index.html new file mode 100644 index 00000000..57fb17c4 --- /dev/null +++ b/docs/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/index.html @@ -0,0 +1,702 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + GeWu-Lab + + + + + + + + + + + + + +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      +

      Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes

      + + + + + + + + + + + + + + + + + + + + + + + +
      + + + + + +
      +
      + + +
      +
      + + + + + +
      + + + + +
      +
      +
      +
      +
      All
      + +
      +
      +
      +
      +
      + + + +
      +
      +
      +
      +
      Publication
      +
European Conference on Computer Vision (ECCV) 2024
      +
      +
      +
      +
      +
      + + +
      + +
      + + + + + + + + + + + +
      +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      +
      + +

      + copyright © 2024 GeWu-Lab +
      + + Gaoling School of Artificial Intelligence, Renmin University of China, Beijing 100872 +

      +
      + +
      + + + + + + + diff --git a/docs/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/index.html b/docs/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/index.html index 9447ff49..e07b7b2c 100644 --- a/docs/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/index.html +++ b/docs/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Wenke Xia*" + "name": "Wenke Xia" }, "publisher": { diff --git a/docs/publication_types/1/index.html b/docs/publication_types/1/index.html index 5b47cbc5..a0950d0f 100755 --- a/docs/publication_types/1/index.html +++ b/docs/publication_types/1/index.html @@ -412,63 +412,63 @@

      Cross-Task Transfer for Geotagged Audiovisual Aerial Scene Recognition

      +

Can Textual Semantics Mitigate Sounding Object Segmentation Preference?

      -

      Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs

      +

      Exploiting Visual Context Semantics for Sound Source Localization

      diff --git a/docs/publication_types/1/index.xml b/docs/publication_types/1/index.xml index 6ecd6048..f8a72d41 100755 --- a/docs/publication_types/1/index.xml +++ b/docs/publication_types/1/index.xml @@ -20,6 +20,14 @@ + + Can Textual Semantics Mitigate Sounding Object SegmentationPreference? + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + + + Cross-Task Transfer for Geotagged Audiovisual Aerial Scene Recognition /publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/ @@ -60,6 +68,14 @@ + + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + + + Discriminative Sounding Objects Localization via Self-supervised Audiovisual Matching /publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/ @@ -172,6 +188,14 @@ + + Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + + + Robust Cross-modal Knowledge Distillation for Unconstrained Videos /publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/ diff --git a/docs/publication_types/1/page/2/index.html b/docs/publication_types/1/page/2/index.html index f166e9e5..ff20bb9e 100755 --- a/docs/publication_types/1/page/2/index.html +++ b/docs/publication_types/1/page/2/index.html @@ -405,70 +405,70 @@

      1

      -

      Quantifying and Enhancing Multi-modal Robustness with Modality Preference

      +

      Multiple Sound Sources Localization from Coarse to Fine

      diff --git a/docs/publication_types/1/page/3/index.html b/docs/publication_types/1/page/3/index.html index aed768a2..adc1f8b1 100644 --- a/docs/publication_types/1/page/3/index.html +++ b/docs/publication_types/1/page/3/index.html @@ -405,63 +405,70 @@

      1

      + +
    diff --git a/docs/publication_types/1/page/4/index.html b/docs/publication_types/1/page/4/index.html new file mode 100644 index 00000000..64b3681f --- /dev/null +++ b/docs/publication_types/1/page/4/index.html @@ -0,0 +1,552 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GeWu-Lab + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +

    1

    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +

    + copyright © 2024 GeWu-Lab +
    + + Gaoling School of Artificial Intelligence, Renmin University of China, Beijing 100872 +

    +
    + +
    + + + + + + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index dd42d34f..143ca9f4 100755 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -162,10 +162,6 @@ /authors/24_jirui/ - - /authors/24_yufan/ - - /authors/admin/ @@ -179,19 +175,19 @@ - /topic_types/1/ + /publication_types/1/ - /publication_types/1/ + /topic_types/1/ - /publication_types/2/ + /topic_types/2/ - /topic_types/2/ + /publication_types/2/ @@ -242,6 +238,10 @@ /authors/bolei-zhou/ + + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + + /categories/ @@ -322,6 +322,10 @@ /publication/dense-multimodal-fusion-for-hierarchically-joint-representation/ + + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + + /authors/di-hu/ @@ -410,6 +414,10 @@ /publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/ + + /authors/honggang-zhang/ + + /authors/hongpeng-lin/ @@ -534,6 +542,10 @@ /publication/not-all-knowledge-is-created-equal/ + + /authors/peiwen-sun/ + + /authors/peiyu-liu/ @@ -582,6 +594,10 @@ /publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/ + + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + + /publication/revisiting-pre-training-in-audio-visual-learning/ @@ -806,6 +822,10 @@ /authors/yixin-xu/ + + /authors/yuanchao-li/ + + /authors/yuansheng-hua/ diff --git a/docs/topic_types/3/index.html b/docs/topic_types/3/index.html index c1707d7c..9db281ed 100755 --- a/docs/topic_types/3/index.html +++ b/docs/topic_types/3/index.html @@ -404,6 +404,13 @@

    3

    + +

    Deep Binary Reconstruction for Cross-modal Hashing

    @@ -425,6 +432,13 @@

    Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection

    +
    + +
    +
    +

    Discrete Spectral Hashing for Efficient Similarity Retrieval

    @@ -453,6 +467,13 @@

    Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes

    +
    + +
    +
    + diff --git a/docs/topic_types/3/index.xml b/docs/topic_types/3/index.xml index 16cc3a25..e5baa727 100755 --- a/docs/topic_types/3/index.xml +++ b/docs/topic_types/3/index.xml @@ -12,6 +12,14 @@ /topic_types/3/ + + Can Textual Semantics Mitigate Sounding Object SegmentationPreference? + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/ + + + Deep Binary Reconstruction for Cross-modal Hashing /publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/ @@ -36,6 +44,14 @@ + + Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/ + + + Discrete Spectral Hashing for Efficient Similarity Retrieval /publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/ @@ -68,5 +84,13 @@ + + Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/ + + +
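Since the patch adds rendered pages under `docs/publication/…` while also touching `docs/sitemap.xml`, `docs/index.xml`, and the per-section RSS feeds, a quick post-build sanity check can confirm the generated outputs stayed in sync. The sketch below is illustrative only: it assumes the `docs/` layout shown in this diff and sitemap `<loc>` values that end with the page's permalink, as they do here.

```python
import pathlib
import xml.etree.ElementTree as ET

# Gather every <loc> from the generated sitemap, ignoring XML namespaces.
tree = ET.parse("docs/sitemap.xml")
locs = {el.text.strip() for el in tree.iter() if el.tag.endswith("loc") and el.text}

# Each rendered page docs/publication/<slug>/index.html should appear
# in the sitemap as a URL ending in /publication/<slug>/.
for page in sorted(pathlib.Path("docs/publication").glob("*/index.html")):
    permalink = f"/publication/{page.parent.name}/"
    if not any(loc.endswith(permalink) for loc in locs):
        print("missing from sitemap:", permalink)
```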