diff --git a/content/project/.DS_Store b/content/project/.DS_Store
index d80fed39..78b24a69 100755
Binary files a/content/project/.DS_Store and b/content/project/.DS_Store differ
diff --git a/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/cite.bib b/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/cite.bib
new file mode 100644
index 00000000..b95f5fdb
--- /dev/null
+++ b/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/cite.bib
@@ -0,0 +1,6 @@
+@article{pang2024depth,
+  title={Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection},
+  author={Pang, Xincheng and Xia, Wenke and Wang, Zhigang and Zhao, Bin and Hu, Di and Wang, Dong and Li, Xuelong},
+  journal={arXiv preprint arXiv:2408.05107},
+  year={2024}
+}
\ No newline at end of file
diff --git a/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/index.md b/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/index.md
index a2bc34b6..74ad8aaa 100755
--- a/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/index.md
+++ b/content/publication/Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection/index.md
@@ -11,8 +11,8 @@ authors:
 publication_types: ["1"]
 publication: The 2024 IEEE/RSJ International Conference on Intelligent Robots and Systems(IROS) 2024
 publication_types_name: Conference Paper
-url_pdf:
-url_code:
+url_pdf: https://arxiv.org/abs/2408.05107
+url_code: https://gewu-lab.github.io/DepthHelps-IROS2024/
 topic_types: ["3"]
 # topic_types_name: topic_hash
 rating : 2024_06_28
diff --git a/content/publication/KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance/cite.bib b/content/publication/KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance/cite.bib
new file mode 100755
index 00000000..1ab36054
--- /dev/null
+++ b/content/publication/KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance/cite.bib
@@ -0,0 +1,6 @@
+@article{lu2024koi,
+  title={KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance},
+  author={Lu, Jingxian and Xia, Wenke and Wang, Dong and Wang, Zhigang and Zhao, Bin and Hu, Di and Li, Xuelong},
+  journal={arXiv preprint arXiv:2408.02912},
+  year={2024}
+}
\ No newline at end of file
diff --git a/content/publication/KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance/featured.png b/content/publication/KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance/featured.png
new file mode 100644
index 00000000..2ad6a005
Binary files /dev/null and b/content/publication/KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance/featured.png differ
diff --git a/content/publication/KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance/index.md b/content/publication/KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance/index.md
new file mode 100755
index 00000000..bf677d31
--- /dev/null
+++ b/content/publication/KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance/index.md
@@ -0,0 +1,18 @@
+---
+title: "KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance"
+authors:
+  - Jingxian Lu
+  - Wenke Xia
+  - Dong Wang
+  - Zhigang Wang
+  - Bin Zhao
+  - Di Hu
+  - Xuelong Li
+publication_types: ["9"]
+publication: Conference on Robot Learning (CoRL)
+publication_types_name: Conference Paper
+url_pdf: https://www.arxiv.org/abs/2408.02912
+topic_types: ["2"]
+topic_types_name: topic_scene_understanding
+rating : 2024_09_01
+---
diff --git a/content/publication/Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation/cite.bib b/content/publication/Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation/cite.bib
new file mode 100755
index 00000000..b88eef8b
--- /dev/null
+++ b/content/publication/Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation/cite.bib
@@ -0,0 +1,6 @@
+@article{feng2024play,
+  title={Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation},
+  author={Feng, Ruoxuan and Hu, Di and Ma, Wenke and Li, Xuelong},
+  journal={arXiv preprint arXiv:2408.01366},
+  year={2024}
+}
\ No newline at end of file
diff --git a/content/publication/Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation/featured.jpg b/content/publication/Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation/featured.jpg
new file mode 100644
index 00000000..265bcae1
Binary files /dev/null and b/content/publication/Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation/featured.jpg differ
diff --git a/content/publication/Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation/index.md b/content/publication/Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation/index.md
new file mode 100755
index 00000000..9a1c38dc
--- /dev/null
+++ b/content/publication/Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation/index.md
@@ -0,0 +1,15 @@
+---
+title: "Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation"
+authors:
+  - Ruoxuan Feng
+  - Di Hu
+  - Wenke Ma
+  - Xuelong Li
+publication_types: ["9"]
+publication: Conference on Robot Learning (CoRL)
+publication_types_name: Conference Paper
+url_pdf: https://arxiv.org/pdf/2408.01366
+topic_types: ["2"]
+topic_types_name: topic_scene_understanding
+rating : 2024_09_01
+---
diff --git a/content/publication/Unveiling and Mitigating Bias in Audio Visual Segmentation/cite.bib b/content/publication/Unveiling and Mitigating Bias in Audio Visual Segmentation/cite.bib
new file mode 100644
index 00000000..e0ddc2e4
--- /dev/null
+++ b/content/publication/Unveiling and Mitigating Bias in Audio Visual Segmentation/cite.bib
@@ -0,0 +1,6 @@
+@article{sun2024unveiling,
+  title={Unveiling and Mitigating Bias in Audio Visual Segmentation},
+  author={Sun, Peiwen and Zhang, Honggang and Hu, Di},
+  journal={arXiv preprint arXiv:2407.16638},
+  year={2024}
+}
\ No newline at end of file
diff --git a/content/publication/Unveiling and Mitigating Bias in Audio Visual Segmentation/index.md b/content/publication/Unveiling and Mitigating Bias in Audio Visual Segmentation/index.md
index 575500f5..ef2196e3 100755
--- a/content/publication/Unveiling and Mitigating Bias in Audio Visual Segmentation/index.md
+++ b/content/publication/Unveiling and Mitigating Bias in Audio Visual Segmentation/index.md
@@ -8,6 +8,7 @@ publication_types: ["1"]
 publication: ACM MM 2024
 publication_types_name: Conference Paper
 url_code: https://gewu-lab.github.io/bias_in_AVS/
+url_pdf: https://arxiv.org/pdf/2407.16638
 topic_types: ["3"]
 # topic_types_name: topic_hash
 rating : 2024_07_30
diff --git a/docs/authors/bin-zhao/index.html b/docs/authors/bin-zhao/index.html
index bdae136c..00abc653 100644
--- a/docs/authors/bin-zhao/index.html
+++ b/docs/authors/bin-zhao/index.html
@@ -394,6 +394,10 @@

 Latest
 
 Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs
+  •
+  KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+  •
+
diff --git a/docs/authors/bin-zhao/index.xml b/docs/authors/bin-zhao/index.xml
index 87fdd8f4..ff037db6 100644
--- a/docs/authors/bin-zhao/index.xml
+++ b/docs/authors/bin-zhao/index.xml
@@ -28,5 +28,13 @@
+
+KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+
+
+
diff --git a/docs/authors/di-hu/index.html b/docs/authors/di-hu/index.html
index 0df2a763..5ccf052e 100755
--- a/docs/authors/di-hu/index.html
+++ b/docs/authors/di-hu/index.html
@@ -490,6 +490,10 @@

 Latest
 
 Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs
+  •
+  KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+  •
+
   •
   Large Graph Hashing with Spectral Rotation
   •
@@ -526,6 +530,10 @@
 
 Latest
 
 Not All Knowledge Is Created Equal
+  •
+  Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation
+  •
+
   •
   Progressive Spatio-temporal Perception for Audio-Visual Question Answering
   •
diff --git a/docs/authors/di-hu/index.xml b/docs/authors/di-hu/index.xml
index 5081ec68..d985d2ba 100755
--- a/docs/authors/di-hu/index.xml
+++ b/docs/authors/di-hu/index.xml
@@ -220,6 +220,14 @@
+
+KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+
+
+
 Large Graph Hashing with Spectral Rotation
 /publication/large-graph-hashing-with-spectral-rotation/
@@ -292,6 +300,14 @@
+
+Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation
+/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
+
+
+
 Progressive Spatio-temporal Perception for Audio-Visual Question Answering
 /publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/
diff --git a/docs/authors/dong-wang/index.html b/docs/authors/dong-wang/index.html
index 25c0f88a..bd8d00cb 100755
--- a/docs/authors/dong-wang/index.html
+++ b/docs/authors/dong-wang/index.html
@@ -406,6 +406,10 @@

 Latest
 
 Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs
+  •
+  KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+  •
+
   •
   Listen to the Image
   •
diff --git a/docs/authors/dong-wang/index.xml b/docs/authors/dong-wang/index.xml
index 967e7a85..0cb63af6 100755
--- a/docs/authors/dong-wang/index.xml
+++ b/docs/authors/dong-wang/index.xml
@@ -52,6 +52,14 @@
+
+KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+
+
+
 Listen to the Image
 /publication/listen-to-the-image/
diff --git a/docs/authors/guangyao-li/index.html b/docs/authors/guangyao-li/index.html
index 08138492..b35a7fcb 100755
--- a/docs/authors/guangyao-li/index.html
+++ b/docs/authors/guangyao-li/index.html
@@ -115,7 +115,7 @@
-
+
@@ -370,7 +370,7 @@

 Search
 
-Guangyao Li*
+Guangyao Li
 
diff --git a/docs/authors/guangyao-li/index.xml b/docs/authors/guangyao-li/index.xml
index 751bc5e0..c9bb206e 100755
--- a/docs/authors/guangyao-li/index.xml
+++ b/docs/authors/guangyao-li/index.xml
@@ -1,14 +1,14 @@
-Guangyao Li* | GeWu-Lab
+Guangyao Li | GeWu-Lab
 /authors/guangyao-li/
-Guangyao Li*
+Guangyao Li
 Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab
 /img/logo.png
-Guangyao Li*
+Guangyao Li
 /authors/guangyao-li/
diff --git a/docs/authors/index.xml b/docs/authors/index.xml
index 78d3ad58..f3898de0 100755
--- a/docs/authors/index.xml
+++ b/docs/authors/index.xml
@@ -409,7 +409,7 @@ He got his undergraduate degree at <a href="https://www.ruc.edu.cn/"
-Guangyao Li*
+Guangyao Li
 /authors/guangyao-li/
 Mon, 01 Jan 0001 00:00:00 +0000
 /authors/guangyao-li/
@@ -496,6 +496,14 @@
+
+Jingxian Lu
+/authors/jingxian-lu/
+Mon, 01 Jan 0001 00:00:00 +0000
+/authors/jingxian-lu/
+
+
+
 Jingyuan Wen
 /authors/jingyuan-wen/
@@ -793,7 +801,15 @@
-Wenke Xia
+Wenke Ma
+/authors/wenke-ma/
+Mon, 01 Jan 0001 00:00:00 +0000
+/authors/wenke-ma/
+
+
+
+
+Wenke Xia*
 /authors/wenke-xia/
 Mon, 01 Jan 0001 00:00:00 +0000
 /authors/wenke-xia/
@@ -961,7 +977,7 @@
-Yapeng Tian*
+Yapeng Tian
 /authors/yapeng-tian/
 Mon, 01 Jan 0001 00:00:00 +0000
 /authors/yapeng-tian/
diff --git a/docs/authors/jingxian-lu/index.html b/docs/authors/jingxian-lu/index.html
new file mode 100644
index 00000000..992a77ff
--- /dev/null
+++ b/docs/authors/jingxian-lu/index.html
@@ -0,0 +1,516 @@
+GeWu-Lab
+Jingxian Lu
+copyright © 2024 GeWu-Lab
+Gaoling School of Artificial Intelligence, Renmin University of China, Beijing 100872
diff --git a/docs/authors/jingxian-lu/index.xml b/docs/authors/jingxian-lu/index.xml
new file mode 100644
index 00000000..8913b181
--- /dev/null
+++ b/docs/authors/jingxian-lu/index.xml
@@ -0,0 +1,24 @@
+Jingxian Lu | GeWu-Lab
+/authors/jingxian-lu/
+Jingxian Lu
+Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab
+/img/logo.png
+Jingxian Lu
+/authors/jingxian-lu/
+KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
diff --git a/docs/authors/page/10/index.html b/docs/authors/page/10/index.html
index 34dc870d..1d6bcc75 100755
--- a/docs/authors/page/10/index.html
+++ b/docs/authors/page/10/index.html
@@ -406,6 +406,9 @@

    Authors

    +
  • Tianyang Wang
  • + +
  • Wanli Ouyang
  • @@ -418,7 +421,10 @@

    Authors

  • Weiyao Lin
  • -
  • Wenke Xia
  • +
  • Wenke Ma
  • + + +
  • Wenke Xia*
  • Wenxuan Hou
  • @@ -429,12 +435,6 @@

    Authors

  • Xian Liu
  • - -
  • Xiao Tan
  • - - -
  • Xiao Xiang Zhu
  • - diff --git a/docs/authors/page/11/index.html b/docs/authors/page/11/index.html index 910f51d1..a1672f95 100644 --- a/docs/authors/page/11/index.html +++ b/docs/authors/page/11/index.html @@ -406,6 +406,12 @@

    Authors

    +
  • Xiao Tan
  • + + +
  • Xiao Xiang Zhu
  • + +
  • Xiaokang Peng*
  • @@ -429,12 +435,6 @@

    Authors

  • Xinshao Wang
  • - -
  • Xintao Wang
  • - - -
  • Xu Zhao*
  • - diff --git a/docs/authors/page/12/index.html b/docs/authors/page/12/index.html index 9a8c8f10..ee7720ae 100644 --- a/docs/authors/page/12/index.html +++ b/docs/authors/page/12/index.html @@ -406,6 +406,12 @@

    Authors

    +
  • Xintao Wang
  • + + +
  • Xu Zhao*
  • + +
  • Xuelong Li
  • @@ -421,7 +427,7 @@

    Authors

  • Yaoting Wang*
  • -
  • Yapeng Tian*
  • +
  • Yapeng Tian
  • Ying Shan
  • @@ -429,12 +435,6 @@

    Authors

  • Yixin Xu
  • - -
  • Yuanchao Li
  • - - -
  • Yuansheng Hua
  • - diff --git a/docs/authors/page/13/index.html b/docs/authors/page/13/index.html index c290f7c6..2dfdf2c0 100644 --- a/docs/authors/page/13/index.html +++ b/docs/authors/page/13/index.html @@ -406,6 +406,12 @@

    Authors

    +
  • Yuanchao Li
  • + + +
  • Yuansheng Hua
  • + +
  • Zechen Bai
  • @@ -429,12 +435,6 @@

    Authors

  • Zhiwu Lu
  • - -
  • Zhongang Qi
  • - - -
  • Zihe Wang
  • - diff --git a/docs/authors/page/14/index.html b/docs/authors/page/14/index.html index 9e46b06c..60404668 100644 --- a/docs/authors/page/14/index.html +++ b/docs/authors/page/14/index.html @@ -406,6 +406,12 @@

    Authors

    +
  • Zhongang Qi
  • + + +
  • Zihe Wang
  • + +
  • Ziwei Liu
  • diff --git a/docs/authors/page/5/index.html b/docs/authors/page/5/index.html index d228974f..642be5a1 100755 --- a/docs/authors/page/5/index.html +++ b/docs/authors/page/5/index.html @@ -424,7 +424,7 @@

    Authors

  • Feiping Nie
  • -
  • Guangyao Li*
  • +
  • Guangyao Li
  • Han Zhang
  • diff --git a/docs/authors/page/6/index.html b/docs/authors/page/6/index.html index 096f7bb0..8a8e348c 100755 --- a/docs/authors/page/6/index.html +++ b/docs/authors/page/6/index.html @@ -427,13 +427,13 @@

    Authors

  • Jian Wang
  • -
  • Jingyuan Wen
  • +
  • Jingxian Lu
  • -
  • Jona Otholt
  • +
  • Jingyuan Wen
  • -
  • Juncheng Ma
  • +
  • Jona Otholt
  • diff --git a/docs/authors/page/7/index.html b/docs/authors/page/7/index.html index ff2d443e..62e06d49 100755 --- a/docs/authors/page/7/index.html +++ b/docs/authors/page/7/index.html @@ -406,6 +406,9 @@

    Authors

    +
  • Juncheng Ma
  • + +
  • Junyu Gao
  • @@ -432,9 +435,6 @@

    Authors

  • Mengyue Wu
  • - -
  • Minyue Jiang
  • - diff --git a/docs/authors/page/8/index.html b/docs/authors/page/8/index.html index 8523ade9..7fc56c5a 100755 --- a/docs/authors/page/8/index.html +++ b/docs/authors/page/8/index.html @@ -406,6 +406,9 @@

    Authors

    +
  • Minyue Jiang
  • + +
  • Neil M Robertson
  • @@ -432,9 +435,6 @@

    Authors

  • Qingzhong Wang*
  • - -
  • Rong Wang
  • - diff --git a/docs/authors/page/9/index.html b/docs/authors/page/9/index.html index 7a8e7a8c..2c091227 100755 --- a/docs/authors/page/9/index.html +++ b/docs/authors/page/9/index.html @@ -406,6 +406,9 @@

    Authors

    +
  • Rong Wang
  • + +
  • Rui Qian
  • @@ -432,9 +435,6 @@

    Authors

  • Tao Wu
  • - -
  • Tianyang Wang
  • - diff --git a/docs/authors/ruoxuan-feng/index.html b/docs/authors/ruoxuan-feng/index.html index 363da9f1..f7433cec 100755 --- a/docs/authors/ruoxuan-feng/index.html +++ b/docs/authors/ruoxuan-feng/index.html @@ -398,6 +398,10 @@

 Latest
 
 MMCosine: Multi-Modal Cosine Loss Towards Balanced Audio-Visual Fine-Grained Learning
+  •
+  Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation
+  •
+
   •
   Revisiting Pre-training in Audio-Visual Learning
   •
diff --git a/docs/authors/ruoxuan-feng/index.xml b/docs/authors/ruoxuan-feng/index.xml
index 36299a4f..5982e9cd 100755
--- a/docs/authors/ruoxuan-feng/index.xml
+++ b/docs/authors/ruoxuan-feng/index.xml
@@ -36,6 +36,14 @@
+
+Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation
+/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
+
+
+
 Revisiting Pre-training in Audio-Visual Learning
 /publication/revisiting-pre-training-in-audio-visual-learning/
diff --git a/docs/authors/wenke-ma/index.html b/docs/authors/wenke-ma/index.html
new file mode 100644
index 00000000..ef1490f5
--- /dev/null
+++ b/docs/authors/wenke-ma/index.html
@@ -0,0 +1,516 @@
+GeWu-Lab
+Wenke Ma
+copyright © 2024 GeWu-Lab
+Gaoling School of Artificial Intelligence, Renmin University of China, Beijing 100872
diff --git a/docs/authors/wenke-ma/index.xml b/docs/authors/wenke-ma/index.xml
new file mode 100644
index 00000000..ee4afa6f
--- /dev/null
+++ b/docs/authors/wenke-ma/index.xml
@@ -0,0 +1,24 @@
+Wenke Ma | GeWu-Lab
+/authors/wenke-ma/
+Wenke Ma
+Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab
+/img/logo.png
+Wenke Ma
+/authors/wenke-ma/
+Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation
+/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
diff --git a/docs/authors/wenke-xia/index.html b/docs/authors/wenke-xia/index.html
index 30abbe2d..7282d976 100755
--- a/docs/authors/wenke-xia/index.html
+++ b/docs/authors/wenke-xia/index.html
@@ -115,7 +115,7 @@
-
+
@@ -370,7 +370,7 @@

 Search
 
-Wenke Xia
+Wenke Xia*
 
@@ -398,6 +398,10 @@
 
 Latest
 
 Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs
+  •
+  KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+  •
+
   •
   Revisiting Pre-training in Audio-Visual Learning
   •
diff --git a/docs/authors/wenke-xia/index.xml b/docs/authors/wenke-xia/index.xml
index 5a8bdb20..2ca99b22 100755
--- a/docs/authors/wenke-xia/index.xml
+++ b/docs/authors/wenke-xia/index.xml
@@ -1,14 +1,14 @@
-Wenke Xia | GeWu-Lab
+Wenke Xia* | GeWu-Lab
 /authors/wenke-xia/
-Wenke Xia
+Wenke Xia*
 Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab
 /img/logo.png
-Wenke Xia
+Wenke Xia*
 /authors/wenke-xia/
@@ -36,6 +36,14 @@
+
+KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+
+
+
 Revisiting Pre-training in Audio-Visual Learning
 /publication/revisiting-pre-training-in-audio-visual-learning/
diff --git a/docs/authors/xuelong-li/index.html b/docs/authors/xuelong-li/index.html
index cba2e7f4..237cb6ca 100755
--- a/docs/authors/xuelong-li/index.html
+++ b/docs/authors/xuelong-li/index.html
@@ -418,6 +418,10 @@

 Latest
 
 Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs
+  •
+  KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+  •
+
   •
   Large Graph Hashing with Spectral Rotation
   •
@@ -430,6 +434,10 @@
 
 Latest
 
 Listen to the Image
+  •
+  Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation
+  •
+
   •
   Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis
   •
diff --git a/docs/authors/xuelong-li/index.xml b/docs/authors/xuelong-li/index.xml
index 88f8a67e..9d668e9b 100755
--- a/docs/authors/xuelong-li/index.xml
+++ b/docs/authors/xuelong-li/index.xml
@@ -76,6 +76,14 @@
+
+KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/
+
+
+
 Large Graph Hashing with Spectral Rotation
 /publication/large-graph-hashing-with-spectral-rotation/
@@ -100,6 +108,14 @@
+
+Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation
+/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
+Mon, 01 Jan 0001 00:00:00 +0000
+/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/
+
+
+
 Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis
 /publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/
diff --git a/docs/authors/yapeng-tian/index.html b/docs/authors/yapeng-tian/index.html
index 93aaf475..6a55ebe2 100755
--- a/docs/authors/yapeng-tian/index.html
+++ b/docs/authors/yapeng-tian/index.html
@@ -115,7 +115,7 @@
-
+
@@ -370,7 +370,7 @@

 Search
 
-Yapeng Tian*
+Yapeng Tian
 
diff --git a/docs/authors/yapeng-tian/index.xml b/docs/authors/yapeng-tian/index.xml
index 7c92e1b7..451a1056 100755
--- a/docs/authors/yapeng-tian/index.xml
+++ b/docs/authors/yapeng-tian/index.xml
@@ -1,14 +1,14 @@
-Yapeng Tian* | GeWu-Lab
+Yapeng Tian | GeWu-Lab
 /authors/yapeng-tian/
-Yapeng Tian*
+Yapeng Tian
 Source Themes Academic (https://sourcethemes.com/academic/)en-uscopyright © 2024 GeWu-Lab
 /img/logo.png
-Yapeng Tian*
+Yapeng Tian
 /authors/yapeng-tian/
diff --git a/docs/authors/zhigang-wang/index.html b/docs/authors/zhigang-wang/index.html
index 56cdc96b..a75fd15f 100755
--- a/docs/authors/zhigang-wang/index.html
+++ b/docs/authors/zhigang-wang/index.html
@@ -394,6 +394,10 @@

 Latest
 
 Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs
+  •
+  KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance
+  •
+
   •
   Unsupervised Multi-Source Domain Adaptation for Person Re-Identification
   •
diff --git a/docs/authors/zhigang-wang/index.xml b/docs/authors/zhigang-wang/index.xml
index 0a2b2360..59027e9d 100755
--- a/docs/authors/zhigang-wang/index.xml
+++ b/docs/authors/zhigang-wang/index.xml
@@ -28,6 +28,14 @@
    + + KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + + + Unsupervised Multi-Source Domain Adaptation for Person Re-Identification /publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/ diff --git a/docs/index.json b/docs/index.json index b6750a25..58182903 100755 --- a/docs/index.json +++ b/docs/index.json @@ -1 +1 @@ -[{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"4e73f707a3c1da0c5d8d165361161c7b","permalink":"/authors/19_ruize/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/19_ruize/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Ruize Xu","type":"authors"},{"authors":null,"categories":null,"content":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He got his master degree at China Agricultural University in 2020 and got into GeWu-Lab since then. His recently research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and make warm artificial intelligence research! People who are interested in my research domain are very welcome and do not hesitate to contact me actively. For more information, please visit his personal homepage. Valar Morghulis!\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"537de72d4cb178cea6fbf2b2a92ea589","permalink":"/authors/20_guangyao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_guangyao/","section":"authors","summary":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He got his master degree at China Agricultural University in 2020 and got into GeWu-Lab since then. His recently research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and make warm artificial intelligence research! People who","tags":null,"title":"Guangyao Li","type":"authors"},{"authors":null,"categories":null,"content":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He got his undergraduate degree at School of Information, Renmin University of China in 2020 and got into GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. And he is also devoted to help these visually impaired with AI in both technology and practice.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"22debf3f166bda4bfb28c8317489f918","permalink":"/authors/20_xiaokang/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xiaokang/","section":"authors","summary":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He got his undergraduate degree at School of Information, Renmin University of China in 2020 and got into GeWu-Lab since then. 
He is interested in multi-modal learning and perception, and optimization mechanism design. And he is also devoted to help these visually impaired with AI in both technology and practice.","tags":null,"title":"Xiaokang Peng","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"55a49bcd8ae300a0362a45302ca97c26","permalink":"/authors/20_xuemin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xuemin/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Xuemin Liu","type":"authors"},{"authors":null,"categories":null,"content":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-surpervised Representation Learning. Now he is working on video understanding and speaker diarization task for complex speech scenario. He is also interested in Internet finance, and has got his Bachelor of Finance in Renmin University of China besides the Computer Science degree.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"033ae9c233d8ca15172e0f0eb482735e","permalink":"/authors/20_yixin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_yixin/","section":"authors","summary":"Yixin is a master student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-surpervised Representation Learning. Now he is working on video understanding and speaker diarization task for complex speech scenario. He is also interested in Internet finance, and has got his Bachelor of Finance in Renmin University of China besides the Computer Science degree.","tags":null,"title":"Yixin Xu","type":"authors"},{"authors":null,"categories":null,"content":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate he works with Prof. Di Hu. Now Rui is a Ph.D. student in Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"9434b9dca31f1f23a676f2b869e0c881","permalink":"/authors/21_ruiqian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_ruiqian/","section":"authors","summary":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate he works with Prof. Di Hu. Now Rui is a Ph.D. student in Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.","tags":null,"title":"Rui Qian","type":"authors"},{"authors":null,"categories":null,"content":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. 
Now her research interests focus on the effective mechanism of multi-modal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"57b1d4e29185f3870d53fc65c766173e","permalink":"/authors/21_yake/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_yake/","section":"authors","summary":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.","tags":null,"title":"Yake Wei","type":"authors"},{"authors":null,"categories":null,"content":"Andong Deng spent a wonderful year at GeWu Lab doing research about multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an upcoming PhD student in 2022 Fall at Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c95476ad24cc214056b3d2c5e8c90f17","permalink":"/authors/22_andong/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_andong/","section":"authors","summary":"Andong Deng spent a wonderful year at GeWu Lab doing research about multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an upcoming PhD student in 2022 Fall at Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.","tags":null,"title":"Andong Deng","type":"authors"},{"authors":null,"categories":null,"content":"Wenke is a Ph.D student since 2022 Fall at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focus on building a generalizable manipulation policy with computer vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a2791369e75b13b52139d9860293bdd5","permalink":"/authors/22_wenke/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenke/","section":"authors","summary":"Wenke is a Ph.D student since 2022 Fall at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focus on building a generalizable manipulation policy with computer vision.","tags":null,"title":"Wenke Xia","type":"authors"},{"authors":null,"categories":null,"content":"Wenxuan is a second-year Ph.D student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He has got his bachelor\u0026rsquo;s degree and master\u0026rsquo;s degree in Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. 
Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"cd37724dba9b446f1c1307e40cd45632","permalink":"/authors/22_wenxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenxuan/","section":"authors","summary":"Wenxuan is a second-year Ph.D student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He has got his bachelor\u0026rsquo;s degree and master\u0026rsquo;s degree in Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.","tags":null,"title":"Wenxuan Hou","type":"authors"},{"authors":null,"categories":null,"content":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on scene understanding in embodied ai with multi-modal.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a389590984a0c3fb50de499f8df2d4c0","permalink":"/authors/22_xincheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_xincheng/","section":"authors","summary":"Xincheng is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on scene understanding in embodied ai with multi-modal.","tags":null,"title":"Xincheng Pang","type":"authors"},{"authors":null,"categories":null,"content":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"d884fc3eb1e2b2382def5073cec5e105","permalink":"/authors/22_zequn/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_zequn/","section":"authors","summary":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.","tags":null,"title":"Zequn Yang","type":"authors"},{"authors":null,"categories":null,"content":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Dalian University of Technology in 2023. 
Currently his research instrests focus on Large language Models and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"0f875044223f8afd458b089859ba38d8","permalink":"/authors/23_henghui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_henghui/","section":"authors","summary":"Henghui is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Dalian University of Technology in 2023. Currently his research instrests focus on Large language Models and cross-modal generation.","tags":null,"title":"Henghui Du","type":"authors"},{"authors":null,"categories":null,"content":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanism of multi-modal.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"8808a5aa1460c5cb4fad660d28f8520a","permalink":"/authors/23_jiahao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jiahao/","section":"authors","summary":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanism of multi-modal.","tags":null,"title":"Jiahao Li","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"1697602eb95e74d0fb1a9247c1f07489","permalink":"/authors/23_jianghan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jianghan/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Jianghan Chao","type":"authors"},{"authors":null,"categories":null,"content":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bdeafc1f9127d19078299ad17ddcf547","permalink":"/authors/23_jingxian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jingxian/","section":"authors","summary":"Jingxian is a fourth-year student of Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.","tags":null,"title":"Jingxian Lu","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"b1f3ebd7d0f58e6a501810a383c4a9ed","permalink":"/authors/23_jinlin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jinlin/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Jinlin Li","type":"authors"},{"authors":null,"categories":null,"content":"Juncheng is a third-year student of School of Artificial Intelligence, University of Chinese Academy of Sciences. 
His research interests include audio-visual localization and segmentation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"874c09024781e4fd5375423eaef9c9e8","permalink":"/authors/23_juncheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_juncheng/","section":"authors","summary":"Juncheng is a third-year student of School of Artificial Intelligence, University of Chinese Academy of Sciences. His research interests include audio-visual localization and segmentation.","tags":null,"title":"Juncheng Ma","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"834fe556c30cd4180a6dc4c692fd63d9","permalink":"/authors/23_liangce/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_liangce/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Ce Liang","type":"authors"},{"authors":null,"categories":null,"content":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning including sentiment, segmentation and foundation models.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bf84fe39ef0b614af0ae82d08359c784","permalink":"/authors/23_peiwen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_peiwen/","section":"authors","summary":"Peiwen is a second-year MPhil student of the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning including sentiment, segmentation and foundation models.","tags":null,"title":"Peiwen Sun","type":"authors"},{"authors":null,"categories":null,"content":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c29a63de0242659b43a43451fc077046","permalink":"/authors/23_ruoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_ruoxuan/","section":"authors","summary":"Ruoxuan is a first-year master student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.","tags":null,"title":"Ruoxuan Feng","type":"authors"},{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"364786f50ed04bbfb2309f8069cdbe90","permalink":"/authors/23_shaoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_shaoxuan/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Shaoxuan Xu","type":"authors"},{"authors":null,"categories":null,"content":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. 
He is interested in image editing with generative diffusion models and image deblurring.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"062e874f9d4216ee7c15e6afe41e1631","permalink":"/authors/23_siwei/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_siwei/","section":"authors","summary":"Siwei is a fourth-year student of the Department of Electronic Engineering, Tsinghua University. He is interested in image editing with generative diffusion models and image deblurring.","tags":null,"title":"Siwei Li","type":"authors"},{"authors":null,"categories":null,"content":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bda305ecfaa132f6e49d2dd2566d0f25","permalink":"/authors/23_yaoting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_yaoting/","section":"authors","summary":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.","tags":null,"title":"Yaoting Wang","type":"authors"},{"authors":null,"categories":null,"content":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"7a6ee1988cb2fa93bfeee88a094c7489","permalink":"/authors/24_jirui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_jirui/","section":"authors","summary":"Jirui is a second-year MPhil student of the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.","tags":null,"title":"JiRui Huang","type":"authors"},{"authors":null,"categories":null,"content":"Yuchen is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Zhejiang University in 2024.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"186e15560cfa29bcd45c618efc625779","permalink":"/authors/24_yuchen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_yuchen/","section":"authors","summary":"Yuchen is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He has got his bachelor\u0026rsquo;s degree in Zhejiang University in 2024.","tags":null,"title":"Yuchen Li","type":"authors"},{"authors":["dihu"],"categories":null,"content":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was previously a research scientist at Baidu Research. Di Hu obtained the Ph.D degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. 
Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via the natural multimodal messages. He is an aficionado of cognitive neuroscience and has wrote one study note during his undergraduate. Inspired by what he learned from cognitive neuroscience, and what he observed and deliberated from the daily-life, he strongly convinced that the pervasive, free, natural multimodal messages can provide sufficient information for perceiving, learning and understanding environment, even the agent itself, which promisingly makes multimodal learning become one of the key to achieve machine intelligence.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"2525497d367e79493fd32b198b28f040","permalink":"/authors/admin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/admin/","section":"authors","summary":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was previously a research scientist at Baidu Research. Di Hu obtained the Ph.D degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via the natural multimodal messages. He is an aficionado of","tags":null,"title":"Di Hu","type":"authors"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"852b491b0dcadb44b8f099f931db74c4","permalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"A Two-Stage Framework for Multiple Sound-Source Localization","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"b21459d2cd2aa98d5a771a396df3c29e","permalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","section":"publication","summary":"","tags":null,"title":"Ambient Sound Helps: Audiovisual Crowd Counting in Extreme Conditions","type":"publication"},{"authors":["Wenke Xia*","Xu Zhao*","Xincheng Pang","Changqing Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"7a5ff9681de843469038165a230c4f87","permalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","section":"publication","summary":"","tags":null,"title":"Balanced Audiovisual Dataset for Imbalance Analysis","type":"publication"},{"authors":["Xiaokang Peng*","Yake Wei*","Andong Deng","Dong Wang","Di 
Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1cdda2159c4adeb4f31cb4e7f1a5ab8a","permalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","section":"publication","summary":"","tags":null,"title":"Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)","type":"publication"},{"authors":["Yaoting Wang*","Peiwen Sun*","Yuanchao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e2d14df72502e78a30f83d09310b98b6","permalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","section":"publication","summary":"","tags":null,"title":"Can Textual Semantics Mitigate Sounding Object SegmentationPreference?","type":"publication"},{"authors":["Di Hu","Yake Wei","Rui Qian","Weiyao Lin","Ruihua Song","Ji-Rong Wen"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"91e67073102678aec9799732ceef49f3","permalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","section":"publication","summary":"","tags":null,"title":"Class-aware Sounding Objects Localization via Audiovisual Correspondence","type":"publication"},{"authors":["Yapeng Tian*","Di Hu*","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c0d82a52007e4e9ab50a2cfafdc4ac17","permalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","section":"publication","summary":"","tags":null,"title":"Co-Learn Sounding Object Visual Grounding and Visually Indicated Sound Separation in A Cycle","type":"publication"},{"authors":["Di Hu","Xuhong Li","Lichao Mou","Pu Jin","Dong Chen","Liping Jing","Xiaoxiang Zhu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c7688dd14aa743d0b927f94d97854f27","permalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","section":"publication","summary":"","tags":null,"title":"Cross-Task Transfer for Geotagged Audiovisual Aerial Scene Recognition","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing 
Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac02b15b850ff085e6c9ad497f3a130c","permalink":"/publication/curriculum-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/curriculum-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Curriculum Audiovisual Learning","type":"publication"},{"authors":["Yapeng Tian","Di Hu","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"cd0308a1bfb55705c394057955f2375d","permalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","section":"publication","summary":"","tags":null,"title":"Cyclic Co-Learning of Sounding Object Visual Grounding and Sound Separation","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"202776673a51788c119f1451c9e313c2","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"00f72a8fe1deeb265958a59b94c2cd33","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"f6c0a9a658cdceee78bd291860181d99","permalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","section":"publication","summary":"","tags":null,"title":"Deep Linear Discriminant Analysis Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d1466a6c42ba930502049d24243f8b62","permalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Deep Multimodal Clustering for Unsupervised Audiovisual Learning Representation","type":"publication"},{"authors":["Di Hu - Chengze Wang - Feiping Nie - Xuelong 
Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9e4cd76d6b972d54b50c190779f639a5","permalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","section":"publication","summary":"","tags":null,"title":"Dense Multimodal Fusion for Hierarchically Joint Representation","type":"publication"},{"authors":["Xincheng Pang","Wenke Xia","Zhigang Wang","Bin Zhao","Di Hu","Dong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"832f5776c5daa77fa5df21ce843a3196","permalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","section":"publication","summary":"","tags":null,"title":"Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection","type":"publication"},{"authors":["Yake Wei","Siwei Li","Ruoxuan Feng","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"591c348a8e03f441318436eb005ae2cc","permalink":"/publication/diagnosing-and-re-learning-for-balanced-multimodal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/diagnosing-and-re-learning-for-balanced-multimodal-learning/","section":"publication","summary":"","tags":null,"title":"Diagnosing and Re-learning for Balanced Multimodal Learning","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"4b1e10b4327cca00dfd58162571a2f8c","permalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","section":"publication","summary":"","tags":null,"title":"Discrete Spectral Hashing for Efficient Similarity Retrieval","type":"publication"},{"authors":["Di Hu","Rui Qian","Minyue Jiang","Xiao Tan","Shilei Wen","Errui Ding","Weiyao Lin","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6953eeac03ee85322e85eece2eeeb84","permalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","section":"publication","summary":"","tags":null,"title":"Discriminative Sounding Objects Localization via Self-supervised Audiovisual Matching","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiaoxiang 
Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3016d01c7b86e792f8778f7aba6fc44d","permalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","section":"publication","summary":"","tags":null,"title":"Does Ambient Sound Help? - Audiovisual Crowd Counting","type":"publication"},{"authors":["Yake Wei","Ruoxuan Feng","Zihe Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"0278c6a7c52909fa5c55eaf522569e7f","permalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","section":"publication","summary":"","tags":null,"title":"Enhancing Multi-modal Cooperation via Fine-grained Modality Valuation","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Di Hu","Hang Zhou","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"28bd51450c42258842f48363910f83c8","permalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"Exploiting Visual Context Semantics for Sound Source Localization","type":"publication"},{"authors":["Sijia Yang","Haoyi Xiong","Di Hu","Kaibo Xu","Licheng Wang","Peizhen Zhu","Zeyi Sun"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ed52bf34eef1f16fc89a0fc5c32fa152","permalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","section":"publication","summary":"","tags":null,"title":"Generalising Combinatorial Discriminant Analysis through Conditioning Truncated Rayleigh Flow","type":"publication"},{"authors":["Zequn Yang","Han Zhang","Yake Wei","Zheng Wang","Feiping Nie","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"76c72a76e4cf8516d166a780e270c79b","permalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","section":"publication","summary":"","tags":null,"title":"Geometric-Inspired Graph-based Incomplete Multi-view Clustering","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing 
Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8fe03bbbdab04c3ee4ecc7e01ecd723c","permalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Heterogeneous Scene Analysis via Self-supervised Audiovisual Learning","type":"publication"},{"authors":["Xuelong Li","Di Hu","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1850ab6a7473c571586aed28d796ac66","permalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","section":"publication","summary":"","tags":null,"title":"Image2song: Song Retrieval via Bridging Image Content and Lyric Words","type":"publication"},{"authors":["Wenke Xia","Dong Wang","Xincheng Pang","Zhigang Wang","Bin Zhao","Di Hu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"82a334df3b6181644b600e4679ce595c","permalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","section":"publication","summary":"","tags":null,"title":"Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs","type":"publication"},{"authors":["Xuelong Li","Di Hu","Feiping Nie"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"24881bb5f959ea9f061fb67469d72eb9","permalink":"/publication/large-graph-hashing-with-spectral-rotation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/large-graph-hashing-with-spectral-rotation/","section":"publication","summary":"","tags":null,"title":"Large Graph Hashing with Spectral Rotation","type":"publication"},{"authors":["Yake Wei","Di Hu","Yapeng Tian","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"67b2f40c745acaa698a385e2742a25bc","permalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","section":"publication","summary":"","tags":null,"title":"Learning in Audio-visual Context: A Review, Analysis, and New Perspective","type":"publication"},{"authors":["Guangyao Li*","Yake Wei*","Yapeng Tian*","Chenliang Xu","Ji-Rong Wen","Di 
Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"125a97cdaa82fb5a0ec455cfd53c1b46","permalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","section":"publication","summary":"","tags":null,"title":"Learning to Answer Questions in Dynamic Audio-Visual Scenarios","type":"publication"},{"authors":["Di Hu","Dong Wang","Xuelong Li","Feiping Nie","Qi Wang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c716bb52e5e46a2dbaebc46fda1517d6","permalink":"/publication/listen-to-the-image/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/listen-to-the-image/","section":"publication","summary":"","tags":null,"title":"Listen to the Image","type":"publication"},{"authors":["Ruize Xu","Ruoxuan Feng","Shi-xiong Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8e1ed6fc418000d90eed8231ce30fa73","permalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","section":"publication","summary":"","tags":null,"title":"MMCosine: Multi-Modal Cosine Loss Towards Balanced Audio-Visual Fine-Grained Learning","type":"publication"},{"authors":["Yake Wei","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ad6411f0202e0562a67a75820ff098f","permalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","section":"publication","summary":"","tags":null,"title":"MMPareto: Innocent Uni-modal Assistance for Enhanced Multi-modal Learning","type":"publication"},{"authors":["Guangyao Li","Yixin Xu","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"38daed7d60d2831123ddca90ac47d9b7","permalink":"/publication/multi-scale-attention-for-audio-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multi-scale-attention-for-audio-question-answering/","section":"publication","summary":"","tags":null,"title":"Multi-Scale Attention for Audio Question Answering","type":"publication"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"88c9d48496c44a5980763aa946676e9e","permalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","section":"publication","summary":"","tags":null,"title":"Multiple Sound Sources Localization from Coarse to Fine","type":"publication"},{"authors":["Ziyun Li","Xinshao Wang","Haojin Yang","Di Hu","Neil M Robertson","David A Clifton","Christoph 
Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"a48ea4ca10463e6ef980903ef312977d","permalink":"/publication/not-all-knowledge-is-created-equal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/not-all-knowledge-is-created-equal/","section":"publication","summary":"","tags":null,"title":"Not All Knowledge Is Created Equal","type":"publication"},{"authors":["Guangyao Li","Wenxuan Hou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"545100c95da731d9faeb7037b5801449","permalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","section":"publication","summary":"","tags":null,"title":"Progressive Spatio-temporal Perception for Audio-Visual Question Answering","type":"publication"},{"authors":["Yaoting Wang*","Weisong Liu*","Guangyao Li","Jian Ding","Di Hu","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d414aab41857970b60155d360ceac88","permalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","section":"publication","summary":"","tags":null,"title":"Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer","type":"publication"},{"authors":["Zequn Yang","Yake Wei","Ce Liang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d069d78586930bf2dd726ae7c0b00c9b","permalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","section":"publication","summary":"","tags":null,"title":"Quantifying and Enhancing Multi-modal Robustness with Modality Preference","type":"publication"},{"authors":["Yaoting Wang*","Peiwen Sun*","Dongzhan Zhou","Guangyao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e787cc7b340511ed0ad617eaf61af942","permalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","section":"publication","summary":"","tags":null,"title":"Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes","type":"publication"},{"authors":["Ruoxuan Feng","Wenke Xia","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"450f11c7cb976aa1013ed40cd3963388","permalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","section":"publication","summary":"","tags":null,"title":"Revisiting Pre-training in Audio-Visual 
Learning","type":"publication"},{"authors":["Wenke Xia","Xingjian Li","Andong Deng","Haoyi Xiong","Dejing Dou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d68814ab18c4fd432535b2592c31988","permalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","section":"publication","summary":"","tags":null,"title":"Robust Cross-modal Knowledge Distillation for Unconstrained Videos","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Wanli Ouyang","Hang Zhou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"acd80d91071719018f44e8766871cb74","permalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","section":"publication","summary":"","tags":null,"title":"SeCo: Separating Unknown Musical Visual Sounds with Consistency Guidance","type":"publication"},{"authors":["Konrad Heidler","Lichao Mou","Di Hu","Pu Jin","Guangyao Li","Chuang Gan","Ji-Rong Wen","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"36c9fd21445495f69bad705471393094","permalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","section":"publication","summary":"","tags":null,"title":"Self-supervised Audiovisual Representation Learning for Remote Sensing Data","type":"publication"},{"authors":["Di Hu","Zheng Wang","Feiping Nie","Rong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac1ac86aa9c1772d446b7594a05d9100","permalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","section":"publication","summary":"","tags":null,"title":"Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis","type":"publication"},{"authors":["Dongzhan Zhou","Xinchi Zhou","Di Hu","Hang Zhou","Lei Bai","Ziwei Liu","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3f2c9d5779b3cec3c9b69a845335b218","permalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","section":"publication","summary":"","tags":null,"title":"SepFusion: Finding Optimal Fusion Structures for Visual Sound Separation","type":"publication"},{"authors":["Tao Wu","Xuewei Li","Zhongang Qi","Di Hu","Xintao Wang","Ying Shan","Xi 
Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"75b3553d0dff4fb43ea7284e9d6f8d1c","permalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","section":"publication","summary":"","tags":null,"title":"SphereDiffusion: Spherical Geometry-aware Distortion Resilient Diffusion Model","type":"publication"},{"authors":["Juncheng Ma","Peiwen Sun","Yaoting Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1fe41f212fd0141fdf179a000dd9df81","permalink":"/publication/stepping-stones-a-progressive-training-strategy-for-audio-visual-semantic-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/stepping-stones-a-progressive-training-strategy-for-audio-visual-semantic-segmentation/","section":"publication","summary":"","tags":null,"title":"Stepping Stones: A Progressive Training Strategy for Audio-Visual Semantic Segmentation","type":"publication"},{"authors":["ZiYun Li","Jona Otholt","Ben Dai","Di Hu","Christoph Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"50b53591fe6d761222acbe7d191d3e47","permalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","section":"publication","summary":"","tags":null,"title":"Supervised Knowledge May Hurt Novel Class Discovery Performance","type":"publication"},{"authors":["Di Hu","Xuelong Li","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6704b0eb55495bb979be6fcbb8243ae","permalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","section":"publication","summary":"","tags":null,"title":"Temporal Multimodal Learning in Audiovisual Speech Recognition","type":"publication"},{"authors":["Dong Wang","Di Hu","Xingjian Li","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"807bb234ac2724175550dbdf52f64d08","permalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","section":"publication","summary":"","tags":null,"title":"Temporal Relational Modeling with Self-Supervision for Action Segmentation","type":"publication"},{"authors":["Hongpeng Lin*","Ludan Ruan*","Wenke Xia*","Peiyu Liu","Jingyuan Wen","Yixin Xu","Di Hu","Ruihua Song","Wayne Xin Zhao","Qin Jin","Zhiwu 
Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"dd91d75ebb359650640b7b6c75634dff","permalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","section":"publication","summary":"","tags":null,"title":"TikTalk: A Video-Based Dialogue Dataset for Multi-Modal Chitchat in Real World","type":"publication"},{"authors":["Xingjian Li","Di Hu","Xuhong Li","Haoyi Xiong","Zhi Ye","Zhipeng Wang","Chengzhong Xu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"5b40a464bbfccb601c6d4c37e85cf81e","permalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","section":"publication","summary":"","tags":null,"title":"Towards Accurate Knowledge Transfer via Target-awareness Representation Disentanglement","type":"publication"},{"authors":["Andong Deng","Xingjian Li","Di Hu","Tianyang Wang","Haoyi Xiong","Chengzhong Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"113edd12d767a54c1fdd10685167cd5c","permalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","section":"publication","summary":"","tags":null,"title":"Towards Inadequately Pre-trained Models in Transfer Learning","type":"publication"},{"authors":["Wenxuan Hou*","Guangyao Li*","Yapeng Tian","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"56c2e256bf8d4a20cdffe034f430aaef","permalink":"/publication/towards-long-form-audio-visual-video-understanding/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-long-form-audio-visual-video-understanding/","section":"publication","summary":"","tags":null,"title":"Towards Long Form Audio-visual Video Understanding","type":"publication"},{"authors":["Zechen Bai","Zhigang Wang","Jian Wang","Di Hu","Errui Ding"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9905f139a565b4f5eabfc5902965f851","permalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","section":"publication","summary":"","tags":null,"title":"Unsupervised Multi-Source Domain Adaptation for Person Re-Identification","type":"publication"},{"authors":["Peiwen Sun","Honggang Zhang","Di 
Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ff959ec7e9a3da6203370e48a939fd1","permalink":"/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/","section":"publication","summary":"","tags":null,"title":"Unveiling and Mitigating Bias in Audio Visual Segmentation (ACM MM Oral)","type":"publication"},{"authors":["Xian Liu","Rui Qian","Hang Zhou","Di Hu","Weiyao Lin","Ziwei Liu","Bolei Zhou","Xiaowei Zhou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ca462fd19e2017e2ecb2b26a145ef250","permalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","section":"publication","summary":"","tags":null,"title":"Visual Sound Localization in-the-Wild by Cross-Modal Interference Erasing","type":"publication"}] \ No newline at end of file +[{"authors":null,"categories":null,"content":"个人简介, 控制在600-800个英文字符之内\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"4e73f707a3c1da0c5d8d165361161c7b","permalink":"/authors/19_ruize/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/19_ruize/","section":"authors","summary":"个人简介, 控制在600-800个英文字符之内","tags":null,"title":"Ruize Xu","type":"authors"},{"authors":null,"categories":null,"content":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He got his master degree at China Agricultural University in 2020 and got into GeWu-Lab since then. His recently research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and make warm artificial intelligence research! People who are interested in my research domain are very welcome and do not hesitate to contact me actively. For more information, please visit his personal homepage. Valar Morghulis!\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"537de72d4cb178cea6fbf2b2a92ea589","permalink":"/authors/20_guangyao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_guangyao/","section":"authors","summary":"Guangyao is a Ph.D. Candidate at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He got his master degree at China Agricultural University in 2020 and got into GeWu-Lab since then. His recently research interests include audio-visual learning and scene understanding. And he hopes to brave the no-man\u0026rsquo;s land on the road of scientific research and make warm artificial intelligence research! People who","tags":null,"title":"Guangyao Li","type":"authors"},{"authors":null,"categories":null,"content":"Xiaokang is a master student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He got his undergraduate degree at School of Information, Renmin University of China in 2020 and got into GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. 
He is also devoted to helping the visually impaired with AI, in both technology and practice.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"22debf3f166bda4bfb28c8317489f918","permalink":"/authors/20_xiaokang/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xiaokang/","section":"authors","summary":"Xiaokang is a master\u0026rsquo;s student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He received his undergraduate degree from the School of Information, Renmin University of China in 2020 and has been with GeWu-Lab since then. He is interested in multi-modal learning and perception, and optimization mechanism design. He is also devoted to helping the visually impaired with AI, in both technology and practice.","tags":null,"title":"Xiaokang Peng","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, keep within 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"55a49bcd8ae300a0362a45302ca97c26","permalink":"/authors/20_xuemin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_xuemin/","section":"authors","summary":"Personal bio, keep within 600-800 English characters","tags":null,"title":"Xuemin Liu","type":"authors"},{"authors":null,"categories":null,"content":"Yixin is a master\u0026rsquo;s student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-supervised Representation Learning. Now he is working on video understanding and speaker diarization tasks for complex speech scenarios. He is also interested in Internet finance, and received his Bachelor of Finance from Renmin University of China in addition to his Computer Science degree.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"033ae9c233d8ca15172e0f0eb482735e","permalink":"/authors/20_yixin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/20_yixin/","section":"authors","summary":"Yixin is a master\u0026rsquo;s student at Gaoling School of Artificial Intelligence, Renmin University of China. His main research topics are Multi-modal Scene Perception and Self-supervised Representation Learning. Now he is working on video understanding and speaker diarization tasks for complex speech scenarios. He is also interested in Internet finance, and received his Bachelor of Finance from Renmin University of China in addition to his Computer Science degree.","tags":null,"title":"Yixin Xu","type":"authors"},{"authors":null,"categories":null,"content":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate studies he worked with Prof. Di Hu. Now Rui is a Ph.D. student in the Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. Dahua Lin.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"9434b9dca31f1f23a676f2b869e0c881","permalink":"/authors/21_ruiqian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_ruiqian/","section":"authors","summary":"Rui is interested in computer vision and machine learning, and has done some research on video representation learning and joint audio-visual learning. During his undergraduate studies he worked with Prof. Di Hu. Now Rui is a Ph.D. student in the Multi-Media Lab at The Chinese University of Hong Kong, supervised by Prof. 
Dahua Lin.","tags":null,"title":"Rui Qian","type":"authors"},{"authors":null,"categories":null,"content":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"57b1d4e29185f3870d53fc65c766173e","permalink":"/authors/21_yake/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/21_yake/","section":"authors","summary":"Yake is a PhD student at Gaoling School of Artificial Intelligence, Renmin University of China. She received her bachelor\u0026rsquo;s degree in Computer Science and Technology from University of Electronic Science and Technology of China in 2021. Now her research interests focus on the effective mechanism of multi-modal learning.","tags":null,"title":"Yake Wei","type":"authors"},{"authors":null,"categories":null,"content":"Andong Deng spent a wonderful year at GeWu Lab doing research on multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an incoming PhD student (Fall 2022) at the Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c95476ad24cc214056b3d2c5e8c90f17","permalink":"/authors/22_andong/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_andong/","section":"authors","summary":"Andong Deng spent a wonderful year at GeWu Lab doing research on multimodal learning with Dr. Di Hu from 2021 to 2022. Now he is an incoming PhD student (Fall 2022) at the Center for Research in Computer Vision, University of Central Florida, advised by Dr. Chen Chen. His research interests include multi-modal learning, video understanding and 3D vision.","tags":null,"title":"Andong Deng","type":"authors"},{"authors":null,"categories":null,"content":"Wenke has been a Ph.D. student at Gaoling School of Artificial Intelligence, Renmin University of China since Fall 2022, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focuses on building a generalizable manipulation policy with computer vision.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a2791369e75b13b52139d9860293bdd5","permalink":"/authors/22_wenke/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenke/","section":"authors","summary":"Wenke has been a Ph.D. student at Gaoling School of Artificial Intelligence, Renmin University of China since Fall 2022, advised by Prof. Di Hu. His research interests include reinforcement learning and embodied AI. Now, he focuses on building a generalizable manipulation policy with computer vision.","tags":null,"title":"Wenke Xia","type":"authors"},{"authors":null,"categories":null,"content":"Wenxuan is a second-year Ph.D. student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He received his bachelor\u0026rsquo;s and master\u0026rsquo;s degrees from Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. 
Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"cd37724dba9b446f1c1307e40cd45632","permalink":"/authors/22_wenxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_wenxuan/","section":"authors","summary":"Wenxuan is a second-year Ph.D. student in the GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He received his bachelor\u0026rsquo;s and master\u0026rsquo;s degrees from Northwestern Polytechnical University and Xi\u0026rsquo;an Jiaotong University, respectively. Now his main research focuses on multimodal learning towards real-world scene understanding, aiming to guide the machine to perceive and understand natural scenes like human beings.","tags":null,"title":"Wenxuan Hou","type":"authors"},{"authors":null,"categories":null,"content":"Xincheng is a master\u0026rsquo;s student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on multi-modal scene understanding in embodied AI.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"a389590984a0c3fb50de499f8df2d4c0","permalink":"/authors/22_xincheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_xincheng/","section":"authors","summary":"Xincheng is a master\u0026rsquo;s student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. Currently his research interests focus on multi-modal scene understanding in embodied AI.","tags":null,"title":"Xincheng Pang","type":"authors"},{"authors":null,"categories":null,"content":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"d884fc3eb1e2b2382def5073cec5e105","permalink":"/authors/22_zequn/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/22_zequn/","section":"authors","summary":"Zequn is a second-year Ph.D. student at GeWu-Lab, Gaoling School of Artificial Intelligence, Renmin University of China. He currently focuses on the mechanism of multi-modal learning, including theoretical comprehension and algorithm design. He also has a keen interest in developing efficient and effective multi-view clustering techniques utilizing machine learning methods.","tags":null,"title":"Zequn Yang","type":"authors"},{"authors":null,"categories":null,"content":"Henghui is a first-year master\u0026rsquo;s student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He received his bachelor\u0026rsquo;s degree from Dalian University of Technology in 2023. 
Currently his research interests focus on Large Language Models and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"0f875044223f8afd458b089859ba38d8","permalink":"/authors/23_henghui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_henghui/","section":"authors","summary":"Henghui is a first-year master\u0026rsquo;s student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He received his bachelor\u0026rsquo;s degree from Dalian University of Technology in 2023. Currently his research interests focus on Large Language Models and cross-modal generation.","tags":null,"title":"Henghui Du","type":"authors"},{"authors":null,"categories":null,"content":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanisms of multi-modal learning.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"8808a5aa1460c5cb4fad660d28f8520a","permalink":"/authors/23_jiahao/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jiahao/","section":"authors","summary":"Jiahao is a senior student of the School of Computer Science and Engineering, BUAA. He is interested in the interaction mechanisms of multi-modal learning.","tags":null,"title":"Jiahao Li","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, keep within 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"1697602eb95e74d0fb1a9247c1f07489","permalink":"/authors/23_jianghan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jianghan/","section":"authors","summary":"Personal bio, keep within 600-800 English characters","tags":null,"title":"Jianghan Chao","type":"authors"},{"authors":null,"categories":null,"content":"Jingxian is a fourth-year student at Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bdeafc1f9127d19078299ad17ddcf547","permalink":"/authors/23_jingxian/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jingxian/","section":"authors","summary":"Jingxian is a fourth-year student at Gaoling School of Artificial Intelligence, Renmin University of China. He is interested in robot manipulation and perception from interaction.","tags":null,"title":"Jingxian Lu","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, keep within 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"b1f3ebd7d0f58e6a501810a383c4a9ed","permalink":"/authors/23_jinlin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_jinlin/","section":"authors","summary":"Personal bio, keep within 600-800 English characters","tags":null,"title":"Jinlin Li","type":"authors"},{"authors":null,"categories":null,"content":"Juncheng is a third-year student at the School of Artificial Intelligence, University of Chinese Academy of Sciences. 
His research interests include audio-visual localization and segmentation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"874c09024781e4fd5375423eaef9c9e8","permalink":"/authors/23_juncheng/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_juncheng/","section":"authors","summary":"Juncheng is a third-year student at the School of Artificial Intelligence, University of Chinese Academy of Sciences. His research interests include audio-visual localization and segmentation.","tags":null,"title":"Juncheng Ma","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, keep within 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"834fe556c30cd4180a6dc4c692fd63d9","permalink":"/authors/23_liangce/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_liangce/","section":"authors","summary":"Personal bio, keep within 600-800 English characters","tags":null,"title":"Ce Liang","type":"authors"},{"authors":null,"categories":null,"content":"Peiwen is a second-year MPhil student in the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning, including sentiment, segmentation and foundation models.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bf84fe39ef0b614af0ae82d08359c784","permalink":"/authors/23_peiwen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_peiwen/","section":"authors","summary":"Peiwen is a second-year MPhil student in the Department of Artificial Intelligence, Beijing University of Posts and Telecommunications. He is interested in multimodal learning, including sentiment, segmentation and foundation models.","tags":null,"title":"Peiwen Sun","type":"authors"},{"authors":null,"categories":null,"content":"Ruoxuan is a first-year master\u0026rsquo;s student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"c29a63de0242659b43a43451fc077046","permalink":"/authors/23_ruoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_ruoxuan/","section":"authors","summary":"Ruoxuan is a first-year master\u0026rsquo;s student in GeWu-Lab at Gaoling School of Artificial Intelligence, Renmin University of China, advised by Prof. Di Hu. He is interested in multi-modal learning and embodied AI.","tags":null,"title":"Ruoxuan Feng","type":"authors"},{"authors":null,"categories":null,"content":"Personal bio, keep within 600-800 English characters\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"364786f50ed04bbfb2309f8069cdbe90","permalink":"/authors/23_shaoxuan/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_shaoxuan/","section":"authors","summary":"Personal bio, keep within 600-800 English characters","tags":null,"title":"Shaoxuan Xu","type":"authors"},{"authors":null,"categories":null,"content":"Siwei is a fourth-year student in the Department of Electronic Engineering, Tsinghua University. 
He is interested in image editing with generative diffusion models and image deblurring.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"062e874f9d4216ee7c15e6afe41e1631","permalink":"/authors/23_siwei/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_siwei/","section":"authors","summary":"Siwei is a fourth-year student in the Department of Electronic Engineering, Tsinghua University. He is interested in image editing with generative diffusion models and image deblurring.","tags":null,"title":"Siwei Li","type":"authors"},{"authors":null,"categories":null,"content":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"bda305ecfaa132f6e49d2dd2566d0f25","permalink":"/authors/23_yaoting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/23_yaoting/","section":"authors","summary":"Yaoting is currently working as an intern at the Deepwise AI Lab for multimodal medical data processing. He received his master\u0026rsquo;s degree from the University of Edinburgh in 2022. His research interests include multimodal deep learning, cross-modal transformers, and affective computing.","tags":null,"title":"Yaoting Wang","type":"authors"},{"authors":null,"categories":null,"content":"Jirui is a second-year MPhil student at the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"7a6ee1988cb2fa93bfeee88a094c7489","permalink":"/authors/24_jirui/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_jirui/","section":"authors","summary":"Jirui is a second-year MPhil student at the School of Computer and Artificial Intelligence, Wuhan University of Technology. She is interested in multimodal understanding and cross-modal generation.","tags":null,"title":"JiRui Huang","type":"authors"},{"authors":null,"categories":null,"content":"Yuchen is a master\u0026rsquo;s student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He received his bachelor\u0026rsquo;s degree from Zhejiang University in 2024.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"186e15560cfa29bcd45c618efc625779","permalink":"/authors/24_yuchen/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/24_yuchen/","section":"authors","summary":"Yuchen is a master\u0026rsquo;s student in GeWu-Lab at Renmin University of China, advised by Prof. Di Hu. He received his bachelor\u0026rsquo;s degree from Zhejiang University in 2024.","tags":null,"title":"Yuchen Li","type":"authors"},{"authors":["dihu"],"categories":null,"content":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was a research scientist at Baidu Research. Di Hu obtained the Ph.D. degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. 
Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via natural multimodal messages. He is an aficionado of cognitive neuroscience and wrote a study note on it during his undergraduate years. Inspired by what he learned from cognitive neuroscience, and by what he observed and deliberated on in daily life, he is strongly convinced that pervasive, free, natural multimodal messages can provide sufficient information for perceiving, learning and understanding the environment, and even the agent itself, which promisingly makes multimodal learning one of the keys to achieving machine intelligence.\n","date":-62135596800,"expirydate":-62135596800,"kind":"taxonomy","lang":"en","lastmod":-62135596800,"objectID":"2525497d367e79493fd32b198b28f040","permalink":"/authors/admin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/authors/admin/","section":"authors","summary":"Di Hu is tenure-track faculty at Gaoling School of Artificial Intelligence, Renmin University of China. Before that, he was a research scientist at Baidu Research. Di Hu obtained the Ph.D. degree from Northwestern Polytechnical University in 2019, supervised by Xuelong Li. Currently, Di Hu is leading the GeWu Lab and exploring how to understand and interact with the world via natural multimodal messages. He is an aficionado of","tags":null,"title":"Di Hu","type":"authors"},{"authors":["Rui Qian","Di Hu","Heinrich Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"852b491b0dcadb44b8f099f931db74c4","permalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/a-two-stage-framework-for-multiple-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"A Two-Stage Framework for Multiple Sound-Source Localization","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiao Xiang Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"b21459d2cd2aa98d5a771a396df3c29e","permalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ambient-sound-helps_-audiovisual-crowd-counting-in-extreme-conditions/","section":"publication","summary":"","tags":null,"title":"Ambient Sound Helps: Audiovisual Crowd Counting in Extreme Conditions","type":"publication"},{"authors":["Wenke Xia*","Xu Zhao*","Xincheng Pang","Changqing Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"7a5ff9681de843469038165a230c4f87","permalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-audiovisual-dataset-for-imbalance-analysis/","section":"publication","summary":"","tags":null,"title":"Balanced Audiovisual Dataset for Imbalance Analysis","type":"publication"},{"authors":["Xiaokang Peng*","Yake Wei*","Andong Deng","Dong Wang","Di 
Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1cdda2159c4adeb4f31cb4e7f1a5ab8a","permalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/","section":"publication","summary":"","tags":null,"title":"Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)","type":"publication"},{"authors":["Yaoting Wang*","Peiwen Sun*","Yuanchao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e2d14df72502e78a30f83d09310b98b6","permalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/can-textual-semantics-mitigate-sounding-object-segmentationpreference/","section":"publication","summary":"","tags":null,"title":"Can Textual Semantics Mitigate Sounding Object SegmentationPreference?","type":"publication"},{"authors":["Di Hu","Yake Wei","Rui Qian","Weiyao Lin","Ruihua Song","Ji-Rong Wen"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"91e67073102678aec9799732ceef49f3","permalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/class-aware-sounding-objects-localization-via-audiovisual-correspondence/","section":"publication","summary":"","tags":null,"title":"Class-aware Sounding Objects Localization via Audiovisual Correspondence","type":"publication"},{"authors":["Yapeng Tian*","Di Hu*","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c0d82a52007e4e9ab50a2cfafdc4ac17","permalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/","section":"publication","summary":"","tags":null,"title":"Co-Learn Sounding Object Visual Grounding and Visually Indicated Sound Separation in A Cycle","type":"publication"},{"authors":["Di Hu","Xuhong Li","Lichao Mou","Pu Jin","Dong Chen","Liping Jing","Xiaoxiang Zhu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c7688dd14aa743d0b927f94d97854f27","permalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cross-task-transfer-for-geotagged-audiovisual-aerial-scene-recognition/","section":"publication","summary":"","tags":null,"title":"Cross-Task Transfer for Geotagged Audiovisual Aerial Scene Recognition","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing 
Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac02b15b850ff085e6c9ad497f3a130c","permalink":"/publication/curriculum-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/curriculum-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Curriculum Audiovisual Learning","type":"publication"},{"authors":["Yapeng Tian","Di Hu","Chenliang Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"cd0308a1bfb55705c394057955f2375d","permalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/","section":"publication","summary":"","tags":null,"title":"Cyclic Co-Learning of Sounding Object Visual Grounding and Sound Separation","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"202776673a51788c119f1451c9e313c2","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing-journal/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"00f72a8fe1deeb265958a59b94c2cd33","permalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-binary-reconstruction-for-cross-modal-hashing/","section":"publication","summary":"","tags":null,"title":"Deep Binary Reconstruction for Cross-modal Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"f6c0a9a658cdceee78bd291860181d99","permalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-linear-discriminant-analysis-hashing-supplemental-material/","section":"publication","summary":"","tags":null,"title":"Deep Linear Discriminant Analysis Hashing","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d1466a6c42ba930502049d24243f8b62","permalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/deep-multimodal-clustering-for-unsupervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Deep Multimodal Clustering for Unsupervised Audiovisual Learning Representation","type":"publication"},{"authors":["Di Hu - Chengze Wang - Feiping Nie - Xuelong 
Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9e4cd76d6b972d54b50c190779f639a5","permalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/dense-multimodal-fusion-for-hierarchically-joint-representation/","section":"publication","summary":"","tags":null,"title":"Dense Multimodal Fusion for Hierarchically Joint Representation","type":"publication"},{"authors":["Xincheng Pang","Wenke Xia","Zhigang Wang","Bin Zhao","Di Hu","Dong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"832f5776c5daa77fa5df21ce843a3196","permalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/","section":"publication","summary":"","tags":null,"title":"Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection","type":"publication"},{"authors":["Yake Wei","Siwei Li","Ruoxuan Feng","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"591c348a8e03f441318436eb005ae2cc","permalink":"/publication/diagnosing-and-re-learning-for-balanced-multimodal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/diagnosing-and-re-learning-for-balanced-multimodal-learning/","section":"publication","summary":"","tags":null,"title":"Diagnosing and Re-learning for Balanced Multimodal Learning","type":"publication"},{"authors":["Di Hu","Feiping Nie","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"4b1e10b4327cca00dfd58162571a2f8c","permalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discrete-spectral-hashing-for-efficient-similarity-retrieval/","section":"publication","summary":"","tags":null,"title":"Discrete Spectral Hashing for Efficient Similarity Retrieval","type":"publication"},{"authors":["Di Hu","Rui Qian","Minyue Jiang","Xiao Tan","Shilei Wen","Errui Ding","Weiyao Lin","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6953eeac03ee85322e85eece2eeeb84","permalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/discriminative-sounding-objects-localization-via-self-supervised-audiovisual-matching/","section":"publication","summary":"","tags":null,"title":"Discriminative Sounding Objects Localization via Self-supervised Audiovisual Matching","type":"publication"},{"authors":["Di Hu*","Lichao Mou*","Qingzhong Wang*","Junyu Gao","Yuansheng Hua","Dejing Dou","Xiaoxiang 
Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3016d01c7b86e792f8778f7aba6fc44d","permalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/does-ambient-sound-help_-audiovisual-crowd-counting/","section":"publication","summary":"","tags":null,"title":"Does Ambient Sound Help? - Audiovisual Crowd Counting","type":"publication"},{"authors":["Yake Wei","Ruoxuan Feng","Zihe Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"0278c6a7c52909fa5c55eaf522569e7f","permalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/enhancing-multi-modal-cooperation-via-fine-grained-modality-valuation/","section":"publication","summary":"","tags":null,"title":"Enhancing Multi-modal Cooperation via Fine-grained Modality Valuation","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Di Hu","Hang Zhou","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"28bd51450c42258842f48363910f83c8","permalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/exploiting-visual-context-semantics-for-sound-source-localization/","section":"publication","summary":"","tags":null,"title":"Exploiting Visual Context Semantics for Sound Source Localization","type":"publication"},{"authors":["Sijia Yang","Haoyi Xiong","Di Hu","Kaibo Xu","Licheng Wang","Peizhen Zhu","Zeyi Sun"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ed52bf34eef1f16fc89a0fc5c32fa152","permalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/generalising-combinatorial-discriminant-analysis-through-conditioning-truncated-rayleigh-flow/","section":"publication","summary":"","tags":null,"title":"Generalising Combinatorial Discriminant Analysis through Conditioning Truncated Rayleigh Flow","type":"publication"},{"authors":["Zequn Yang","Han Zhang","Yake Wei","Zheng Wang","Feiping Nie","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"76c72a76e4cf8516d166a780e270c79b","permalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/geometric-inspired-graph-based-incomplete-multi-view-clustering/","section":"publication","summary":"","tags":null,"title":"Geometric-Inspired Graph-based Incomplete Multi-view Clustering","type":"publication"},{"authors":["Di Hu","Zheng Wang","Haoyi Xiong","Dong Wang","Feiping Nie","Dejing 
Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8fe03bbbdab04c3ee4ecc7e01ecd723c","permalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/heterogeneous-scene-analysis-via-self-supervised-audiovisual-learning/","section":"publication","summary":"","tags":null,"title":"Heterogeneous Scene Analysis via Self-supervised Audiovisual Learning","type":"publication"},{"authors":["Xuelong Li","Di Hu","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1850ab6a7473c571586aed28d796ac66","permalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/image2song-song-retrieval-via-bridging-image-content-and-lyric-words/","section":"publication","summary":"","tags":null,"title":"Image2song: Song Retrieval via Bridging Image Content and Lyric Words","type":"publication"},{"authors":["Wenke Xia","Dong Wang","Xincheng Pang","Zhigang Wang","Bin Zhao","Di Hu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"82a334df3b6181644b600e4679ce595c","permalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/","section":"publication","summary":"","tags":null,"title":"Kinematic-aware Prompting for Generalizable Articulated Object Manipulation with LLMs","type":"publication"},{"authors":["Jingxian Lu","Wenke Xia","Dong Wang","Zhigang Wang","Bin Zhao","Di Hu","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"2cb0cd3b7dd67caebf2eae2ac616b156","permalink":"/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/","section":"publication","summary":"","tags":null,"title":"KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance","type":"publication"},{"authors":["Xuelong Li","Di Hu","Feiping Nie"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"24881bb5f959ea9f061fb67469d72eb9","permalink":"/publication/large-graph-hashing-with-spectral-rotation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/large-graph-hashing-with-spectral-rotation/","section":"publication","summary":"","tags":null,"title":"Large Graph Hashing with Spectral Rotation","type":"publication"},{"authors":["Yake Wei","Di Hu","Yapeng Tian","Xuelong 
Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"67b2f40c745acaa698a385e2742a25bc","permalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-in-audio-visual-context-a-review-analysis-and-new-perspective/","section":"publication","summary":"","tags":null,"title":"Learning in Audio-visual Context: A Review, Analysis, and New Perspective","type":"publication"},{"authors":["Guangyao Li*","Yake Wei*","Yapeng Tian*","Chenliang Xu","Ji-Rong Wen","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"125a97cdaa82fb5a0ec455cfd53c1b46","permalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/","section":"publication","summary":"","tags":null,"title":"Learning to Answer Questions in Dynamic Audio-Visual Scenarios","type":"publication"},{"authors":["Di Hu","Dong Wang","Xuelong Li","Feiping Nie","Qi Wang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"c716bb52e5e46a2dbaebc46fda1517d6","permalink":"/publication/listen-to-the-image/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/listen-to-the-image/","section":"publication","summary":"","tags":null,"title":"Listen to the Image","type":"publication"},{"authors":["Ruize Xu","Ruoxuan Feng","Shi-xiong Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"8e1ed6fc418000d90eed8231ce30fa73","permalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning/","section":"publication","summary":"","tags":null,"title":"MMCosine: Multi-Modal Cosine Loss Towards Balanced Audio-Visual Fine-Grained Learning","type":"publication"},{"authors":["Yake Wei","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ad6411f0202e0562a67a75820ff098f","permalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/mmpareto-innocent-uni-modal-assistance-for-enhanced-multi-modal-learning/","section":"publication","summary":"","tags":null,"title":"MMPareto: Innocent Uni-modal Assistance for Enhanced Multi-modal Learning","type":"publication"},{"authors":["Guangyao Li","Yixin Xu","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"38daed7d60d2831123ddca90ac47d9b7","permalink":"/publication/multi-scale-attention-for-audio-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multi-scale-attention-for-audio-question-answering/","section":"publication","summary":"","tags":null,"title":"Multi-Scale Attention for Audio Question Answering","type":"publication"},{"authors":["Rui Qian","Di Hu","Heinrich 
Dinkel","Mengyue Wu","Ning Xu","Weiyao Lin"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"88c9d48496c44a5980763aa946676e9e","permalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/multiple-sound-sources-localization-from-coarse-to-fine/","section":"publication","summary":"","tags":null,"title":"Multiple Sound Sources Localization from Coarse to Fine","type":"publication"},{"authors":["Ziyun Li","Xinshao Wang","Haojin Yang","Di Hu","Neil M Robertson","David A Clifton","Christoph Meinel","Haojin Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"a48ea4ca10463e6ef980903ef312977d","permalink":"/publication/not-all-knowledge-is-created-equal/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/not-all-knowledge-is-created-equal/","section":"publication","summary":"","tags":null,"title":"Not All Knowledge Is Created Equal","type":"publication"},{"authors":["Ruoxuan Feng","Di Hu","Wenke Ma","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"891cffdde1feb31f3dc52292231f2969","permalink":"/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/","section":"publication","summary":"","tags":null,"title":"Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation","type":"publication"},{"authors":["Guangyao Li","Wenxuan Hou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"545100c95da731d9faeb7037b5801449","permalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/","section":"publication","summary":"","tags":null,"title":"Progressive Spatio-temporal Perception for Audio-Visual Question Answering","type":"publication"},{"authors":["Yaoting Wang*","Weisong Liu*","Guangyao Li","Jian Ding","Di Hu","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d414aab41857970b60155d360ceac88","permalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/","section":"publication","summary":"","tags":null,"title":"Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer","type":"publication"},{"authors":["Zequn Yang","Yake Wei","Ce Liang","Di 
Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d069d78586930bf2dd726ae7c0b00c9b","permalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/quantifying-and-enhancing-multi-modal-robustness-with-modality-preference/","section":"publication","summary":"","tags":null,"title":"Quantifying and Enhancing Multi-modal Robustness with Modality Preference","type":"publication"},{"authors":["Yaoting Wang*","Peiwen Sun*","Dongzhan Zhou","Guangyao Li","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"e787cc7b340511ed0ad617eaf61af942","permalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/ref-avs-refer-and-segment-objects-in-audio-visual-scenes/","section":"publication","summary":"","tags":null,"title":"Ref-AVS: Refer and Segment Objects in Audio-Visual Scenes","type":"publication"},{"authors":["Ruoxuan Feng","Wenke Xia","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"450f11c7cb976aa1013ed40cd3963388","permalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/revisiting-pre-training-in-audio-visual-learning/","section":"publication","summary":"","tags":null,"title":"Revisiting Pre-training in Audio-Visual Learning","type":"publication"},{"authors":["Wenke Xia","Xingjian Li","Andong Deng","Haoyi Xiong","Dejing Dou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6d68814ab18c4fd432535b2592c31988","permalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/","section":"publication","summary":"","tags":null,"title":"Robust Cross-modal Knowledge Distillation for Unconstrained Videos","type":"publication"},{"authors":["Xinchi Zhou","Dongzhan Zhou","Wanli Ouyang","Hang Zhou","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"acd80d91071719018f44e8766871cb74","permalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/seco-separating-unknown-musical-visual-sounds-with-consistency-guidance/","section":"publication","summary":"","tags":null,"title":"SeCo: Separating Unknown Musical Visual Sounds with Consistency Guidance","type":"publication"},{"authors":["Konrad Heidler","Lichao Mou","Di Hu","Pu Jin","Guangyao Li","Chuang Gan","Ji-Rong Wen","Xiao Xiang 
Zhu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"36c9fd21445495f69bad705471393094","permalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-audiovisual-representation-learning-for-remote-sensing-data/","section":"publication","summary":"","tags":null,"title":"Self-supervised Audiovisual Representation Learning for Remote Sensing Data","type":"publication"},{"authors":["Di Hu","Zheng Wang","Feiping Nie","Rong Wang","Xuelong Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ac1ac86aa9c1772d446b7594a05d9100","permalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/self-supervised-learning-for-heterogeneous-audiovisual-scene-analysis/","section":"publication","summary":"","tags":null,"title":"Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis","type":"publication"},{"authors":["Dongzhan Zhou","Xinchi Zhou","Di Hu","Hang Zhou","Lei Bai","Ziwei Liu","Wanli Ouyang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"3f2c9d5779b3cec3c9b69a845335b218","permalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/sepfusion_-finding-optimal-fusion-structures-for-visual-sound-separation/","section":"publication","summary":"","tags":null,"title":"SepFusion: Finding Optimal Fusion Structures for Visual Sound Separation","type":"publication"},{"authors":["Tao Wu","Xuewei Li","Zhongang Qi","Di Hu","Xintao Wang","Ying Shan","Xi Li"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"75b3553d0dff4fb43ea7284e9d6f8d1c","permalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/spherediffusion-spherical-geometry-aware-distortion-resilient-diffusion-model/","section":"publication","summary":"","tags":null,"title":"SphereDiffusion: Spherical Geometry-aware Distortion Resilient Diffusion Model","type":"publication"},{"authors":["Juncheng Ma","Peiwen Sun","Yaoting Wang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"1fe41f212fd0141fdf179a000dd9df81","permalink":"/publication/stepping-stones-a-progressive-training-strategy-for-audio-visual-semantic-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/stepping-stones-a-progressive-training-strategy-for-audio-visual-semantic-segmentation/","section":"publication","summary":"","tags":null,"title":"Stepping Stones: A Progressive Training Strategy for Audio-Visual Semantic Segmentation","type":"publication"},{"authors":["ZiYun Li","Jona Otholt","Ben Dai","Di Hu","Christoph Meinel","Haojin 
Yang"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"50b53591fe6d761222acbe7d191d3e47","permalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/","section":"publication","summary":"","tags":null,"title":"Supervised Knowledge May Hurt Novel Class Discovery Performance","type":"publication"},{"authors":["Di Hu","Xuelong Li","Xiaoqiang Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"d6704b0eb55495bb979be6fcbb8243ae","permalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-multimodal-learning-in-audiovisual-speech-recognition/","section":"publication","summary":"","tags":null,"title":"Temporal Multimodal Learning in Audiovisual Speech Recognition","type":"publication"},{"authors":["Dong Wang","Di Hu","Xingjian Li","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"807bb234ac2724175550dbdf52f64d08","permalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/temporal-relational-modeling-with-self-supervision-for-action-segmentation/","section":"publication","summary":"","tags":null,"title":"Temporal Relational Modeling with Self-Supervision for Action Segmentation","type":"publication"},{"authors":["Hongpeng Lin*","Ludan Ruan*","Wenke Xia*","Peiyu Liu","Jingyuan Wen","Yixin Xu","Di Hu","Ruihua Song","Wayne Xin Zhao","Qin Jin","Zhiwu Lu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"dd91d75ebb359650640b7b6c75634dff","permalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/tiktalk-a-video-based-dialogue-dataset-for-multi-modal-chitchat-in-real-world/","section":"publication","summary":"","tags":null,"title":"TikTalk: A Video-Based Dialogue Dataset for Multi-Modal Chitchat in Real World","type":"publication"},{"authors":["Xingjian Li","Di Hu","Xuhong Li","Haoyi Xiong","Zhi Ye","Zhipeng Wang","Chengzhong Xu","Dejing Dou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"5b40a464bbfccb601c6d4c37e85cf81e","permalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-accurate-knowledge-transfer-via-target-awareness-representation-disentanglement/","section":"publication","summary":"","tags":null,"title":"Towards Accurate Knowledge Transfer via Target-awareness Representation Disentanglement","type":"publication"},{"authors":["Andong Deng","Xingjian Li","Di Hu","Tianyang Wang","Haoyi Xiong","Chengzhong 
Xu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"113edd12d767a54c1fdd10685167cd5c","permalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-inadequately-pre-trained-models-in-transfer-learning/","section":"publication","summary":"","tags":null,"title":"Towards Inadequately Pre-trained Models in Transfer Learning","type":"publication"},{"authors":["Wenxuan Hou*","Guangyao Li*","Yapeng Tian","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"56c2e256bf8d4a20cdffe034f430aaef","permalink":"/publication/towards-long-form-audio-visual-video-understanding/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/towards-long-form-audio-visual-video-understanding/","section":"publication","summary":"","tags":null,"title":"Towards Long Form Audio-visual Video Understanding","type":"publication"},{"authors":["Zechen Bai","Zhigang Wang","Jian Wang","Di Hu","Errui Ding"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"9905f139a565b4f5eabfc5902965f851","permalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unsupervised-multi-source-domain-adaptation-for-person-re-identification/","section":"publication","summary":"","tags":null,"title":"Unsupervised Multi-Source Domain Adaptation for Person Re-Identification","type":"publication"},{"authors":["Peiwen Sun","Honggang Zhang","Di Hu"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"6ff959ec7e9a3da6203370e48a939fd1","permalink":"/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/","section":"publication","summary":"","tags":null,"title":"Unveiling and Mitigating Bias in Audio Visual Segmentation (ACM MM Oral)","type":"publication"},{"authors":["Xian Liu","Rui Qian","Hang Zhou","Di Hu","Weiyao Lin","Ziwei Liu","Bolei Zhou","Xiaowei Zhou"],"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":-62135596800,"objectID":"ca462fd19e2017e2ecb2b26a145ef250","permalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/publication/visual-sound-localization-in-the-wild-by-cross-modal-interference-erasing/","section":"publication","summary":"","tags":null,"title":"Visual Sound Localization in-the-Wild by Cross-Modal Interference Erasing","type":"publication"}] \ No newline at end of file diff --git a/docs/index.xml b/docs/index.xml index b84a4267..33b5787e 100755 --- a/docs/index.xml +++ b/docs/index.xml @@ -228,6 +228,14 @@ + + KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + + + Large Graph Hashing with Spectral Rotation 
/publication/large-graph-hashing-with-spectral-rotation/ @@ -300,6 +308,14 @@ + + Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation + /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/ + + + Progressive Spatio-temporal Perception for Audio-Visual Question Answering /publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/ diff --git a/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html b/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html index 6f5aed89..07bbdd46 100755 --- a/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html +++ b/docs/publication/balanced-audiovisual-dataset-for-imbalance-analysis/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Wenke Xia" + "name": "Wenke Xia*" }, "publisher": { diff --git a/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html b/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html index bc80c9b9..b4d02177 100755 --- a/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html +++ b/docs/publication/co-learn-sounding-object-visual-grounding-and-visually-indicated-sound-separation-in-a-cycle/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Yapeng Tian*" + "name": "Yapeng Tian" }, "publisher": { diff --git a/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html b/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html index 8be06258..9286afb7 100755 --- a/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html +++ b/docs/publication/cyclic-co-learning-of-sounding-object-visual-grounding-and-sound-separation/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Yapeng Tian*" + "name": "Yapeng Tian" }, "publisher": { diff --git a/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/cite.bib b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/cite.bib new file mode 100644 index 00000000..b95f5fdb --- /dev/null +++ b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/cite.bib @@ -0,0 +1,6 @@ +@article{pang2024depth, + title={Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection}, + author={Pang, Xincheng and Xia, Wenke and Wang, Zhigang and Zhao, Bin and Hu, Di and Wang, Dong and Li, Xuelong}, + journal={arXiv preprint arXiv:2408.05107}, + year={2024} +} \ No newline at end of file diff --git a/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html index fda94d04..cde64d4c 100644 --- a/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html +++ 
b/docs/publication/depth-helps-improving-pre-trained-rgb-based-policy-with-depth-information-injection/index.html @@ -509,9 +509,71 @@

    Depth Helps: Improving Pre-trained RGB-based Policy with Depth Information Injection + + + + + + + diff --git a/docs/publication/index.html b/docs/publication/index.html index 52d67160..f89bbd8c 100755 --- a/docs/publication/index.html +++ b/docs/publication/index.html @@ -574,6 +574,12 @@

    Publications

    + + + + + + @@ -631,6 +637,313 @@

    Publications

    +
    + + + + + + + + + + + + + + + + + + +
    +
    + + + + + +
    +
    + +

    + Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation +

    + + + + + + + + + +
    + +
    + + +
    + + + + + + + + +
    + + + + + + + + + + + + + + + + + + +
    +
    + + + + + +
    +
    + +

    + KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance +

    + + + + + + + + + +
    + +
    + + +
    + + + + + + + +
    @@ -720,8 +1033,17 @@

    Publications

    + + PDF + + + + @@ -1507,7 +1829,17 @@

    Publications

    + + PDF + + + + + @@ -1517,6 +1849,13 @@

    Publications

    + + + + + + Code + diff --git a/docs/publication/index.xml b/docs/publication/index.xml index 995d8dce..46873ed7 100755 --- a/docs/publication/index.xml +++ b/docs/publication/index.xml @@ -228,6 +228,14 @@ + + KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + + + Large Graph Hashing with Spectral Rotation /publication/large-graph-hashing-with-spectral-rotation/ @@ -300,6 +308,14 @@ + + Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation + /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/ + + + Progressive Spatio-temporal Perception for Audio-Visual Question Answering /publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/ diff --git a/docs/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/index.html b/docs/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/index.html index 3fabd17a..5fc59214 100644 --- a/docs/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/index.html +++ b/docs/publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Wenke Xia" + "name": "Wenke Xia*" }, "publisher": { diff --git a/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/cite.bib b/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/cite.bib new file mode 100644 index 00000000..1ab36054 --- /dev/null +++ b/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/cite.bib @@ -0,0 +1,6 @@ +@article{lu2024koi, + title={KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance}, + author={Lu, Jingxian and Xia, Wenke and Wang, Dong and Wang, Zhigang and Zhao, Bin and Hu, Di and Li, Xuelong}, + journal={arXiv preprint arXiv:2408.02912}, + year={2024} +} \ No newline at end of file diff --git a/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/featured.png b/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/featured.png new file mode 100644 index 00000000..2ad6a005 Binary files /dev/null and b/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/featured.png differ diff --git a/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/featured_hu82ebe889fa4e0d0539ca82015614f588_826536_720x0_resize_lanczos_2.png b/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/featured_hu82ebe889fa4e0d0539ca82015614f588_826536_720x0_resize_lanczos_2.png new file mode 100644 index 00000000..0d212750 Binary files /dev/null and b/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/featured_hu82ebe889fa4e0d0539ca82015614f588_826536_720x0_resize_lanczos_2.png differ diff --git 
a/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/index.html b/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/index.html new file mode 100644 index 00000000..8e0214b9 --- /dev/null +++ b/docs/publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/index.html @@ -0,0 +1,759 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GeWu-Lab + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +

    KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance

    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + +
    +
    + + +
    +
    + + + + + +
    + + + + +
    +
    +
    +
    +
    All
    + +
    +
    +
    +
    +
    + + + +
    +
    +
    +
    +
    Publication
    +
    Conference on Robot Learning (CoRL)
    +
    +
    +
    +
    +
    + + +
    + +
    + + + + + + + + + + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +

    + copyright © 2024 GeWu-Lab +
    + + Gaoling School of Artificial Intelligence, Renmin University of China, Beijing 100872 +

    +
    + +
    + + + + + + + diff --git a/docs/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/index.html b/docs/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/index.html index aaa61c90..a873a7ab 100755 --- a/docs/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/index.html +++ b/docs/publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Guangyao Li*" + "name": "Guangyao Li" }, "publisher": { diff --git a/docs/publication/multi-scale-attention-for-audio-question-answering/index.html b/docs/publication/multi-scale-attention-for-audio-question-answering/index.html index 91f945ea..e3fac170 100644 --- a/docs/publication/multi-scale-attention-for-audio-question-answering/index.html +++ b/docs/publication/multi-scale-attention-for-audio-question-answering/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Guangyao Li*" + "name": "Guangyao Li" }, "publisher": { diff --git a/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/cite.bib b/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/cite.bib new file mode 100644 index 00000000..b88eef8b --- /dev/null +++ b/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/cite.bib @@ -0,0 +1,6 @@ +@article{feng2024play, + title={Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation}, + author={Feng, Ruoxuan and Hu, Di and Ma, Wenke and Li, Xuelong}, + journal={arXiv preprint arXiv:2408.01366}, + year={2024} +} \ No newline at end of file diff --git a/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/featured.jpg b/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/featured.jpg new file mode 100644 index 00000000..265bcae1 Binary files /dev/null and b/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/featured.jpg differ diff --git a/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/featured_hu75ce959336ef19bf68b94f874191d99d_811614_720x0_resize_q90_lanczos.jpg b/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/featured_hu75ce959336ef19bf68b94f874191d99d_811614_720x0_resize_q90_lanczos.jpg new file mode 100644 index 00000000..03ddf516 Binary files /dev/null and b/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/featured_hu75ce959336ef19bf68b94f874191d99d_811614_720x0_resize_q90_lanczos.jpg differ diff --git a/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/index.html b/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/index.html new file mode 100644 index 00000000..ec582238 --- /dev/null +++ b/docs/publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/index.html @@ -0,0 +1,750 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + GeWu-Lab + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +

    Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation

    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + +
    +
    + + +
    +
    + + + + + +
    + + + + +
    +
    +
    +
    +
    All
    + +
    +
    +
    +
    +
    + + + +
    +
    +
    +
    +
    Publication
    +
    Conference on Robot Learning (CoRL)
    +
    +
    +
    +
    +
    + + +
    + +
    + + + + + + + + + + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +

    + copyright © 2024 GeWu-Lab +
    + + Gaoling School of Artificial Intelligence, Renmin University of China, Beijing 100872 +

    +
    + +
    + + + + + + + diff --git a/docs/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/index.html b/docs/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/index.html index 6df36ea8..606c7a8b 100644 --- a/docs/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/index.html +++ b/docs/publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Guangyao Li*" + "name": "Guangyao Li" }, "publisher": { diff --git a/docs/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/index.html b/docs/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/index.html index e07b7b2c..9447ff49 100644 --- a/docs/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/index.html +++ b/docs/publication/robust-cross-modal-knowledge-distillation-for-unconstrained-videos/index.html @@ -154,7 +154,7 @@ "author": { "@type": "Person", - "name": "Wenke Xia" + "name": "Wenke Xia*" }, "publisher": { diff --git a/docs/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/cite.bib b/docs/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/cite.bib new file mode 100644 index 00000000..e0ddc2e4 --- /dev/null +++ b/docs/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/cite.bib @@ -0,0 +1,6 @@ +@article{sun2024unveiling, + title={Unveiling and Mitigating Bias in Audio Visual Segmentation}, + author={Sun, Peiwen and Zhang, Honggang and Hu, Di}, + journal={arXiv preprint arXiv:2407.16638}, + year={2024} +} \ No newline at end of file diff --git a/docs/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/index.html b/docs/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/index.html index 1f632cd9..32ef5da4 100644 --- a/docs/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/index.html +++ b/docs/publication/unveiling-and-mitigating-bias-in-audio-visual-segmentation/index.html @@ -497,6 +497,8 @@

    Unveiling and Mitigating Bias in Audio Visual Segmentation (ACM MM Oral)

    @@ -509,8 +511,17 @@

    Unveiling and Mitigating Bias in Audio Visual Segmentation (ACM MM Oral)

    + PDF + + + + diff --git a/docs/publication_types/9/index.html b/docs/publication_types/9/index.html index 30d1be69..eb9445b8 100755 --- a/docs/publication_types/9/index.html +++ b/docs/publication_types/9/index.html @@ -446,6 +446,20 @@

    KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance

    +
    + +
    +
    + +
    +

    Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation

    +
    + +
    +
    + diff --git a/docs/publication_types/9/index.xml b/docs/publication_types/9/index.xml index 421adb10..b20168d5 100755 --- a/docs/publication_types/9/index.xml +++ b/docs/publication_types/9/index.xml @@ -60,5 +60,21 @@
    + + KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + + + + + Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation + /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/ + + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 4870f39f..0b8e7cbc 100755 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -179,11 +179,11 @@ - /topic_types/1/ + /publication_types/1/ - /publication_types/1/ + /topic_types/1/ @@ -446,6 +446,10 @@ /authors/jian-wang/ + + /authors/jingxian-lu/ + + /authors/jingyuan-wen/ @@ -470,6 +474,10 @@ /publication/kinematic-aware-prompting-for-generalizable-articulated-object-manipulation-with-llms/ + + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + + /authors/konrad-heidler/ @@ -562,6 +570,10 @@ /authors/peizhen-zhu/ + + /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/ + + /publication/progressive-spatio-temporal-perception-for-audio-visual-question-answering/ @@ -746,6 +758,10 @@ /authors/weiyao-lin/ + + /authors/wenke-ma/ + + /authors/wenke-xia/ diff --git a/docs/topic_types/2/index.xml b/docs/topic_types/2/index.xml index 217f3d23..64fb4f38 100755 --- a/docs/topic_types/2/index.xml +++ b/docs/topic_types/2/index.xml @@ -156,6 +156,14 @@ + + KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/koi-accelerating-online-imitation-learning-via-hybrid-key-state-guidance/ + + + Learning to Answer Questions in Dynamic Audio-Visual Scenarios /publication/learning-to-answer-questions-in-dynamic-audio-visual-scenarios/ @@ -204,6 +212,14 @@ + + Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation + /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/ + Mon, 01 Jan 0001 00:00:00 +0000 + /publication/play-to-the-score-stage-guided-dynamic-multi-sensory-fusion-for-robotic-manipulation/ + + + Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer /publication/prompting-segmentation-with-sound-is-generalizable-audio-visual-source-localizer/ diff --git a/docs/topic_types/2/page/2/index.html b/docs/topic_types/2/page/2/index.html index 8961e9c0..6d62cc0e 100755 --- a/docs/topic_types/2/page/2/index.html +++ b/docs/topic_types/2/page/2/index.html @@ -461,14 +461,14 @@

    Learning to Answer Questions in Dynamic Audio-Visual Scenarios

    +

    KOI: Accelerating Online Imitation Learning via Hybrid Key-state Guidance

    -

    Listen to the Image

    +

    Learning to Answer Questions in Dynamic Audio-Visual Scenarios

    diff --git a/docs/topic_types/2/page/3/index.html b/docs/topic_types/2/page/3/index.html index 0161feaa..c09cf09a 100755 --- a/docs/topic_types/2/page/3/index.html +++ b/docs/topic_types/2/page/3/index.html @@ -405,70 +405,70 @@

    2

    -

    MMCosine: Multi-Modal Cosine Loss Towards Balanced Audio-Visual Fine-Grained Learning

    +

    Listen to the Image

    -

    MMPareto: Innocent Uni-modal Assistance for Enhanced Multi-modal Learning

    +

    MMCosine: Multi-Modal Cosine Loss Towards Balanced Audio-Visual Fine-Grained Learning

    -

    Multi-Scale Attention for Audio Question Answering

    +

    MMPareto: Innocent Uni-modal Assistance for Enhanced Multi-modal Learning

    -

    Multiple Sound Sources Localization from Coarse to Fine

    +

    Multi-Scale Attention for Audio Question Answering

    -

    Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer

    +

    Multiple Sound Sources Localization from Coarse to Fine

    -

    Revisiting Pre-training in Audio-Visual Learning

    +

    Play to the Score: Stage-Guided Dynamic Multi-Sensory Fusion for Robotic Manipulation

    -

    Robust Cross-modal Knowledge Distillation for Unconstrained Videos

    +

    Prompting Segmentation with Sound is Generalizable Audio-Visual Source Localizer

    -

    SeCo: Separating Unknown Musical Visual Sounds with Consistency Guidance

    +

    Revisiting Pre-training in Audio-Visual Learning

    -

    Self-supervised Audiovisual Representation Learning for Remote Sensing Data

    +

    Robust Cross-modal Knowledge Distillation for Unconstrained Videos

    -

    Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis

    +

    SeCo: Separating Unknown Musical Visual Sounds with Consistency Guidance

    diff --git a/docs/topic_types/2/page/4/index.html b/docs/topic_types/2/page/4/index.html index 5cf759c4..e624ad2b 100644 --- a/docs/topic_types/2/page/4/index.html +++ b/docs/topic_types/2/page/4/index.html @@ -404,6 +404,20 @@

    2

    +
    +

    Self-supervised Audiovisual Representation Learning for Remote Sensing Data

    +
    + +
    +
    + +
    +

    Self-supervised Learning for Heterogeneous Audiovisual Scene Analysis

    +
    + +
    +
    +

    SepFusion: Finding Optimal Fusion Structures for Visual Sound Separation