Skip to content

Commit

Permalink
add
Browse files Browse the repository at this point in the history
  • Loading branch information
xwinks committed Oct 31, 2024
1 parent 3176e10 commit 4a15e79
Show file tree
Hide file tree
Showing 63 changed files with 1,579 additions and 126 deletions.
4 changes: 2 additions & 2 deletions content/dataset/00_RefAVS/_index.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: Refer and Segment Objects in Audio-Visual Scenes (Ref-AVS) Dataset
summary: Traditional reference segmentation tasks have predominantly focused on silent visual scenes, neglecting the integral role of multimodal perception and interaction in human experiences. In this work, we introduce a novel task called Reference Audio-Visual Segmentation (Ref-AVS), which seeks to segment objects within the visual domain based on expressions containing multimodal cues. Such expressions are articulated in natural language forms but are enriched with multimodal cues, including audio and visual descriptions.
title: <b>Ref</b>er and Segment Objects in <b>A</b>udio-<b>V</b>isual <b>S</b>cenes (Ref-AVS) Dataset
summary: In this work, we introduce a novel task called Reference Audio-Visual Segmentation (Ref-AVS), which seeks to segment objects within the visual domain based on expressions containing multimodal cues.
dataset: true
# Optional external URL for project (replaces project detail page).
external_link: https://github.com/GeWu-Lab/Ref-AVS
Expand Down
2 changes: 1 addition & 1 deletion content/dataset/01_LFAV/_index.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: <b>L</b>ong <b>F</b>orm <b>A</b>udio-<b>V</b>isual (<b>LFAV<b>) Dataset
title: <b>L</b>ong <b>F</b>orm <b>A</b>udio-<b>V</b>isual (<b>LFAV</b>) Dataset
summary: To achieve a better understanding of long form audio-visual videos, we propose to focus on the multisensory temporal event localization task, which essentially requires the model to predict the start and end time of each audio and visual event in the video. Concretely, we divide the video into several non-overlapping snippets, then predict the event categories of all snippets. To study the proposed multisensory temporal event localization task, we elaborately build a large-scale Long Form Audio-visual Video (LFAV) dataset with an average video length of 210 seconds and a total video length of 302 hours.
dataset: true
# Optional external URL for project (replaces project detail page).
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: "Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)"
title: "Balanced Multimodal Learning via On-the-fly Gradient Modulation"
authors:
- Xiaokang Peng*
- Yake Wei*
Expand All @@ -8,6 +8,7 @@ authors:
- Di Hu
publication_types: ["1"]
publication: Computer Vision and Pattern Recognition (CVPR) 2022 <b>Oral Presentation</b>
is_oral: True
publication_types_name: Conference Paper
url_pdf: https://arxiv.org/abs/2203.15332
url_code: https://github.com/GeWu-Lab/OGM-GE_CVPR2022
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@comment{arXiv preprint: the identifier is stored in the eprint fields rather than in a journal field.}
@misc{wang2024can,
  author        = {Wang, Yaoting and Sun, Peiwen and Li, Yuanchao and Zhang, Honggang and Hu, Di},
  title         = {Can Textual Semantics Mitigate Sounding Object Segmentation Preference?},
  year          = {2024},
  eprint        = {2407.10947},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2407.10947},
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ publication_types: ["9"]
publication: Conference on Robot Learning (CoRL)
publication_types_name: Conference Paper
url_pdf: https://www.arxiv.org/abs/2408.02912
url_code: https://github.com/GeWu-Lab/Keystate_Online_Imitation
topic_types: ["2"]
topic_types_name: topic_scene_understanding
rating : 2024_09_01
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ url_pdf: https://arxiv.org/abs/2203.14072
url_code: https://github.com/GeWu-Lab/MUSIC-AVQA
url_project: https://gewu-lab.github.io/MUSIC-AVQA/
topic_types: ["2"]
is_oral: True
topic_types_name: topic_scene_understanding
rating : 2022_03_04
---
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@comment{Conference paper (ICML 2024); the arXiv identifier is kept in the eprint fields.}
@inproceedings{wei2024mmpareto,
  author        = {Wei, Yake and Hu, Di},
  title         = {{MMPareto}: Boosting Multimodal Learning with Innocent Unimodal Assistance},
  booktitle     = {International Conference on Machine Learning ({ICML})},
  year          = {2024},
  eprint        = {2405.17730},
  archiveprefix = {arXiv},
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ publication_types: ["1"]
publication: International Conference on Machine Learning (ICML) 2024
publication_types_name: Conference Paper
url_pdf: https://openreview.net/pdf?id=JHD4Q4GbXa
url_code: https://github.com/GeWu-Lab/MMPareto_ICML2024
topic_types: ["2"]
topic_types_name: topic_mechanism
rating : 2024_05_06
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@comment{Journal article in IEEE TPAMI, 2024.}
@article{wei2024fly,
  author    = {Wei, Yake and Hu, Di and Du, Henghui and Wen, Ji-Rong},
  title     = {On-the-fly Modulation for Balanced Multimodal Learning},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  publisher = {IEEE},
  year      = {2024},
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ authors:
- Wenke Ma
- Xuelong Li
publication_types: ["9"]
is_oral: True
publication: Conference on Robot Learning (CoRL)
publication_types_name: Conference Paper
url_pdf: https://arxiv.org/pdf/2408.01366
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@comment{arXiv preprint: title capitalisation restored, with the Ref-AVS acronym brace-protected against style recasing.}
@misc{wang2024ref,
  author        = {Wang, Yaoting and Sun, Peiwen and Zhou, Dongzhan and Li, Guangyao and Zhang, Honggang and Hu, Di},
  title         = {{Ref-AVS}: Refer and Segment Objects in Audio-Visual Scenes},
  year          = {2024},
  eprint        = {2407.10957},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2407.10957},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@comment{Conference paper: the venue belongs in booktitle of an inproceedings entry, not in a journal field.}
@inproceedings{ma2024steppingstones,
  author    = {Ma, Juncheng and Sun, Peiwen and Wang, Yaoting and Hu, Di},
  title     = {Stepping Stones: A Progressive Training Strategy for Audio-Visual Semantic Segmentation},
  booktitle = {European Conference on Computer Vision ({ECCV})},
  year      = {2024},
}
2 changes: 1 addition & 1 deletion docs/authors/andong-deng/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ <h3>Latest</h3>
<ul>

<li>
<a href="/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/">Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)</a>
<a href="/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/">Balanced Multimodal Learning via On-the-fly Gradient Modulation</a>
</li>

<li>
Expand Down
2 changes: 1 addition & 1 deletion docs/authors/andong-deng/index.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
</image>

<item>
<title>Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)</title>
<title>Balanced Multimodal Learning via On-the-fly Gradient Modulation</title>
<link>/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/</link>
<pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate>
<guid>/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/</guid>
Expand Down
2 changes: 1 addition & 1 deletion docs/authors/di-hu/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ <h3>Latest</h3>
</li>

<li>
<a href="/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/">Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)</a>
<a href="/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/">Balanced Multimodal Learning via On-the-fly Gradient Modulation</a>
</li>

<li>
Expand Down
2 changes: 1 addition & 1 deletion docs/authors/di-hu/index.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
</item>

<item>
<title>Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)</title>
<title>Balanced Multimodal Learning via On-the-fly Gradient Modulation</title>
<link>/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/</link>
<pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate>
<guid>/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/</guid>
Expand Down
2 changes: 1 addition & 1 deletion docs/authors/dong-wang/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ <h3>Latest</h3>
<ul>

<li>
<a href="/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/">Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)</a>
<a href="/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/">Balanced Multimodal Learning via On-the-fly Gradient Modulation</a>
</li>

<li>
Expand Down
2 changes: 1 addition & 1 deletion docs/authors/dong-wang/index.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
</image>

<item>
<title>Balanced Multimodal Learning via On-the-fly Gradient Modulation (CVPR Oral)</title>
<title>Balanced Multimodal Learning via On-the-fly Gradient Modulation</title>
<link>/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/</link>
<pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate>
<guid>/publication/balanced-multimodal-learning-via-on-the-fly-gradient-modulation/</guid>
Expand Down
4 changes: 4 additions & 0 deletions docs/authors/haojin-yang/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,10 @@ <h3>Latest</h3>
<a href="/publication/not-all-knowledge-is-created-equal/">Not All Knowledge Is Created Equal</a>
</li>

<li>
<a href="/publication/not-all-knowledge-is-created-equal/">Not All Knowledge Is Created Equal</a>
</li>

<li>
<a href="/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/">Supervised Knowledge May Hurt Novel Class Discovery Performance</a>
</li>
Expand Down
8 changes: 8 additions & 0 deletions docs/authors/haojin-yang/index.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@
<description></description>
</item>

<item>
<title>Not All Knowledge Is Created Equal</title>
<link>/publication/not-all-knowledge-is-created-equal/</link>
<pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate>
<guid>/publication/not-all-knowledge-is-created-equal/</guid>
<description></description>
</item>

<item>
<title>Supervised Knowledge May Hurt Novel Class Discovery Performance</title>
<link>/publication/supervised-knowledge-may-hurt-novel-class-discovery-performance/</link>
Expand Down
Loading

0 comments on commit 4a15e79

Please sign in to comment.