Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove borrow on node.send_output for python #753

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apis/python/node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ impl Node {
/// :rtype: None
#[pyo3(signature = (output_id, data, metadata=None))]
pub fn send_output(
&mut self,
&self,
output_id: String,
data: PyObject,
metadata: Option<Bound<'_, PyDict>>,
Expand Down
2 changes: 1 addition & 1 deletion apis/python/operator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
CleanupHandle(self.0.clone())
}

pub fn get_mut(&mut self) -> std::sync::MutexGuard<T> {
pub fn get_mut(&self) -> std::sync::MutexGuard<T> {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will still error when used concurrently. So the error just happens in a different place now, but concurrent operations are still not possible.

self.0.try_lock().expect("failed to lock DelayedCleanup")
}
}
Expand Down Expand Up @@ -176,7 +176,7 @@
} else if value.is_instance_of::<PyTuple>()
&& value.len()? > 0
&& value.get_item(0)?.is_exact_instance_of::<PyInt>()
{

Check warning on line 179 in apis/python/operator/src/lib.rs

View workflow job for this annotation

GitHub Actions / Clippy

this `if` has identical blocks
let list: Vec<i64> = value.extract()?;
parameters.insert(key, Parameter::ListInt(list))
} else if value.is_instance_of::<PyList>()
Expand Down
6 changes: 3 additions & 3 deletions binaries/cli/src/template/python/__node-name__/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ packages = [{ include = "__node_name__" }]
[tool.poetry.dependencies]
dora-rs = "^0.3.6"
numpy = "< 2.0.0"
pyarrow = ">= 5.0.0"
pyarrow = ">= 15.0.0"
python = "^3.7"

[tool.poetry.dev-dependencies]
pytest = ">= 8.3.4"
pytest = ">= 6.3.4"
pylint = ">= 3.3.2"
black = ">= 24.10"
black = ">= 22.10"

[tool.poetry.scripts]
__node-name__ = "__node_name__.main:main"
Expand Down
3 changes: 3 additions & 0 deletions examples/speech-to-speech/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.pt
*.json
*.npz
10 changes: 10 additions & 0 deletions examples/speech-to-speech/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Dora Speech-to-Speech example

Make sure to have dora, pip and cargo installed.

```bash
dora build https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-speech/outtetts.yml
dora run https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-speech/outtetts.yml

# Wait for the models to download, which can take a bit of time.
```
40 changes: 40 additions & 0 deletions examples/speech-to-speech/outtetts-dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
nodes:
  # Capture raw audio from the default microphone every 2 s.
  - id: dora-microphone
    build: pip install -e ../../node-hub/dora-microphone
    path: dora-microphone
    inputs:
      tick: dora/timer/millis/2000
    outputs:
      - audio

  # Voice-activity detection: passes through only segments containing speech.
  - id: dora-vad
    build: pip install -e ../../node-hub/dora-vad
    path: dora-vad
    inputs:
      audio: dora-microphone/audio
    outputs:
      - audio

  # Speech-to-text transcription of the detected speech segments.
  - id: dora-distil-whisper
    build: pip install -e ../../node-hub/dora-distil-whisper
    path: dora-distil-whisper
    inputs:
      input: dora-vad/audio
    outputs:
      - text
    env:
      TARGET_LANGUAGE: english
      # For China
      # USE_MODELSCOPE_HUB: true

  # Text-to-speech synthesis of the transcription.
  - id: dora-outtetts
    build: pip install -e ../../node-hub/dora-outtetts
    path: dora-outtetts
    inputs:
      # Fixed: this file declares the transcriber as `dora-distil-whisper`,
      # not `dora-whisper` (that id only exists in outtetts.yml).
      text: dora-distil-whisper/text

  # Visualization of the transcribed text.
  - id: dora-rerun
    build: cargo build -p dora-rerun --release
    path: dora-rerun
    inputs:
      original_text: dora-distil-whisper/text
39 changes: 39 additions & 0 deletions examples/speech-to-speech/outtetts.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
nodes:
  # Capture raw audio from the default microphone every 2 s.
  - id: dora-microphone
    description: Microphone
    build: pip install dora-microphone
    path: dora-microphone
    inputs:
      tick: dora/timer/millis/2000
    outputs:
      - audio

  # Voice-activity detection: passes through only segments containing speech.
  - id: dora-vad
    build: pip install dora-vad
    path: dora-vad
    inputs:
      audio: dora-microphone/audio
    outputs:
      - audio

  # Speech-to-text transcription of the detected speech segments.
  - id: dora-whisper
    build: pip install dora-distil-whisper
    path: dora-distil-whisper
    inputs:
      input: dora-vad/audio
    outputs:
      - text
    env:
      TARGET_LANGUAGE: english

  # Text-to-speech synthesis of the transcription.
  - id: dora-outtetts
    build: pip install dora-outtetts
    path: dora-outtetts
    inputs:
      text: dora-whisper/text

  # Visualization of the transcribed text.
  - id: dora-rerun
    build: pip install dora-rerun
    path: dora-rerun
    inputs:
      original_text: dora-whisper/text
84 changes: 54 additions & 30 deletions node-hub/dora-distil-whisper/dora_distil_whisper/main.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,66 @@
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from dora import Node
import pyarrow as pa
import os
import sys
from pathlib import Path

DEFAULT_PATH = "openai/whisper-large-v3-turbo"
TARGET_LANGUAGE = os.getenv("TARGET_LANGUAGE", "chinese")
TRANSLATE = bool(os.getenv("TRANSLATE", "False") in ["True", "true"])

import torch


def load_model():
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)

if bool(os.getenv("USE_MODELSCOPE_HUB") in ["True", "true"]):
from modelscope import snapshot_download

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)
if not Path(MODEL_NAME_OR_PATH).exists():
MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)

if bool(os.getenv("USE_MODELSCOPE_HUB") in ["True", "true"]):
from modelscope import snapshot_download
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

if not Path(MODEL_NAME_OR_PATH).exists():
MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)
model = AutoModelForSpeechSeq2Seq.from_pretrained(
MODEL_NAME_OR_PATH,
torch_dtype=torch_dtype,
low_cpu_mem_usage=True,
use_safetensors=True,
)
model.to(device)

device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
processor = AutoProcessor.from_pretrained(MODEL_NAME_OR_PATH)
pipe = pipeline(
"automatic-speech-recognition",
model=model,
tokenizer=processor.tokenizer,
feature_extractor=processor.feature_extractor,
max_new_tokens=400,
torch_dtype=torch_dtype,
device=device,
)
return pipe


model = AutoModelForSpeechSeq2Seq.from_pretrained(
MODEL_NAME_OR_PATH,
torch_dtype=torch_dtype,
low_cpu_mem_usage=True,
use_safetensors=True,
)
model.to(device)
def load_model_mlx():
from lightning_whisper_mlx import LightningWhisperMLX

whisper = LightningWhisperMLX(model="distil-large-v3", batch_size=12, quant=None)
return whisper

processor = AutoProcessor.from_pretrained(MODEL_NAME_OR_PATH)
pipe = pipeline(
"automatic-speech-recognition",
model=model,
tokenizer=processor.tokenizer,
feature_extractor=processor.feature_extractor,
max_new_tokens=400,
torch_dtype=torch_dtype,
device=device,
)

BAD_SENTENCES = [
"字幕",
"字幕志愿",
"中文字幕",
"我",
"你",
" you",
"!",
"THANK YOU",
" Thank you.",
" www.microsoft.com",
Expand Down Expand Up @@ -90,6 +104,13 @@ def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):

def main():
node = Node()

# For macos use mlx:
if sys.platform == "darwin":
whisper = load_model_mlx()
else:
pipe = load_model()

for event in node:
if event["type"] == "INPUT":
audio = event["value"].to_numpy()
Expand All @@ -100,10 +121,13 @@ def main():
"language": TARGET_LANGUAGE,
}
)
result = pipe(
audio,
generate_kwargs=confg,
)
if sys.platform == "darwin":
result = whisper.transcribe(audio)
else:
result = pipe(
audio,
generate_kwargs=confg,
)
if result["text"] in BAD_SENTENCES:
continue
text = cut_repetition(result["text"])
Expand Down
1 change: 1 addition & 0 deletions node-hub/dora-distil-whisper/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ accelerate = "^0.29.2"
torch = "^2.2.0"
python = "^3.7"
modelscope = "^1.18.1"
lightning-whisper-mlx = { version = "^0.0.10", platform = "darwin" }

[tool.poetry.scripts]
dora-distil-whisper = "dora_distil_whisper.main:main"
Expand Down
37 changes: 37 additions & 0 deletions node-hub/dora-outtetts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# dora-outtetts

## Getting started

- Install it with pip:

```bash
pip install -e .
```

## Contribution Guide

- Format with [black](https://github.com/psf/black):

```bash
black . # Format
```

- Lint with [pylint](https://github.com/pylint-dev/pylint):

```bash
pylint --disable=C,R --ignored-modules=cv2 . # Lint
```

- Test with [pytest](https://github.com/pytest-dev/pytest)

```bash
pytest . # Test
```

## YAML Specification

## Examples

## License

dora-outtetts's code is released under the MIT License.
11 changes: 11 additions & 0 deletions node-hub/dora-outtetts/dora_outtetts/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os

# Locate the README that ships one directory above this package.
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Expose the README as the package docstring; fall back to a short stub
# when the file is absent (e.g. in some installed layouts).
try:
    with open(readme_path, encoding="utf-8") as readme_file:
        __doc__ = readme_file.read()
except FileNotFoundError:
    __doc__ = "README file not found."
5 changes: 5 additions & 0 deletions node-hub/dora-outtetts/dora_outtetts/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .main import main


# Allow running the package directly (`python -m dora_outtetts`) by
# delegating to the same entry point used by the console script.
if __name__ == "__main__":
    main()
Loading
Loading