Revise examples and readme (#117)

examples: improved output format support and added a README

BBC-Esq committed Apr 9, 2024
1 parent 7581308 commit 635ca86
Showing 7 changed files with 74 additions and 57 deletions.
2 changes: 1 addition & 1 deletion examples/gui_file_to_text_to_audio_playback.py
@@ -13,7 +13,7 @@
(3) run pip3 install WhisperSpeech
-(4) pip3 install soundfile==0.12.1 sounddevice==0.4.6 pypdf==4.0.2 python-docx==1.1.0 nltk==3.8.1
+(4) pip3 install sounddevice==0.4.6 pypdf==4.0.2 python-docx==1.1.0 nltk==3.8.1
(9) python gui_file_to_text_to_audio_playback.py
'''
2 changes: 1 addition & 1 deletion examples/gui_text_to_audio_playback.py
@@ -12,7 +12,7 @@
https://pytorch.org/get-started/locally/
(3) pip3 install WhisperSpeech
-(4) pip3 install soundfile==0.12.1 sounddevice==0.4.6
+(4) pip3 install sounddevice==0.4.6
(5) python gui_text_to_audio_playback.py
'''

8 changes: 5 additions & 3 deletions examples/minimal.py
@@ -1,6 +1,8 @@
from whisperspeech.pipeline import Pipeline

-tts_pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-tiny-en+pl.model')
+tts_pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-tiny-en+pl.model') # uncomment the line for the model you want to use
+# tts_pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model') # uncomment the line for the model you want to use
+# tts_pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-small-en+pl.model') # uncomment the line for the model you want to use

-save_path = 'output.wav'
-tts_pipe.generate_to_file(save_path, "This is a test")
+save_path = 'output.wav' # change the file extension to .mp3, .flac, .ogg etc. to save to a different file format
+tts_pipe.generate_to_file(save_path, "This is a test")
34 changes: 33 additions & 1 deletion examples/readme.md
@@ -1 +1,33 @@
-This folder contains examples of basic usage of the WhisperSpeech library.
# Example Scripts

Contributions are welcome! Feel free to create an issue or pull request on GitHub.

### `minimal.py`

- Minimalistic script that takes hardcoded text input and outputs an audio file.

### `text_to_playback.py`

- Utilizes the new `generate_to_playback` method to convert hardcoded text directly to audio playback without intermediate steps. Designed for minimal script length, but omits the queue management that the other playback scripts use to reduce latency.

### `text_to_audio_playback.py`

- Processes text one sentence at a time, adding each sentence to a queue for playback. Designed for users who prefer a command-line approach but still want the efficiency of queued playback.
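The queued approach this script uses can be sketched as a producer/consumer pipeline. The sketch below is illustrative only: `synthesize` and `play` are hypothetical stand-ins for the WhisperSpeech pipeline's `generate` call and sounddevice playback, so it runs without a model or audio hardware.

```python
import queue
import threading

def synthesize(sentence):
    # Hypothetical stand-in for Pipeline.generate(sentence).
    return f"<audio:{sentence}>"

def play(clip, played):
    # Hypothetical stand-in for sounddevice playback.
    played.append(clip)

def speak_sentences(sentences):
    """Synthesize sentences in one thread while playing finished clips in another."""
    q = queue.Queue()
    played = []

    def producer():
        for s in sentences:
            q.put(synthesize(s))  # synthesis overlaps with ongoing playback
        q.put(None)               # sentinel: nothing left to play

    def consumer():
        while True:
            clip = q.get()
            if clip is None:
                break
            play(clip, played)

    threads = [threading.Thread(target=producer), threading.Thread(target=consumer)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return played

speak_sentences(["First sentence.", "Second sentence."])
```

Because the queue is FIFO and there is a single consumer, clips play in sentence order while later sentences are still being synthesized.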

### `gui_file_to_text_to_audio_playback.py`

- Provides a graphical user interface that lets users load a file. The text is then converted to speech sentence by sentence, using queue management to reduce latency.

### `gui_text_to_audio_playback.py`

- Similar to `gui_file_to_text_to_audio_playback.py`, but the user enters the text to be played back directly. Text is still processed one sentence at a time for low latency.
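Both GUI scripts feed the pipeline one sentence at a time; their dependency list includes `nltk`, presumably for sentence tokenization. A rough regex stand-in for that splitting step might look like this (nltk's tokenizer handles abbreviations and other edge cases that this naive version does not):

```python
import re

def split_sentences(text):
    # Naive splitter: break after '.', '!', or '?' followed by whitespace.
    return [s.strip() for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s]

print(split_sentences("Hello there. How are you? Fine!"))
# → ['Hello there.', 'How are you?', 'Fine!']
```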


| Feature | gui_file_to...<br>audio_playback.py | gui_text_to...<br>audio_playback.py | minimal.py | text_to_audio...<br>playback.py | text_to_playback.py |
|:---------------------------------:|:-----------------------------------:|:-----------------------------------:|:----------:|:-------------------------------:|:-------------------:|
| **GUI** | <center>✅</center> | <center>✅</center> | <center>❌</center> | <center>❌</center> | <center>❌</center> |
| **Input** | File | Text Entry | Predefined Text | Predefined Text | Predefined Text |
| **Output** | Audio Playback | Audio Playback | WAV File | Audio Playback | Audio Playback |
| **Queue Management** | <center>✅</center> | <center>✅</center> | <center>❌</center> | <center>✅</center> | <center>❌</center> |
| **Text-to-Speech<br> Conversion**| <center>✅</center> | <center>✅</center> | <center>✅</center> | <center>✅</center> | <center>✅</center> |
| **Load File** | <center>✅</center> | <center>❌</center> | <center>❌</center> | <center>❌</center> | <center>❌</center> |
50 changes: 0 additions & 50 deletions examples/text_to_audio_file.py

This file was deleted.

2 changes: 1 addition & 1 deletion examples/text_to_audio_playback.py
@@ -11,7 +11,7 @@
https://pytorch.org/get-started/locally/
(3) pip3 install WhisperSpeech
-(4) pip3 install soundfile==0.12.1 sounddevice==0.4.6
+(4) pip3 install sounddevice==0.4.6
(5) python gui_text_to_audio_playback.py
'''

33 changes: 33 additions & 0 deletions examples/text_to_playback.py
@@ -0,0 +1,33 @@
'''
DESCRIPTION~
Processes a body of text directly into audio playback using the sounddevice library.
PLEASE NOTE~
If you need more granular control, such as processing sentences in one thread (one sentence at a time) while simultaneously playing them in another thread to reduce latency, consult the "text_to_audio_playback.py" example. It uses the "generate" method in conjunction with the "sounddevice" library directly.
This example uses the "generate_to_playback" method instead, which is good for keeping your script short, especially with shorter passages where latency is less important.
INSTALLATION INSTRUCTIONS~
(1) create a virtual environment and activate it
(2) install pytorch by going to the following website and running the appropriate command for your platform and setup:
https://pytorch.org/get-started/locally/
---This script has been tested up to Torch 2.2.0.
(3) pip3 install WhisperSpeech
(4) pip3 install sounddevice==0.4.6
(5) python text_to_playback.py
'''

from whisperspeech.pipeline import Pipeline

# pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-small-en+pl.model')
# pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-tiny-en+pl.model')
pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model')

pipe.generate_to_playback("""
This is some sample text. You would add text here that you want spoken and then only leave one of the above lines uncommented for the model you want to test. This text is being used to test a new generate to playback method within the pipeline script. It would require adding sounddevice as a dependency since that's what performs the playback.
""")
