Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adamnsandle #493

Merged
merged 10 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 28 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,34 @@ https://user-images.githubusercontent.com/36505480/144874384-95f80f6d-a4f1-42cc-
</details>

<br/>

<h2 align="center">Fast start</h2>
<br/>

**Using pip**:
`pip install silero-vad`

```python3
from silero_vad import load_silero_vad, read_audio, get_speech_timestamps
model = load_silero_vad()
wav = read_audio('path_to_audio_file') # backend (sox, soundfile, or ffmpeg) required!
speech_timestamps = get_speech_timestamps(wav, model)
```

**Using torch.hub**:
```python3
import torch
torch.set_num_threads(1)

model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad')
(get_speech_timestamps, _, read_audio, _, _) = utils

wav = read_audio('path_to_audio_file') # backend (sox, soundfile, or ffmpeg) required!
speech_timestamps = get_speech_timestamps(wav, model)
```

<br/>

<h2 align="center">Key Features</h2>
<br/>

Expand Down Expand Up @@ -57,21 +85,7 @@ https://user-images.githubusercontent.com/36505480/144874384-95f80f6d-a4f1-42cc-
Published under a permissive license (MIT), Silero VAD has zero strings attached: no telemetry, no keys, no registration, no built-in expiration, and no vendor lock.

<br/>
<h2 align="center">Fast start</h2>
<br/>

```python3
import torch
torch.set_num_threads(1)

model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad')
(get_speech_timestamps, _, read_audio, _, _) = utils

wav = read_audio('path_to_audio_file')
speech_timestamps = get_speech_timestamps(wav, model)
```

<br/>
<h2 align="center">Typical Use Cases</h2>
<br/>

Expand Down
1 change: 0 additions & 1 deletion files/lang_dict_95.json

This file was deleted.

1 change: 0 additions & 1 deletion files/lang_group_dict_95.json

This file was deleted.

21 changes: 10 additions & 11 deletions hubconf.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
dependencies = ['torch', 'torchaudio']
import torch
import json
import os
from utils_vad import (init_jit_model,
get_speech_timestamps,
save_audio,
read_audio,
VADIterator,
collect_chunks,
drop_chunks,
Validator,
OnnxWrapper)
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from silero_vad.utils_vad import (init_jit_model,
get_speech_timestamps,
save_audio,
read_audio,
VADIterator,
collect_chunks,
OnnxWrapper)


def versiontuple(v):
Expand All @@ -36,7 +35,7 @@ def silero_vad(onnx=False, force_onnx_cpu=False):
if versiontuple(installed_version) < versiontuple(supported_version):
raise Exception(f'Please install torch {supported_version} or greater ({installed_version} installed)')

model_dir = os.path.join(os.path.dirname(__file__), 'files')
model_dir = os.path.join(os.path.dirname(__file__), 'src', 'silero_vad', 'data')
if onnx:
model = OnnxWrapper(os.path.join(model_dir, 'silero_vad.onnx'), force_onnx_cpu)
else:
Expand Down
35 changes: 35 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "silero-vad"
version = "5.1"
authors = [
{name="Silero Team", email="hello@silero.ai"},
]
description = "Voice Activity Detector (VAD) by Silero"
readme = "README.md"
requires-python = ">=3.8"
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Intended Audience :: Science/Research",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering",
]
dependencies = [
"torch>=1.12.0",
"torchaudio>=0.12.0",
"onnxruntime>=1.18.0",
]

[project.urls]
Homepage = "https://github.com/snakers4/silero-vad"
Issues = "https://github.com/snakers4/silero-vad/issues"
30 changes: 20 additions & 10 deletions silero-vad.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,30 @@
},
"outputs": [],
"source": [
"USE_PIP = True # download model using pip package or torch.hub\n",
"USE_ONNX = False # change this to True if you want to test onnx model\n",
"if USE_ONNX:\n",
" !pip install -q onnxruntime\n",
"if USE_PIP:\n",
" !pip install -q silero-vad\n",
" from silero_vad import (load_silero_vad,\n",
" read_audio,\n",
" get_speech_timestamps,\n",
" save_audio,\n",
" VADIterator,\n",
" collect_chunks)\n",
" model = load_silero_vad(onnx=USE_ONNX)\n",
"else:\n",
" model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n",
" model='silero_vad',\n",
" force_reload=True,\n",
" onnx=USE_ONNX)\n",
"\n",
"model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n",
" model='silero_vad',\n",
" force_reload=True,\n",
" onnx=USE_ONNX)\n",
"\n",
"(get_speech_timestamps,\n",
" save_audio,\n",
" read_audio,\n",
" VADIterator,\n",
" collect_chunks) = utils"
" (get_speech_timestamps,\n",
" save_audio,\n",
" read_audio,\n",
" VADIterator,\n",
" collect_chunks) = utils"
]
},
{
Expand Down
12 changes: 12 additions & 0 deletions src/silero_vad/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from importlib.metadata import PackageNotFoundError, version

# Expose the installed distribution's version as ``__version__``.
try:
    __version__ = version(__name__)
except PackageNotFoundError:
    # No installed distribution metadata was found for this package name
    # (e.g. the code is imported without being installed); in that case
    # ``__version__`` is deliberately left undefined rather than failing the
    # import. Only this specific error is ignored so that unrelated problems
    # (and KeyboardInterrupt/SystemExit) are no longer silently swallowed.
    pass

from silero_vad.model import load_silero_vad
from silero_vad.utils_vad import (get_speech_timestamps,
save_audio,
read_audio,
VADIterator,
collect_chunks)
Empty file added src/silero_vad/data/__init__.py
Empty file.
Binary file not shown.
Binary file not shown.
25 changes: 25 additions & 0 deletions src/silero_vad/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from .utils_vad import init_jit_model, OnnxWrapper
import torch
torch.set_num_threads(1)

def load_silero_vad(onnx=False):
    """Load the Silero VAD model file shipped in the ``silero_vad.data`` package.

    Args:
        onnx: If true, load ``silero_vad.onnx`` via ``OnnxWrapper`` (always
            with ``force_onnx_cpu=True``); otherwise load ``silero_vad.jit``
            via ``init_jit_model``.

    Returns:
        The model object produced by ``OnnxWrapper`` or ``init_jit_model``.
    """
    model_name = 'silero_vad.onnx' if onnx else 'silero_vad.jit'
    package_path = "silero_vad.data"

    # Prefer the ``importlib_resources`` backport when it is installed and
    # fall back to the standard library otherwise. Only a failed import
    # triggers the fallback, so genuine errors are no longer masked.
    try:
        import importlib_resources as impresources
    except ImportError:
        from importlib import resources as impresources

    if hasattr(impresources, 'files'):
        # ``as_file`` yields a real filesystem path for the resource.
        resource = impresources.files(package_path).joinpath(model_name)
        path_context = impresources.as_file(resource)
    else:
        # Older standard-library versions without ``files()`` only offer the
        # ``path()`` context manager.
        path_context = impresources.path(package_path, model_name)

    # Build the model while the resource context is still open: the path is
    # only guaranteed to exist inside the ``with`` block.
    # NOTE(review): assumes both loaders read the file eagerly - confirm.
    with path_context as model_file:
        model_file_path = str(model_file)
        if onnx:
            model = OnnxWrapper(model_file_path, force_onnx_cpu=True)
        else:
            model = init_jit_model(model_file_path)

    return model
11 changes: 6 additions & 5 deletions utils_vad.py → src/silero_vad/utils_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,18 +132,19 @@ def __call__(self, inputs: torch.Tensor):

def read_audio(path: str,
sampling_rate: int = 16000):
list_backends = torchaudio.list_audio_backends()

assert len(list_backends) > 0, 'The list of available backends is empty, please install backend manually. \
\n Recommendations: \n \tSox (UNIX OS) \n \tSoundfile (Windows OS, UNIX OS) \n \tffmpeg (Windows OS, UNIX OS)'

sox_backends = set(['sox', 'sox_io'])
audio_backends = torchaudio.list_audio_backends()

if len(sox_backends.intersection(audio_backends)) > 0:
try:
effects = [
['channels', '1'],
['rate', str(sampling_rate)]
]

wav, sr = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
else:
except:
wav, sr = torchaudio.load(path)

if wav.size(0) > 1:
Expand Down