refactor: initial commit

archinetai · Nov 25, 2022 · a65190a · a65190a
commit a65190a
Show file tree

Hide file tree

Showing 8 changed files with 203 additions and 0 deletions.
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+ release:
+ types: [published]
+
+permissions:
+ contents: read
+
+jobs:
+ deploy:
+
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v3
+ with:
+ python-version: '3.x'
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build
+ - name: Build package
+ run: python -m build
+ - name: Publish package
+ uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+ with:
+ user: __token__
+ password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+.mypy_cache
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,41 @@
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v2.3.0
+ hooks:
+ - id: end-of-file-fixer
+ - id: trailing-whitespace
+
+# Formats code correctly
+- repo: https://github.com/psf/black
+ rev: 22.3.0
+ hooks:
+ - id: black
+ args: [
+ '--experimental-string-processing'
+ ]
+
+# Sorts imports
+- repo: https://github.com/pycqa/isort
+ rev: 5.10.1
+ hooks:
+ - id: isort
+ name: isort (python)
+ args: ["--profile", "black"]
+
+# Checks unused imports, line lengths, etc
+# flake8's repository is hosted on GitHub (it moved there from GitLab).
+- repo: https://github.com/PyCQA/flake8
+  rev: 4.0.0
+  hooks:
+  - id: flake8
+    args: [
+      '--per-file-ignores=__init__.py:F401',
+      '--max-line-length=88',
+      '--ignore=E1,W1,E2,W2,E4,W4,E5,W5' # Handled by black
+    ]
+
+# Checks types
+- repo: https://github.com/pre-commit/mirrors-mypy
+ rev: 'v0.971'
+ hooks:
+ - id: mypy
+ additional_dependencies: [data-science-types>=0.2, torch>=1.6]
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 archinet.ai
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,56 @@
+
+# ArchiSound
+
+A collection of pre-trained audio models in PyTorch from [`audio-encoders-pytorch`](https://github.com/archinetai/audio-encoders-pytorch) and [`audio-diffusion-pytorch`](https://github.com/archinetai/audio-diffusion-pytorch).
+
+```bash
+pip install archisound
+```
+
+[![PyPI - Version](https://img.shields.io/pypi/v/archisound?style=flat&colorA=black&colorB=black)](https://pypi.org/project/archisound/)
+
+
+## Autoencoders
+
+### [`autoencoder1d-AT-v1`](https://huggingface.co/archinetai/autoencoder1d-AT-v1/tree/main)
+
+```py
+import torch
+from archisound import ArchiSound
+
+autoencoder = ArchiSound.from_pretrained('autoencoder1d-AT-v1')
+
+x = torch.randn(1, 2, 2**18) # [1, 2, 262144]
+z = autoencoder.encode(x) # [1, 32, 8192]
+y = autoencoder.decode(z) # [1, 2, 262144]
+```
+
+| Info | |
+| ------------- | ------------- |
+| Input type | Audio (stereo @ 48kHz) |
+| Number of parameters | 20.7M |
+| Compression Factor | 2x |
+| Downsampling Factor | 32x |
+| Bottleneck Type | Tanh |
+| Known Limitations | Slight blurriness in high frequency spectrogram reconstruction |
+
+
+### [`dmae1d-ATC64-v1`](https://huggingface.co/archinetai/dmae1d-ATC64-v1/tree/main)
+A diffusion-based autoencoder with a high compression ratio. Requires `audio_diffusion_pytorch==0.0.92`.
+
+```py
+import torch
+from archisound import ArchiSound
+
+autoencoder = ArchiSound.from_pretrained("dmae1d-ATC64-v1")
+
+x = torch.randn(1, 2, 2**18)
+z = autoencoder.encode(x) # [1, 32, 256]
+y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
+```
+
+| Info | |
+| ------------- | ------------- |
+| Input type | Audio (stereo @ 48kHz) |
+| Number of parameters | 234.2M |
+| Compression Factor | 64x |
+| Downsampling Factor | 1024x |
+| Bottleneck Type | Tanh |
diff --git a/archisound/__init__.py b/archisound/__init__.py
@@ -0,0 +1 @@
+from .archisound import ArchiSound
diff --git a/archisound/archisound.py b/archisound/archisound.py
@@ -0,0 +1,15 @@
+import torch.nn as nn
+from transformers import AutoModel
+
+# Maps each supported model name to the revision string that
+# `ArchiSound.from_pretrained` passes to `AutoModel.from_pretrained` for the
+# `archinetai/<name>` repository.
+# NOTE(review): the values look like full git commit hashes pinning each
+# model to an exact repository state — confirm.
+REVISION = {
+ "autoencoder1d-AT-v1": "57b6cde1969208d10fdd3e813708c1abe49f25c1",
+ "dmae1d-ATC64-v1": "07885065867977af43b460bb9c1422bdc90c29a0",
+}
+
+
+class ArchiSound:
+ @staticmethod
+ def from_pretrained(name: str) -> nn.Module:
+ return AutoModel.from_pretrained(
+ f"archinetai/{name}", trust_remote_code=True, revision=REVISION[name]
+ )
diff --git a/setup.py b/setup.py
@@ -0,0 +1,28 @@
+from setuptools import find_packages, setup
+
+setup(
+ name="archisound",
+ packages=find_packages(exclude=[]),
+ version="0.0.1",
+ license="MIT",
+ description="ArchiSound",
+ long_description_content_type="text/markdown",
+ author="Flavio Schneider",
+ author_email="archinetai@protonmail.com",
+ url="https://github.com/archinetai/archisound",
+ keywords=["artificial intelligence", "deep learning"],
+ install_requires=[
+ "torch>=1.6",
+ "data-science-types>=0.2",
+ "transformers",
+ "audio-diffusion-pytorch",
+ "audio-encoders-pytorch",
+ ],
+ classifiers=[
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+ "License :: OSI Approved :: MIT License",
+ "Programming Language :: Python :: 3.6",
+ ],
+)