feat: add new dmaes, format readme

archinetai · Jan 18, 2023 · 473f79d · 473f79d
1 parent 1436a31
commit 473f79d
Show file tree

Hide file tree

Showing 3 changed files with 90 additions and 34 deletions.
diff --git a/README.md b/README.md
@@ -13,45 +13,101 @@ pip install archisound
 
 ## Autoencoders
 
-### [`autoencoder1d-AT-v1`](https://huggingface.co/archinetai/autoencoder1d-AT-v1/tree/main)
+* [`dmae1d-ATC32-v3`](https://huggingface.co/archinetai/dmae1d-ATC32-v3/tree/main)
+ <details> <summary> Usage and Info </summary>
 
-```py
-from archisound import ArchiSound
+ ```py
+ from archisound import ArchiSound
 
-autoencoder = ArchiSound.from_pretrained('autoencoder1d-AT-v1')
+ autoencoder = ArchiSound.from_pretrained("dmae1d-ATC32-v3")
 
-x = torch.randn(1, 2, 2**18) # [1, 2, 262144]
-z = autoencoder.encode(x)  # [1, 32, 8192]
-y = autoencoder.decode(z)  # [1, 2, 262144]
-```
+ x = torch.randn(1, 2, 2**18)
+ z = autoencoder.encode(x) # [1, 32, 512]
+ y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
+ ```
 
-| Info | |
-| ------------- | ------------- |
-| Input type | Audio (stereo @ 48kHz) |
-| Number of parameters | 20.7M |
-| Compression Factor | 2x |
-| Downsampling Factor | 32x |
-| Bottleneck Type | Tanh |
-| Known Limitations | Slight blurriness in high frequency spectrogram reconstruction |
+ | Info | |
+ | ------------- | ------------- |
+ | Input type | Audio (stereo @ 48kHz) |
+ | Number of parameters | - |
+ | Compression Factor | 32x |
+ | Downsampling Factor | 512x |
+ | Bottleneck Type | Tanh |
 
+ </details>
 
-### [`dmae1d-ATC64-v1`](https://huggingface.co/archinetai/dmae1d-ATC64-v1/tree/main)
-A diffusion based autoencoder with high compression ratio. Requires `audio_diffusion_pytorch==0.0.92`.
 
-```py
-from archisound import ArchiSound
+* [`dmae1d-ATC64-v2`](https://huggingface.co/archinetai/dmae1d-ATC64-v2/tree/main)
+ <details> <summary> Usage and Info </summary>
 
-autoencoder = ArchiSound.from_pretrained("dmae1d-ATC64-v1")
+ ```py
+ from archisound import ArchiSound
 
-x = torch.randn(1, 2, 2**18)
-z = autoencoder.encode(x) # [1, 32, 256]
-y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
-```
+ autoencoder = ArchiSound.from_pretrained("dmae1d-ATC64-v2")
+
+ x = torch.randn(1, 2, 2**18)
+ z = autoencoder.encode(x) # [1, 32, 256]
+ y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
+ ```
+
+ | Info | |
+ | ------------- | ------------- |
+ | Input type | Audio (stereo @ 48kHz) |
+ | Number of parameters | - |
+ | Compression Factor | 64x |
+ | Downsampling Factor | 1024x |
+ | Bottleneck Type | Tanh |
+
+ </details>
+
+
+
+* [`autoencoder1d-AT-v1`](https://huggingface.co/archinetai/autoencoder1d-AT-v1/tree/main)
+ <details> <summary> Usage and Info </summary>
+
+ ```py
+ from archisound import ArchiSound
+
+ autoencoder = ArchiSound.from_pretrained('autoencoder1d-AT-v1')
+
+ x = torch.randn(1, 2, 2**18) # [1, 2, 262144]
+ z = autoencoder.encode(x) # [1, 32, 8192]
+ y = autoencoder.decode(z) # [1, 2, 262144]
+ ```
+
+ | Info | |
+ | ------------- | ------------- |
+ | Input type | Audio (stereo @ 48kHz) |
+ | Number of parameters | 20.7M |
+ | Compression Factor | 2x |
+ | Downsampling Factor | 32x |
+ | Bottleneck Type | Tanh |
+ | Known Limitations | Slight blurriness in high frequency spectrogram reconstruction |
+
+ </details>
+
+
+
+* [`dmae1d-ATC64-v1`](https://huggingface.co/archinetai/dmae1d-ATC64-v1/tree/main)
+ <details> <summary> Usage and Info </summary>
+
+ A diffusion-based autoencoder with a high compression ratio. Requires `audio_diffusion_pytorch==0.0.92`.
+
+ ```py
+ from archisound import ArchiSound
+
+ autoencoder = ArchiSound.from_pretrained("dmae1d-ATC64-v1")
+
+ x = torch.randn(1, 2, 2**18)
+ z = autoencoder.encode(x) # [1, 32, 256]
+ y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
+ ```
 
-| Info | |
-| ------------- | ------------- |
-| Input type | Audio (stereo @ 48kHz) |
-| Number of parameters | 234.2M |
-| Compression Factor | 64x |
-| Downsampling Factor | 1024x |
-| Bottleneck Type | Tanh |
+ | Info | |
+ | ------------- | ------------- |
+ | Input type | Audio (stereo @ 48kHz) |
+ | Number of parameters | 234.2M |
+ | Compression Factor | 64x |
+ | Downsampling Factor | 1024x |
+ | Bottleneck Type | Tanh |
+ </details>
diff --git a/archisound/archisound.py b/archisound/archisound.py
@@ -5,7 +5,7 @@
  "autoencoder1d-AT-v1": "57b6cde1969208d10fdd3e813708c1abe49f25c1",
  "dmae1d-ATC64-v1": "07885065867977af43b460bb9c1422bdc90c29a0",
  "dmae1d-ATC64-v2": "3ffeea68d4c069777055fce9ac77bbb67eec1d68",
- "diff-text-M-v1": "679c4550e0bc49074778238eb92588ccd5a60ff2",
+ "dmae1d-ATC32-v3": "3d43b811b83fa395d5ccd6cf58b796b85fddd1d2",
 }
 
 

diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
  name="archisound",
  packages=find_packages(exclude=[]),
- version="0.0.4",
+ version="0.0.5",
  license="MIT",
  description="ArchiSound",
  long_description_content_type="text/markdown",