Skip to content

Latest commit

 

History

History
114 lines (86 loc) · 2.98 KB

README.md

File metadata and controls

114 lines (86 loc) · 2.98 KB

The table of contents

1. Multiprocessing with p_tqdm library
2. Get audio duration very fast
3. Use decorator to measure processing time of a function
4. [Kaggle/Colab] Permission denied although I have set to "anyone with link"

1. Multiprocessing with p_tqdm library

Installation

pip install p_tqdm

Example

from scipy.io.wavfile import read
from p_tqdm import p_map, p_umap
import os
import pandas as pd

# worker function that is applied to every audio file in parallel
def process_wav(wav_path):
    """Read one WAV file and collect its basic metadata.

    Args:
        wav_path: Path of the WAV file to read.

    Returns:
        A dict with the keys "wav_path", "duration" (number of samples along
        the first axis divided by the sample rate) and "samplerate".
    """
    sample_rate, samples = read(wav_path)
    info = {"wav_path": wav_path}
    info["duration"] = samples.shape[0] / sample_rate
    info["samplerate"] = sample_rate
    return info

# gets the list of audio paths
# os.listdir() returns every entry of the directory (sub-directories and
# non-audio files included) in arbitrary order, so keep only regular *.wav
# files and sort them to get a reproducible input order.
wav_dir = "/tmp/original_wavs"
wav_paths = sorted(
    path
    for path in (os.path.join(wav_dir, name) for name in os.listdir(wav_dir))
    if path.lower().endswith(".wav") and os.path.isfile(path)
)

### process in parallel and return the ordered results ###
ordered_results = p_map(process_wav, wav_paths, num_cpus=4)

### process in parallel and return the unordered results ###
unordered_results = p_umap(process_wav, wav_paths, num_cpus=4)

### while the files are being processed, a progress bar like this is printed ###
60%|████████████████████████            | 60/100 [00:02<00:01, 1.00s/it]

# convert the ordered results (one dict per audio file) to a DataFrame;
# the dict keys become the column names
pd.DataFrame(ordered_results)
wav_path duration samplerate
/tmp/original_wavs/audio1.wav 12.0 16000
/tmp/original_wavs/audio2.wav 3.55 16000
... ... ...

2. Get audio duration very fast

We use the built-in wave module to read only the audio header and get the metadata, without loading the audio samples.

Example

import wave

def get_duration(wav_file):
    """Return the duration of a WAV file in seconds, rounded to 2 decimals.

    Only the frame count and the frame rate are queried from the ``wave``
    reader; the audio frames themselves are never read.

    Args:
        wav_file: Path (or binary file object) of the WAV file.

    Returns:
        The number of frames divided by the frame rate, rounded to 2 digits.

    Raises:
        wave.Error: If the file is not a WAV file the ``wave`` module can read.
    """
    # the context manager closes the underlying file handle again
    with wave.open(wav_file, "rb") as wav:
        sample_rate = wav.getframerate()
        nframes = wav.getnframes()
    return round(nframes / sample_rate, 2)

print(get_duration("/tmp/test.wav"))  # prints the duration in seconds, e.g. 3.21

3. Use decorator to measure processing time of a function

Example

import time, random as rd

def time_decorator(function):
    """Wrap ``function`` so that every call prints how long it took.

    Args:
        function: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``function``, prints the elapsed time in milliseconds and returns the
        result of ``function`` unchanged.
    """
    # imported locally so this snippet does not need an extra top-level import
    from functools import wraps

    @wraps(function)  # keep the wrapped function's __name__ and docstring
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic, so the difference can never be negative
        stime = time.perf_counter()
        result = function(*args, **kwargs)
        # perf_counter() counts seconds; scale so the value matches the "ms" label
        process_time = (time.perf_counter() - stime) * 1000
        print(f"{function.__name__} taken time: {process_time} ms")
        return result

    return wrapper

@time_decorator
def process():
    """Sum the integers below a randomly chosen upper bound.

    The bound is drawn with ``rd.randint(10000, 10_000_000)``, so the amount
    of work differs from call to call.

    Returns:
        The sum of all integers in ``range(upper_bound)``.
    """
    upper_bound = rd.randint(10000, 10_000_000)
    running_sum = 0
    for value in range(upper_bound):
        running_sum += value
    return running_sum

# calling the decorated function prints one line in this format:
# process taken time: 0.6141083240509033 ms
process()

4. [Kaggle/Colab] Permission denied although I have set to "anyone with link"

!pip install --upgrade --no-cache-dir gdown

import gdown

# id of the shared Google Drive file
file_id = '1N3-c-IzIqYNB53ojvZKkEEMO0c7eEQZQ'
# direct-download link built from the file id
url = f"https://drive.google.com/uc?export=download&id={file_id}&confirm=t"
# local file name the download is written to
output = 'out.zip'
gdown.download(url=url, output=output, quiet=False)