diff --git a/docs/source/main_classes/pipelines.mdx b/docs/source/main_classes/pipelines.mdx index a60dce9dc5877e..145f17f4481fda 100644 --- a/docs/source/main_classes/pipelines.mdx +++ b/docs/source/main_classes/pipelines.mdx @@ -79,14 +79,14 @@ GPU. If it doesn't don't hesitate to create an issue. import datasets from transformers import pipeline from transformers.pipelines.base import KeyDataset -import tqdm +from tqdm.auto import tqdm pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0) dataset = datasets.load_dataset("superb", name="asr", split="test") # KeyDataset (only *pt*) will simply return the item in the dict returned by the dataset item # as we're not interested in the *target* part of the dataset. -for out in tqdm.tqdm(pipe(KeyDataset(dataset, "file"))): +for out in tqdm(pipe(KeyDataset(dataset, "file"))): print(out) # {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"} # {"text": ....} @@ -130,7 +130,6 @@ whenever the pipeline uses its streaming ability (so when passing lists or `Data from transformers import pipeline from transformers.pipelines.base import KeyDataset import datasets -import tqdm dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised") pipe = pipeline("text-classification", device=0) @@ -153,8 +152,7 @@ Example where it's mostly a speedup: ```python from transformers import pipeline from torch.utils.data import Dataset -import tqdm - +from tqdm.auto import tqdm pipe = pipeline("text-classification", device=0) @@ -172,7 +170,7 @@ dataset = MyDataset() for batch_size in [1, 8, 64, 256]: print("-" * 30) print(f"Streaming batch_size={batch_size}") - for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)): + for out in tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)): pass ``` @@ -228,7 +226,7 @@ Streaming batch_size=256 0%| | 0/1000 [00:00<?, ?it/s] Traceback (most recent call last): File "...", line ..., in <module> - for out in tqdm.tqdm(pipe(dataset, batch_size=256), total=len(dataset)): + 
for out in tqdm(pipe(dataset, batch_size=256), total=len(dataset)): .... q = q / math.sqrt(dim_per_head) # (bs, n_heads, q_length, dim_per_head) RuntimeError: CUDA out of memory. Tried to allocate 376.00 MiB (GPU 0; 3.95 GiB total capacity; 1.72 GiB already allocated; 354.88 MiB free; 2.46 GiB reserved in total by PyTorch)