Skip to content

Commit

Permalink
Adding MacOS unit tests on CircleCI (#1672)
Browse files Browse the repository at this point in the history
* Added macos unittests on circle ci

* Fix config.yml formatting

* updated setup_env.sh to use curl instead of wget

* Installing cmake and ninja

* Fixing test_sentencepiece_with_dataloader on MacOS

* Testing with partial

* Enabling windows unit tests

* Removed commented code
  • Loading branch information
Nayef211 authored Apr 4, 2022
1 parent 5142463 commit b710c88
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 13 deletions.
53 changes: 53 additions & 0 deletions .circleci/config.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions .circleci/config.yml.in
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,47 @@ jobs:
- store_test_results:
path: test-results

unittest_macos:
  # Runs the unit-test suite on a macOS executor. The steps reuse the shared
  # scripts that live under .circleci/unittest/linux/scripts.
  <<: *binary_common
  macos:
    xcode: "12.0"
  resource_class: large
  steps:
    - checkout
    - designate_upload_channel
    - load_conda_channel_flags
    - fetch_cachekey
    - run:
        name: Setup
        command: .circleci/unittest/linux/scripts/setup_env.sh
    - run:
        name: Install torchtext
        command: .circleci/unittest/linux/scripts/install.sh
    - restore_cache:
        keys:
        {% raw %}
          - v1-macos-dataset-vector-{{ checksum ".cachekey" }}
        {% endraw %}

    - run:
        name: Run tests
        # Downloading the embedding vectors takes a long time.
        no_output_timeout: 30m
        command: .circleci/unittest/linux/scripts/run_test.sh
    - save_cache:
        # save_cache takes a single `key`; only restore_cache accepts `keys`.
        {% raw %}
        key: v1-macos-dataset-vector-{{ checksum ".cachekey" }}
        {% endraw %}

        paths:
          - .vector_cache
    - run:
        name: Post process
        command: .circleci/unittest/linux/scripts/post_process.sh
    - store_test_results:
        path: test-results

unittest_windows:
<<: *binary_common
executor:
Expand Down
2 changes: 1 addition & 1 deletion .circleci/regenerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def indent(indentation, data_list):

def unittest_workflows(indentation=6):
w = []
for os_type in ["linux", "windows"]:
for os_type in ["linux", "windows", "macos"]:
for python_version in PYTHON_VERSIONS:
w.append(
{
Expand Down
10 changes: 7 additions & 3 deletions .circleci/unittest/linux/scripts/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ esac
# 1. Install conda at ./conda
if [ ! -d "${conda_dir}" ]; then
printf "* Installing conda\n"
wget -O miniconda.sh http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
curl --silent -L -o miniconda.sh "http://repo.continuum.io/miniconda/Miniconda3-latest-${os}-x86_64.sh"
bash ./miniconda.sh -b -f -p "${conda_dir}"
fi
eval "$(${conda_dir}/bin/conda shell.bash hook)"
Expand All @@ -34,11 +34,15 @@ if [ ! -d "${env_dir}" ]; then
fi
conda activate "${env_dir}"

# 3. Install Conda dependencies

# 3. Install minimal build tools
pip --quiet install cmake ninja

# 4. Install Conda dependencies
printf "* Installing dependencies (except PyTorch)\n"
conda env update --file "${this_dir}/environment.yml" --prune

# 4. Download
# 5. Download
printf "* Downloading SpaCy English models\n"
python -m spacy download en_core_web_sm
printf "* Downloading SpaCy German models\n"
Expand Down
18 changes: 9 additions & 9 deletions test/experimental/test_with_asset.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
import platform
import shutil
import tempfile
import unittest
from functools import partial

import torch
from test.common.torchtext_test_case import TorchtextTestCase
Expand All @@ -22,6 +21,12 @@
from ..common.assets import get_asset_path


# Windows and macOS don't support pickling nested functions, so the batch
# function is defined at module level instead of inside test_sentencepiece_with_dataloader.
def _batch_func(spm_processor, data):
    """Collate a batch of raw strings into a single ``torch.long`` tensor.

    ``spm_processor`` is called once for every element of ``data``; the
    per-item results are then handed to ``torch.tensor`` together, so they
    must all have the same length.
    """
    encoded = []
    for text in data:
        encoded.append(spm_processor(text))
    return torch.tensor(encoded, dtype=torch.long)


class TestTransformsWithAsset(TorchtextTestCase):
def test_vocab_transform(self):
asset_name = "vocab_test2.txt"
Expand Down Expand Up @@ -210,20 +215,15 @@ def test_builtin_pretrained_sentencepiece_processor(self):

# we separate out these errors because Windows runs into seg faults when propagating
# exceptions from C++ using pybind11
@unittest.skipIf(platform.system() == "Windows", "Test is known to fail on Windows.")
def test_sentencepiece_with_dataloader(self):
example_strings = ["the pretrained spm model names"] * 64
ref_results = torch.tensor([[13, 1465, 12824, 304, 24935, 5771, 3776]] * 16, dtype=torch.long)

# Windows doesn't support the nested function pickle
# Move the batch function out of the test_sentencepiece_with_dataloader test
sp_model_path = download_from_url(PRETRAINED_SP_MODEL["text_bpe_25000"])
spm_processor = sentencepiece_processor(sp_model_path)
batch_fn = partial(_batch_func, spm_processor)

def batch_func(data):
return torch.tensor([spm_processor(text) for text in data], dtype=torch.long)

dataloader = DataLoader(example_strings, batch_size=16, num_workers=2, collate_fn=batch_func)
dataloader = DataLoader(example_strings, batch_size=16, num_workers=2, collate_fn=batch_fn)
for item in dataloader:
self.assertEqual(item, ref_results)

Expand Down

0 comments on commit b710c88

Please sign in to comment.