Adding MacOS unit tests on CircleCI (#1672)

* Added macos unittests on circle ci * Fix config.yml formatting * updated setup_env.sh to use curl instead of wget * Installing cmake and ninja * Fixing test_sentencepiece_with_dataloader on MacOS * Testing with partial * Enabling windows unit tests * Removed commented code
pytorch · Apr 4, 2022 · b710c88 · b710c88
1 parent 5142463
commit b710c88
Show file tree

Hide file tree

Showing 5 changed files with 111 additions and 13 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
@@ -451,6 +451,47 @@ jobs:
       - store_test_results:
           path: test-results
 
+  unittest_macos:
+    <<: *binary_common
+    macos:
+      xcode: "12.0"
+    resource_class: large
+    steps:
+      - checkout
+      - designate_upload_channel
+      - load_conda_channel_flags
+      - fetch_cachekey
+      - run:
+          name: Setup
+          command: .circleci/unittest/linux/scripts/setup_env.sh
+      - run:
+          name: Install torchtext
+          command: .circleci/unittest/linux/scripts/install.sh
+      - restore_cache:
+          keys:
+          {% raw %}
+            - v1-macos-dataset-vector-{{ checksum ".cachekey" }}
+          {% endraw %}
+
+      - run:
+          name: Run tests
+          # Downloading embedding vector takes long time.
+          no_output_timeout: 30m
+          command: .circleci/unittest/linux/scripts/run_test.sh
+      - save_cache:
+          # Takes a single `key` (a `keys` list belongs to restore_cache only); same key as restore_cache above.
+          {% raw %}
+          key: v1-macos-dataset-vector-{{ checksum ".cachekey" }}
+          {% endraw %}
+
+          paths:
+            - .vector_cache
+      - run:
+          name: Post process
+          command: .circleci/unittest/linux/scripts/post_process.sh
+      - store_test_results:
+          path: test-results
+
   unittest_windows:
     <<: *binary_common
     executor:

diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py
@@ -147,7 +147,7 @@ def indent(indentation, data_list):
 
 def unittest_workflows(indentation=6):
     w = []
-    for os_type in ["linux", "windows"]:
+    for os_type in ["linux", "windows", "macos"]:
         for python_version in PYTHON_VERSIONS:
             w.append(
                 {

diff --git a/.circleci/unittest/linux/scripts/setup_env.sh b/.circleci/unittest/linux/scripts/setup_env.sh
@@ -22,7 +22,7 @@ esac
 # 1. Install conda at ./conda
 if [ ! -d "${conda_dir}" ]; then
     printf "* Installing conda\n"
-    wget -O miniconda.sh http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
+    curl --silent -L -o miniconda.sh "http://repo.continuum.io/miniconda/Miniconda3-latest-${os}-x86_64.sh"
     bash ./miniconda.sh -b -f -p "${conda_dir}"
 fi
 eval "$(${conda_dir}/bin/conda shell.bash hook)"
@@ -34,11 +34,15 @@ if [ ! -d "${env_dir}" ]; then
 fi
 conda activate "${env_dir}"
 
-# 3. Install Conda dependencies
+
+# 3. Install minimal build tools
+pip --quiet install cmake ninja
+
+# 4. Install Conda dependencies
 printf "* Installing dependencies (except PyTorch)\n"
 conda env update --file "${this_dir}/environment.yml" --prune
 
-# 4. Download
+# 5. Download
 printf "* Downloading SpaCy English models\n"
 python -m spacy download en_core_web_sm
 printf "* Downloading SpaCy German models\n"

diff --git a/test/experimental/test_with_asset.py b/test/experimental/test_with_asset.py
@@ -1,8 +1,7 @@
 import os
-import platform
 import shutil
 import tempfile
-import unittest
+from functools import partial
 
 import torch
 from test.common.torchtext_test_case import TorchtextTestCase
@@ -22,6 +21,12 @@
 from ..common.assets import get_asset_path
 
 
+# Windows and macOS don't support pickling nested functions, so the batch
+# function is defined at module level rather than inside test_sentencepiece_with_dataloader.
+def _batch_func(spm_processor, data):
+    return torch.tensor([spm_processor(text) for text in data], dtype=torch.long)
+
+
 class TestTransformsWithAsset(TorchtextTestCase):
     def test_vocab_transform(self):
         asset_name = "vocab_test2.txt"
@@ -210,20 +215,15 @@ def test_builtin_pretrained_sentencepiece_processor(self):
 
     # we separate out these errors because Windows runs into seg faults when propagating
     # exceptions from C++ using pybind11
-    @unittest.skipIf(platform.system() == "Windows", "Test is known to fail on Windows.")
     def test_sentencepiece_with_dataloader(self):
         example_strings = ["the pretrained spm model names"] * 64
         ref_results = torch.tensor([[13, 1465, 12824, 304, 24935, 5771, 3776]] * 16, dtype=torch.long)
 
-        # Windows doesn't support the nested function pickle
-        # Move the batch function out of the test_sentencepiece_with_dataloader test
         sp_model_path = download_from_url(PRETRAINED_SP_MODEL["text_bpe_25000"])
         spm_processor = sentencepiece_processor(sp_model_path)
+        batch_fn = partial(_batch_func, spm_processor)
 
-        def batch_func(data):
-            return torch.tensor([spm_processor(text) for text in data], dtype=torch.long)
-
-        dataloader = DataLoader(example_strings, batch_size=16, num_workers=2, collate_fn=batch_func)
+        dataloader = DataLoader(example_strings, batch_size=16, num_workers=2, collate_fn=batch_fn)
         for item in dataloader:
             self.assertEqual(item, ref_results)