Revert "Reenable all google drive based tests" (#1201)
zhangguanheng66 authored Feb 22, 2021
1 parent 082429b commit 811f74e
Showing 2 changed files with 22 additions and 4 deletions.
21 changes: 21 additions & 0 deletions test/data/test_builtin_datasets.py
@@ -3,12 +3,26 @@
 import os
 import torch
 import torchtext
+import unittest
 from torchtext.legacy import data
 from parameterized import parameterized
 from ..common.torchtext_test_case import TorchtextTestCase
 from ..common.parameterized_utils import load_params
 from ..common.assets import conditional_remove
+
+GOOGLE_DRIVE_BASED_DATASETS = [
+    'AmazonReviewFull',
+    'AmazonReviewPolarity',
+    'DBpedia',
+    'IMDB',
+    'IWSLT',
+    'SogouNews',
+    'WMT14',
+    'YahooAnswers',
+    'YelpReviewFull',
+    'YelpReviewPolarity'
+]
 
 
 def _raw_text_custom_name_func(testcase_func, param_num, param):
     info = param.args[0]
@@ -155,6 +169,8 @@ def test_raw_ag_news(self):
                            name_func=_raw_text_custom_name_func)
     def test_raw_text_classification(self, info):
         dataset_name = info['dataset_name']
+        if dataset_name in GOOGLE_DRIVE_BASED_DATASETS:
+            return
 
         # Currently disabled due to incredibly slow download
         if dataset_name == "WMTNewsCrawl":
@@ -173,6 +189,8 @@ def test_raw_text_classification(self, info):
 
     @parameterized.expand(list(sorted(torchtext.datasets.DATASETS.keys())))
     def test_raw_datasets_split_argument(self, dataset_name):
+        if dataset_name in GOOGLE_DRIVE_BASED_DATASETS:
+            return
         if 'statmt' in torchtext.datasets.URLS[dataset_name]:
             return
         dataset = torchtext.datasets.DATASETS[dataset_name]
@@ -188,6 +206,8 @@
 
     @parameterized.expand(["AG_NEWS", "WikiText2", "IMDB"])
     def test_datasets_split_argument(self, dataset_name):
+        if dataset_name in GOOGLE_DRIVE_BASED_DATASETS:
+            return
         dataset = torchtext.experimental.datasets.DATASETS[dataset_name]
         train1 = dataset(split='train')
         train2, = dataset(split=('train',))
@@ -236,6 +256,7 @@ def test_imdb(self):
         self._helper_test_func(len(test_iter), 25000, next(test_iter)[1][:25], 'I love sci-fi and am will')
         del train_iter, test_iter
 
+    @unittest.skip("Dataset depends on Google drive")
     def test_iwslt(self):
         from torchtext.experimental.datasets import IWSLT
 
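For reference, the skip pattern introduced by this revert combines an early return inside parameterized tests with an @unittest.skip decorator on a fixed test. A minimal, standalone sketch of that pattern (the DatasetSkipTests class and the dataset names below are illustrative, not torchtext's actual test suite):

import unittest
from parameterized import parameterized

# Illustrative subset of the Google Drive backed datasets listed above.
GOOGLE_DRIVE_BASED_DATASETS = ['IMDB', 'IWSLT']

class DatasetSkipTests(unittest.TestCase):
    @parameterized.expand(['AG_NEWS', 'IMDB', 'WikiText2'])
    def test_split_argument(self, dataset_name):
        if dataset_name in GOOGLE_DRIVE_BASED_DATASETS:
            return  # silently pass datasets hosted on Google Drive
        self.assertTrue(dataset_name)  # stand-in for the real split checks

    @unittest.skip("Dataset depends on Google drive")
    def test_iwslt(self):
        pass  # never runs while the skip decorator is in place

One consequence of the early-return approach is that the affected cases are reported as passed rather than skipped; unittest.skip (or a self.skipTest(...) call) would surface them as skipped in the test report.
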
5 changes: 1 addition & 4 deletions torchtext/data/datasets_utils.py
@@ -164,10 +164,7 @@ def download_extract_validate(root, url, url_md5, downloaded_file, extracted_fil
                                    path=os.path.join(root, downloaded_file),
                                    hash_value=url_md5, hash_type=hash_type)
     extracted_files = extract_archive(dataset_tar)
-    extracted_files = [os.path.abspath(p) for p in extracted_files]
-    abs_path = os.path.abspath(path)
-    found_path = find_match(extracted_file, extracted_files)
-    assert found_path == abs_path
+    assert path == find_match(extracted_file, extracted_files)
     return path


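The reverted lines in datasets_utils.py normalized both sides with os.path.abspath before asserting; the restored one-liner compares the joined path directly against find_match's return value, so the assertion only holds when both are expressed in the same relative or absolute form. A small standalone illustration of why that normalization matters (the paths in the comments are examples and machine-dependent):

import os

root = '.data'                          # relative root, as a caller might pass it
downloaded_file = 'wikitext-2-v1.zip'   # example archive name

path = os.path.join(root, downloaded_file)   # '.data/wikitext-2-v1.zip'
abs_path = os.path.abspath(path)             # e.g. '/home/user/.data/wikitext-2-v1.zip'

# The two forms refer to the same file but compare unequal as strings,
# unless root was already absolute.
print(path == abs_path)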
