Revert "Reenable all google drive based tests" (#1201)
zhangguanheng66 authored Feb 22, 2021
1 parent 082429b commit 811f74e
Showing 2 changed files with 22 additions and 4 deletions.
21 changes: 21 additions & 0 deletions test/data/test_builtin_datasets.py
@@ -3,12 +3,26 @@
 import os
 import torch
 import torchtext
+import unittest
 from torchtext.legacy import data
 from parameterized import parameterized
 from ..common.torchtext_test_case import TorchtextTestCase
 from ..common.parameterized_utils import load_params
 from ..common.assets import conditional_remove
+
+GOOGLE_DRIVE_BASED_DATASETS = [
+    'AmazonReviewFull',
+    'AmazonReviewPolarity',
+    'DBpedia',
+    'IMDB',
+    'IWSLT',
+    'SogouNews',
+    'WMT14',
+    'YahooAnswers',
+    'YelpReviewFull',
+    'YelpReviewPolarity'
+]
 
 
 def _raw_text_custom_name_func(testcase_func, param_num, param):
     info = param.args[0]
@@ -155,6 +169,8 @@ def test_raw_ag_news(self):
                            name_func=_raw_text_custom_name_func)
     def test_raw_text_classification(self, info):
         dataset_name = info['dataset_name']
+        if dataset_name in GOOGLE_DRIVE_BASED_DATASETS:
+            return
 
         # Currently disabled due to incredibly slow download
         if dataset_name == "WMTNewsCrawl":
@@ -173,6 +189,8 @@ def test_raw_text_classification(self, info):
 
     @parameterized.expand(list(sorted(torchtext.datasets.DATASETS.keys())))
     def test_raw_datasets_split_argument(self, dataset_name):
+        if dataset_name in GOOGLE_DRIVE_BASED_DATASETS:
+            return
         if 'statmt' in torchtext.datasets.URLS[dataset_name]:
             return
         dataset = torchtext.datasets.DATASETS[dataset_name]
@@ -188,6 +206,8 @@
 
     @parameterized.expand(["AG_NEWS", "WikiText2", "IMDB"])
     def test_datasets_split_argument(self, dataset_name):
+        if dataset_name in GOOGLE_DRIVE_BASED_DATASETS:
+            return
         dataset = torchtext.experimental.datasets.DATASETS[dataset_name]
         train1 = dataset(split='train')
         train2, = dataset(split=('train',))
@@ -236,6 +256,7 @@ def test_imdb(self):
         self._helper_test_func(len(test_iter), 25000, next(test_iter)[1][:25], 'I love sci-fi and am will')
         del train_iter, test_iter
 
+    @unittest.skip("Dataset depends on Google drive")
     def test_iwslt(self):
         from torchtext.experimental.datasets import IWSLT
 
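For reference, the skip pattern introduced by this revert combines an early return inside parameterized tests with an @unittest.skip decorator on a fixed test. A minimal, standalone sketch of that pattern (the DatasetSkipTests class and the dataset names below are illustrative, not torchtext's actual test suite):

import unittest
from parameterized import parameterized

# Illustrative subset of the Google Drive backed datasets listed above.
GOOGLE_DRIVE_BASED_DATASETS = ['IMDB', 'IWSLT']

class DatasetSkipTests(unittest.TestCase):
    @parameterized.expand(['AG_NEWS', 'IMDB', 'WikiText2'])
    def test_split_argument(self, dataset_name):
        if dataset_name in GOOGLE_DRIVE_BASED_DATASETS:
            return  # silently pass datasets hosted on Google Drive
        self.assertTrue(dataset_name)  # stand-in for the real split checks

    @unittest.skip("Dataset depends on Google drive")
    def test_iwslt(self):
        pass  # never runs while the skip decorator is in place

One consequence of the early-return approach is that the affected cases are reported as passed rather than skipped; unittest.skip (or a self.skipTest(...) call) would surface them as skipped in the test report.
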
5 changes: 1 addition & 4 deletions torchtext/data/datasets_utils.py
@@ -164,10 +164,7 @@ def download_extract_validate(root, url, url_md5, downloaded_file, extracted_fil
                                    path=os.path.join(root, downloaded_file),
                                    hash_value=url_md5, hash_type=hash_type)
     extracted_files = extract_archive(dataset_tar)
-    extracted_files = [os.path.abspath(p) for p in extracted_files]
-    abs_path = os.path.abspath(path)
-    found_path = find_match(extracted_file, extracted_files)
-    assert found_path == abs_path
+    assert path == find_match(extracted_file, extracted_files)
     return path


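The reverted lines in datasets_utils.py normalized both sides with os.path.abspath before asserting; the restored one-liner compares the joined path directly against find_match's return value, so the assertion only holds when both are expressed in the same relative or absolute form. A small standalone illustration of why that normalization matters (the paths in the comments are examples and machine-dependent):

import os

root = '.data'                          # relative root, as a caller might pass it
downloaded_file = 'wikitext-2-v1.zip'   # example archive name

path = os.path.join(root, downloaded_file)   # '.data/wikitext-2-v1.zip'
abs_path = os.path.abspath(path)             # e.g. '/home/user/.data/wikitext-2-v1.zip'

# The two forms refer to the same file but compare unequal as strings,
# unless root was already absolute.
print(path == abs_path)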
