Import torchtext #1314 99557ef

Reviewed By: parmeet
Differential Revision: D28683381
fbshipit-source-id: 7bfbf445dd512f0ce21c34096cf3f08332d90138
pytorch · May 25, 2021 · e9d7593
1 parent 0c55dd9
commit e9d7593
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 4 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -44,7 +44,9 @@ commands:
  steps:
  - run:
  name: Generate CCI cache key
- command: echo "$(date "+%D")" > .cachekey
+ command:
+ echo "$(date "+%D")" > .cachekey
+ cat cached_datasets_list.txt >> .cachekey
  - persist_to_workspace:
  root: .
  paths:

diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
@@ -44,7 +44,9 @@ commands:
  steps:
  - run:
  name: Generate CCI cache key
- command: echo "$(date "+%D")" > .cachekey
+ command:
+ echo "$(date "+%D")" > .cachekey
+ cat cached_datasets_list.txt >> .cachekey
  - persist_to_workspace:
  root: .
  paths:

diff --git a/test/common/cache_utils.py b/test/common/cache_utils.py
@@ -9,11 +9,14 @@
 def check_cache_status():
  assert os.path.exists(CACHE_STATUS_FILE), "Cache status file does not exists"
  with open(CACHE_STATUS_FILE, 'r') as f:
+ missing_datasets = []
  cache_status = json.load(f)
  for dataset_name in cache_status:
  for split in cache_status[dataset_name]:
  if cache_status[dataset_name][split]['status'] == "fail":
- raise FileNotFoundError("Failing all raw dataset unit tests as cache is missing atleast one raw dataset")
+ missing_datasets.append(dataset_name + '_' + split)
+ if missing_datasets:
+ raise FileNotFoundError("Failing all raw dataset unit tests as cache is missing {} datasets".format(missing_datasets))
 
 
 def generate_data_cache():
@@ -30,7 +33,7 @@ def generate_data_cache():
  if dataset_name not in cache_status:
  cache_status[dataset_name] = {}
  try:
- if dataset_name == "Multi30k" or dataset_name == 'WMT14':
+ if dataset_name == 'WMT14':
  _ = torchtext.experimental.datasets.raw.DATASETS[dataset_name](split=split)
  else:
  _ = torchtext.datasets.DATASETS[dataset_name](split=split)