Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

[SeeKeR] Fix Tests #4449

Merged
merged 13 commits into from
Mar 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,26 +222,26 @@ commands:
- setupcuda
- fixgit
- restore_cache:
key: deps-20220227-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
key: deps-20220328-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
- setup
- installdeps
- << parameters.more_installs >>
- save_cache:
key: deps-20220227-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
key: deps-20220328-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
paths:
- "~/venv/bin"
- "~/venv/lib"
- findtests:
marker: << parameters.marker >>
- restore_cache:
key: data-20220227-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
key: data-20220328-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
- run:
name: Run tests
no_output_timeout: 60m
command: |
coverage run -m pytest -m << parameters.marker >> << parameters.pytest_flags >> --junitxml=test-results/junit.xml
- save_cache:
key: data-20220227-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
key: data-20220328-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
paths:
- "~/ParlAI/data"
- codecov
Expand All @@ -258,12 +258,12 @@ commands:
- checkout
- fixgit
- restore_cache:
key: deps-20220227-bw-{{ checksum "requirements.txt" }}
key: deps-20220328-bw-{{ checksum "requirements.txt" }}
- setup
- installdeps
- installtorchgpu
- save_cache:
key: deps-20220227-bw-{{ checksum "requirements.txt" }}
key: deps-20220328-bw-{{ checksum "requirements.txt" }}
paths:
- "~/venv/bin"
- "~/venv/lib"
Expand Down
52 changes: 48 additions & 4 deletions parlai/tasks/natural_questions/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from typing import List, Optional, Tuple

from parlai.core.teachers import ChunkTeacher, DialogTeacher
from .build import build, DATASET_NAME_LOCAL
from .build import build, DATASET_NAME_LOCAL, build_sample
from .build_open import build as build_
from .utils.text_utils import simplify_nq_example

Expand Down Expand Up @@ -195,6 +195,18 @@ def _get_short_answers(self, example):
short_answers.append(annotation['yes_no_answer'])
return short_answers

def _get_fname(self, chunk_idx: int) -> str:
"""
Get the filname of the data chunk.

:param chunk_idx:
which chunk to get

:return chunk_name:
return the chunk fname
"""
return f'nq-{self.dtype}-{str(chunk_idx).zfill(2)}.jsonl'

def load_from_chunk(self, chunk_idx: int):
"""
Loads from a chunk of the dataset, given the chunk index.
Expand All @@ -213,7 +225,7 @@ def _extract_labels_indices(example, candidate_labels):
labels.append(candidate_labels[label_ind])
return labels

fname = f'nq-{self.dtype}-{str(chunk_idx).zfill(2)}.jsonl'
fname = self._get_fname(chunk_idx)
fpath = os.path.join(self.dpath, fname)
output = []
with jsonlines.open(fpath, 'r') as fi:
Expand Down Expand Up @@ -256,6 +268,38 @@ def create_message(self, example_components, entry_idx=0):
return message_dict


class NaturalQuestionsSampleTeacher(NaturalQuestionsTeacher):
    """
    Teacher over the NQ sample data, intended for testing.

    Reads a single sample file per fold from the ``<DATASET_NAME_LOCAL>_sample``
    directory under the datapath.
    """

    def __init__(self, opt, shared=None):
        # Make sure the sample files exist before any path is resolved.
        build_sample(opt)

        self.use_html = opt.get('use_html', False)
        self.use_long_answer = opt.get('use_long_answer', False)
        self.use_context = opt.get('use_context', False)
        self.id = 'natural_questions'
        self.opt = copy.deepcopy(opt)

        fold = DatatypeHelper.fold(self.opt['datatype'])
        if fold == 'test':
            # There is no test split here, so the valid split is used instead.
            logging.error("No test split for this teacher; overriding to valid")
            fold = 'valid'
        self.dtype = fold

        sample_dir = f"{DATASET_NAME_LOCAL}_sample"
        self.dpath = os.path.join(self.opt['datapath'], sample_dir, self.dtype)
        self.n_samples = None

        # Initialise via ChunkTeacher directly; the attributes above are set
        # here instead of through the parent class constructor.
        ChunkTeacher.__init__(self, self.opt, shared)

    def _get_fname(self, chunk_idx: int) -> str:
        """
        Return the sample file name for the current fold.

        The chunk index is ignored: there is one sample file per fold.
        """
        return 'nq-{}-sample.jsonl'.format(self.dtype)

    def get_fold_chunks(self, opt) -> List[int]:
        """
        Return the chunk indices for this teacher: only chunk 0.
        """
        return [0]

    def get_num_samples(self, opt) -> Tuple[int, int]:
        """
        Return the fixed size pair for the sample data, ``(200, 200)``.
        """
        sample_size = 200
        return (sample_size, sample_size)


class InMetric(AverageMetric):
@staticmethod
def compute(guess: str, answers: List[str]) -> Optional["InMetric"]:
Expand All @@ -271,7 +315,7 @@ def compute(guess: str, answers: List[str]) -> Optional["InMetric"]:
class NaturalQuestionsOpenTeacher(DialogTeacher):
def __init__(self, opt: Opt, shared=None):
self.fold = opt["datatype"].split(":")[0]
self.dpath = os.path.join(opt["datapath"], "NaturalQuestions_retrieval")
self.dpath = os.path.join(opt["datapath"], "NaturalQuestionsOpen")
self.opt = opt
self.opt['datafile'] = os.path.join(self.dpath, self.fold + ".csv")
if shared is None:
Expand All @@ -283,7 +327,7 @@ def add_cmdline_args(
cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
) -> ParlaiParser:
super().add_cmdline_args(parser, partial_opt)
group = parser.add_argument_group("Natural Questions retrieval")
group = parser.add_argument_group("Natural Questions Open")
group.add_argument(
"--normalize-everything",
default=False,
Expand Down
21 changes: 16 additions & 5 deletions parlai/tasks/natural_questions/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _import_google_cloud_client():
return storage


def _download_with_cloud_storage_client(dpath):
def _download_with_cloud_storage_client(dpath, sample: bool = False):
# Initiating the Cloud Storage Client with anonymous credentials
stm = _import_google_cloud_client()
storage_client = stm.Client.create_anonymous_client()
Expand All @@ -54,9 +54,14 @@ def _download_blobs_from_list(blobs_list, target_path):
if not blob_name.endswith('.gz'): # Not a zipped file
continue

if blob_name.startswith('v1.0/train'):
if sample and blob_name.startswith('v1.0/sample'):
if 'train' in blob_name:
train_blobs.append(blob)
else:
valid_blobs.append(blob)
elif not sample and blob_name.startswith('v1.0/train'):
train_blobs.append(blob)
elif blob_name.startswith('v1.0/dev'):
elif not sample and blob_name.startswith('v1.0/dev'):
valid_blobs.append(blob)

# Downloading the blobs to their respective dtype directory
Expand Down Expand Up @@ -94,8 +99,10 @@ def _move_valid_files_from_dev_to_valid(dpath):
os.rename(os.path.join(valid_path, f), os.path.join(valid_path, new))


def build(opt):
def build(opt, sample: bool = False):
dpath = os.path.join(opt['datapath'], DATASET_NAME_LOCAL)
if sample:
dpath = f"{dpath}_sample"
version = 'v1.0'

if not build_data.built(dpath, version_string=version):
Expand All @@ -105,7 +112,11 @@ def build(opt):
build_data.remove_dir(dpath)
logging.info('Removed the existing data (old version).')
build_data.make_dir(dpath)
_download_with_cloud_storage_client(dpath)
_download_with_cloud_storage_client(dpath, sample)
_untar_dataset_files(dpath)
_move_valid_files_from_dev_to_valid(dpath)
build_data.mark_done(dpath, version_string=version)


def build_sample(opt):
    """
    Build the sample variant of the dataset.

    Delegates to ``build`` with the ``sample`` flag switched on.

    :param opt:
        options passed through unchanged to ``build``
    """
    build(opt, sample=True)
10 changes: 4 additions & 6 deletions parlai/tasks/natural_questions/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from parlai.utils.testing import AutoTeacherTest # noqa: F401
from parlai.utils.testing import AutoTeacherTest


class TestNaturalQuestionsTeacher(AutoTeacherTest):
task = 'natural_questions' # replace with your teacher name
class TestNaturalQuestionsSampleTeacher(AutoTeacherTest):
task = 'natural_questions:NaturalQuestionsSampleTeacher'


class TestNaturalQuestionsOpenTeacher(AutoTeacherTest):
task = (
'natural_questions:NaturalQuestionsOpenTeacher'
) # replace with your teacher name
task = 'natural_questions:NaturalQuestionsOpenTeacher'
Original file line number Diff line number Diff line change
Expand Up @@ -127,5 +127,5 @@ acts:
id: natural_questions:NaturalQuestionsOpenTeacher
text: what is the smallest prime number that is greater than 30
title: List of prime numbers
num_episodes: 1223
num_examples: 1223
num_episodes: 8757
num_examples: 8757
Loading