diff --git a/jiant/proj/main/modeling/taskmodels.py b/jiant/proj/main/modeling/taskmodels.py
index 9de891469..f656229d9 100644
--- a/jiant/proj/main/modeling/taskmodels.py
+++ b/jiant/proj/main/modeling/taskmodels.py
@@ -46,7 +46,7 @@ def __init__(self, encoder, regression_head: heads.RegressionHead):
 
     def forward(self, batch, task, tokenizer, compute_loss: bool = False):
         encoder_output = get_output_from_encoder_and_batch(encoder=self.encoder, batch=batch)
-        # TODO: Abuse of notation - these aren't really logits (Issue #45)
+        # TODO: Abuse of notation - these aren't really logits (issue #1187)
         logits = self.regression_head(pooled=encoder_output.pooled)
         if compute_loss:
             loss_fct = nn.MSELoss()
@@ -247,9 +247,9 @@ def forward(self, batch, task, tokenizer, compute_loss: bool = False):
         else:
             raise TypeError(type(self.pooler_head))
 
-        # TODO: Abuse of notation - these aren't really logits (Issue #45)
+        # TODO: Abuse of notation - these aren't really logits (issue #1187)
         if compute_loss:
-            # TODO: make this optional? (Issue #45)
+            # TODO: make this optional? (issue #1187)
             return LogitsAndLossOutput(
                 logits=logits,
                 loss=torch.tensor([0.0]),  # This is a horrible hack
diff --git a/jiant/proj/main/preprocessing.py b/jiant/proj/main/preprocessing.py
index 31c738389..e1f69fca4 100644
--- a/jiant/proj/main/preprocessing.py
+++ b/jiant/proj/main/preprocessing.py
@@ -39,7 +39,7 @@ def smart_truncate(dataset: torch_utils.ListDataset, max_seq_length: int, verbos
     range_idx = np.arange(max_seq_length)
     for datum in dataset.data:
         # TODO: document why reshape and max happen here (for cola this isn't necessary).
-        #   (Issue #47)
+        #   (issue #1185)
         indexer = datum["data_row"].input_mask.reshape(-1, max_seq_length).max(-2)
         valid_length_ls.append(range_idx[indexer.astype(bool)].max() + 1)
     max_valid_length = max(valid_length_ls)
@@ -107,7 +107,7 @@ def convert_examples_to_dataset(
     Args:
         task (Task): Task object
         examples (list[Example]): list of task Examples.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         feat_spec (FeaturizationSpec): Tokenization-related metadata.
         phase (str): string identifying the data subset (e.g., train, val or test).
         verbose: If True, display progress bar.
@@ -157,7 +157,7 @@ def tokenize_and_featurize(
     Args:
         task (Task): Task object
         examples (list[Example]): list of task Examples.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         feat_spec (FeaturizationSpec): Tokenization-related metadata.
         phase (str): string identifying the data subset (e.g., train, val or test).
         verbose: If True, display progress bar.
@@ -166,7 +166,7 @@ def tokenize_and_featurize(
         List DataRows containing tokenized and featurized examples.
 
     """
-    # TODO: Better solution (Issue #48)
+    # TODO: Better solution (issue #1184)
     if task.TASK_TYPE == TaskTypes.SQUAD_STYLE_QA:
         data_rows = []
         for example in maybe_tqdm(examples, desc="Tokenizing", verbose=verbose):
@@ -193,7 +193,7 @@ def iter_chunk_tokenize_and_featurize(
     Args:
         task (Task): Task object
        examples (list[Example]): list of task Examples.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         feat_spec (FeaturizationSpec): Tokenization-related metadata.
         phase (str): string identifying the data subset (e.g., train, val or test).
         verbose: If True, display progress bar.
@@ -203,7 +203,7 @@ def iter_chunk_tokenize_and_featurize(
 
     """
     for example in maybe_tqdm(examples, desc="Tokenizing", verbose=verbose):
-        # TODO: Better solution (Issue #48)
+        # TODO: Better solution (issue #1184)
         if task.TASK_TYPE == TaskTypes.SQUAD_STYLE_QA:
             yield from example.to_feature_list(
                 tokenizer=tokenizer,
diff --git a/jiant/proj/main/runner.py b/jiant/proj/main/runner.py
index 623a21418..dada9655f 100644
--- a/jiant/proj/main/runner.py
+++ b/jiant/proj/main/runner.py
@@ -225,7 +225,7 @@ def complex_backpropagate(self, loss, gradient_accumulation_steps):
         )
 
     def get_runner_state(self):
-        # TODO: Add fp16 (Issue #46)
+        # TODO: Add fp16 (issue #1186)
         state = {
             "model": torch_utils.get_model_for_saving(self.jiant_model).state_dict(),
             "optimizer": self.optimizer_scheduler.optimizer.state_dict(),
diff --git a/jiant/proj/main/tokenize_and_cache.py b/jiant/proj/main/tokenize_and_cache.py
index b84b32ece..9fe19bdf6 100644
--- a/jiant/proj/main/tokenize_and_cache.py
+++ b/jiant/proj/main/tokenize_and_cache.py
@@ -39,7 +39,7 @@ def chunk_and_save(task, phase, examples, feat_spec, tokenizer, args: RunConfigu
         phase (str): string identifying the data subset (e.g., train, val or test).
         examples (list[Example]): list of task Examples.
         feat_spec: (FeaturizationSpec): Tokenization-related metadata.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         args (RunConfiguration): run configuration object.
 
     """
@@ -71,7 +71,7 @@ def full_chunk_and_save(task, phase, examples, feat_spec, tokenizer, args: RunCo
         phase (str): string identifying the data subset (e.g., train, val or test).
         examples (list[Example]): list of task Examples.
         feat_spec: (FeaturizationSpec): Tokenization-related metadata.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         args (RunConfiguration): run configuration object.
 
     """
@@ -108,7 +108,7 @@ def iter_chunk_and_save(task, phase, examples, feat_spec, tokenizer, args: RunCo
         phase (str): string identifying the data subset (e.g., train, val or test).
         examples (list[Example]): list of task Examples.
         feat_spec: (FeaturizationSpec): Tokenization-related metadata.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         args (RunConfiguration): run configuration object.
 
     """
diff --git a/jiant/proj/simple/runscript.py b/jiant/proj/simple/runscript.py
index 9b4a592a7..0db080999 100644
--- a/jiant/proj/simple/runscript.py
+++ b/jiant/proj/simple/runscript.py
@@ -153,7 +153,7 @@ def run_simple(args: RunConfiguration):
             ),
             output_dir=os.path.join(args.exp_dir, "cache", task_name),
             phases=phases_to_do,
-            # TODO: Need a strategy for task-specific max_seq_length issues (Issue #66)
+            # TODO: Need a strategy for task-specific max_seq_length issues (issue #1176)
             max_seq_length=args.max_seq_length,
             smart_truncate=True,
             do_iter=True,
diff --git a/jiant/scripts/benchmarks/xtreme/xtreme_submission.py b/jiant/scripts/benchmarks/xtreme/xtreme_submission.py
index 45d723b02..d07c92194 100644
--- a/jiant/scripts/benchmarks/xtreme/xtreme_submission.py
+++ b/jiant/scripts/benchmarks/xtreme/xtreme_submission.py
@@ -45,7 +45,7 @@ class RunConfiguration(zconf.RunConfig):
 
     # === Nuisance Parameters === #
     # Required for quickly setting up runner
-    # Remove/refactor with config refactor (Issue #66)
+    # Remove/refactor with config refactor (issue #1176)
     learning_rate = zconf.attr(default=1e-5, type=float)
     adam_epsilon = zconf.attr(default=1e-8, type=float)
     max_grad_norm = zconf.attr(default=1.0, type=float)
diff --git a/jiant/shared/runner.py b/jiant/shared/runner.py
index f1c67c6b3..4ac21feac 100644
--- a/jiant/shared/runner.py
+++ b/jiant/shared/runner.py
@@ -31,7 +31,7 @@ def complex_backpropagate(
 def get_train_dataloader_from_cache(
     train_cache: caching.ChunkedFilesDataCache, task, train_batch_size: int
 ):
-    # TODO: Expose buffer_size parameter (Issue #50)
+    # TODO: Expose buffer_size parameter (issue #1183)
     dataset = train_cache.get_iterable_dataset(buffer_size=10000, shuffle=True)
     train_dataloader = torch_utils.DataLoaderWithLength(
         dataset=dataset, batch_size=train_batch_size, collate_fn=task.collate_fn,
diff --git a/jiant/tasks/evaluate/core.py b/jiant/tasks/evaluate/core.py
index 65733904d..2bae0b809 100644
--- a/jiant/tasks/evaluate/core.py
+++ b/jiant/tasks/evaluate/core.py
@@ -908,7 +908,7 @@ def compute_metrics_from_preds_and_labels(cls, preds, labels):
 
 
 def get_evaluation_scheme_for_task(task) -> BaseEvaluationScheme:
-    # TODO: move logic to task? (Issue #52)
+    # TODO: move logic to task? (issue #1182)
     if isinstance(
         task,
         (
diff --git a/jiant/tasks/lib/templates/hacky_tokenization_matching.py b/jiant/tasks/lib/templates/hacky_tokenization_matching.py
index ca1fdd7b9..96ed3be64 100644
--- a/jiant/tasks/lib/templates/hacky_tokenization_matching.py
+++ b/jiant/tasks/lib/templates/hacky_tokenization_matching.py
@@ -1,4 +1,4 @@
-"""TODO: Remove when Tokenizers gets better (Issue #43)"""
+"""TODO: Remove when Tokenizers gets better (issue #1189)"""
 import transformers
 
 from jiant.tasks.utils import ExclusiveSpan
@@ -83,7 +83,7 @@ def roberta_flat_strip(tokens, return_indices=False):
 
 
 def xlm_roberta_flat_strip(tokens, return_indices=False):
-    # TODO: Refactor to use general SentencePiece function (Issue #53)
+    # TODO: Refactor to use general SentencePiece function (issue #1181)
     return albert_flat_strip(tokens=tokens, return_indices=return_indices)
 
 
diff --git a/jiant/tasks/retrieval.py b/jiant/tasks/retrieval.py
index 6e6aca8d2..51d3855d9 100644
--- a/jiant/tasks/retrieval.py
+++ b/jiant/tasks/retrieval.py
@@ -147,7 +147,7 @@ def create_task_from_config(config: dict, base_path: Optional[str] = None, verbo
     task_class = get_task_class(config["task"])
     for k in config["paths"].keys():
         path = config["paths"][k]
-        # TODO: Refactor paths (Issue #54)
+        # TODO: Refactor paths (issue #1180)
         if isinstance(path, str) and not os.path.isabs(path):
             assert base_path
             config["paths"][k] = os.path.join(base_path, path)
diff --git a/jiant/utils/torch_utils.py b/jiant/utils/torch_utils.py
index 98b7b65b0..81404f4c1 100644
--- a/jiant/utils/torch_utils.py
+++ b/jiant/utils/torch_utils.py
@@ -94,7 +94,7 @@ def get_only_requires_grad(parameters, requires_grad=True):
     elif isinstance(parameters, dict):
         return {n: p for n, p in parameters if p.requires_grad == requires_grad}
     else:
-        # TODO: Support generators (Issue #56)
+        # TODO: Support generators (issue #1178)
         raise RuntimeError("generators not yet supported")
 
 
@@ -111,7 +111,7 @@ def __getitem__(self, item):
 
 class DataLoaderWithLength(DataLoader):
     def __len__(self):
-        # TODO: Revert after https://github.com/pytorch/pytorch/issues/36176 addressed (Issue #55)
+        # TODO: Revert after https://github.com/pytorch/pytorch/issues/36176 addressed (issue #1179)
         # try:
         #     return super().__len__()
         # except TypeError as e:
diff --git a/jiant/utils/zconf/core.py b/jiant/utils/zconf/core.py
index 68b0d3690..70eee36ae 100644
--- a/jiant/utils/zconf/core.py
+++ b/jiant/utils/zconf/core.py
@@ -58,7 +58,7 @@ def argparse_attr(
 
 
 def update_parser(parser, class_with_attributes: Any):
-    # TODO: Write more checks/tests for the parser creation in general (Issue #57)
+    # TODO: Write more checks/tests for the parser creation in general (issue #1177)
     for attribute in class_with_attributes.__attrs_attrs__:
         if "argparse_kwargs" in attribute.metadata:
             argparse_kwargs = attribute.metadata["argparse_kwargs"]