diff --git a/jiant/proj/main/modeling/taskmodels.py b/jiant/proj/main/modeling/taskmodels.py
index 9de891469..f656229d9 100644
--- a/jiant/proj/main/modeling/taskmodels.py
+++ b/jiant/proj/main/modeling/taskmodels.py
@@ -46,7 +46,7 @@ def __init__(self, encoder, regression_head: heads.RegressionHead):
 
     def forward(self, batch, task, tokenizer, compute_loss: bool = False):
         encoder_output = get_output_from_encoder_and_batch(encoder=self.encoder, batch=batch)
-        # TODO: Abuse of notation - these aren't really logits (Issue #45)
+        # TODO: Abuse of notation - these aren't really logits (issue #1187)
         logits = self.regression_head(pooled=encoder_output.pooled)
         if compute_loss:
             loss_fct = nn.MSELoss()
@@ -247,9 +247,9 @@ def forward(self, batch, task, tokenizer, compute_loss: bool = False):
         else:
             raise TypeError(type(self.pooler_head))
 
-        # TODO: Abuse of notation - these aren't really logits (Issue #45)
+        # TODO: Abuse of notation - these aren't really logits (issue #1187)
         if compute_loss:
-            # TODO: make this optional? (Issue #45)
+            # TODO: make this optional? (issue #1187)
             return LogitsAndLossOutput(
                 logits=logits,
                 loss=torch.tensor([0.0]),  # This is a horrible hack
diff --git a/jiant/proj/main/preprocessing.py b/jiant/proj/main/preprocessing.py
index 31c738389..e1f69fca4 100644
--- a/jiant/proj/main/preprocessing.py
+++ b/jiant/proj/main/preprocessing.py
@@ -39,7 +39,7 @@ def smart_truncate(dataset: torch_utils.ListDataset, max_seq_length: int, verbos
     range_idx = np.arange(max_seq_length)
     for datum in dataset.data:
         # TODO: document why reshape and max happen here (for cola this isn't necessary).
-        #   (Issue #47)
+        #   (issue #1185)
         indexer = datum["data_row"].input_mask.reshape(-1, max_seq_length).max(-2)
         valid_length_ls.append(range_idx[indexer.astype(bool)].max() + 1)
     max_valid_length = max(valid_length_ls)
@@ -107,7 +107,7 @@ def convert_examples_to_dataset(
     Args:
         task (Task): Task object
         examples (list[Example]): list of task Examples.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         feat_spec (FeaturizationSpec): Tokenization-related metadata.
         phase (str): string identifying the data subset (e.g., train, val or test).
         verbose: If True, display progress bar.
@@ -157,7 +157,7 @@ def tokenize_and_featurize(
     Args:
         task (Task): Task object
         examples (list[Example]): list of task Examples.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         feat_spec (FeaturizationSpec): Tokenization-related metadata.
         phase (str): string identifying the data subset (e.g., train, val or test).
         verbose: If True, display progress bar.
@@ -166,7 +166,7 @@ def tokenize_and_featurize(
         List DataRows containing tokenized and featurized examples.
 
     """
-    # TODO: Better solution (Issue #48)
+    # TODO: Better solution (issue #1184)
     if task.TASK_TYPE == TaskTypes.SQUAD_STYLE_QA:
         data_rows = []
         for example in maybe_tqdm(examples, desc="Tokenizing", verbose=verbose):
@@ -193,7 +193,7 @@ def iter_chunk_tokenize_and_featurize(
     Args:
         task (Task): Task object
        examples (list[Example]): list of task Examples.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         feat_spec (FeaturizationSpec): Tokenization-related metadata.
         phase (str): string identifying the data subset (e.g., train, val or test).
         verbose: If True, display progress bar.
@@ -203,7 +203,7 @@ def iter_chunk_tokenize_and_featurize(
 
     """
     for example in maybe_tqdm(examples, desc="Tokenizing", verbose=verbose):
-        # TODO: Better solution (Issue #48)
+        # TODO: Better solution (issue #1184)
         if task.TASK_TYPE == TaskTypes.SQUAD_STYLE_QA:
             yield from example.to_feature_list(
                 tokenizer=tokenizer,
diff --git a/jiant/proj/main/runner.py b/jiant/proj/main/runner.py
index 623a21418..dada9655f 100644
--- a/jiant/proj/main/runner.py
+++ b/jiant/proj/main/runner.py
@@ -225,7 +225,7 @@ def complex_backpropagate(self, loss, gradient_accumulation_steps):
         )
 
     def get_runner_state(self):
-        # TODO: Add fp16 (Issue #46)
+        # TODO: Add fp16 (issue #1186)
         state = {
             "model": torch_utils.get_model_for_saving(self.jiant_model).state_dict(),
             "optimizer": self.optimizer_scheduler.optimizer.state_dict(),
diff --git a/jiant/proj/main/tokenize_and_cache.py b/jiant/proj/main/tokenize_and_cache.py
index b84b32ece..9fe19bdf6 100644
--- a/jiant/proj/main/tokenize_and_cache.py
+++ b/jiant/proj/main/tokenize_and_cache.py
@@ -39,7 +39,7 @@ def chunk_and_save(task, phase, examples, feat_spec, tokenizer, args: RunConfigu
         phase (str): string identifying the data subset (e.g., train, val or test).
         examples (list[Example]): list of task Examples.
         feat_spec: (FeaturizationSpec): Tokenization-related metadata.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         args (RunConfiguration): run configuration object.
 
     """
@@ -71,7 +71,7 @@ def full_chunk_and_save(task, phase, examples, feat_spec, tokenizer, args: RunCo
         phase (str): string identifying the data subset (e.g., train, val or test).
         examples (list[Example]): list of task Examples.
         feat_spec: (FeaturizationSpec): Tokenization-related metadata.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         args (RunConfiguration): run configuration object.
 
     """
@@ -108,7 +108,7 @@ def iter_chunk_and_save(task, phase, examples, feat_spec, tokenizer, args: RunCo
         phase (str): string identifying the data subset (e.g., train, val or test).
         examples (list[Example]): list of task Examples.
         feat_spec: (FeaturizationSpec): Tokenization-related metadata.
-        tokenizer: TODO (Issue #44)
+        tokenizer: TODO (issue #1188)
         args (RunConfiguration): run configuration object.
 
     """
diff --git a/jiant/proj/simple/runscript.py b/jiant/proj/simple/runscript.py
index 9b4a592a7..0db080999 100644
--- a/jiant/proj/simple/runscript.py
+++ b/jiant/proj/simple/runscript.py
@@ -153,7 +153,7 @@ def run_simple(args: RunConfiguration):
             ),
             output_dir=os.path.join(args.exp_dir, "cache", task_name),
             phases=phases_to_do,
-            # TODO: Need a strategy for task-specific max_seq_length issues (Issue #66)
+            # TODO: Need a strategy for task-specific max_seq_length issues (issue #1176)
             max_seq_length=args.max_seq_length,
             smart_truncate=True,
             do_iter=True,
diff --git a/jiant/scripts/benchmarks/xtreme/xtreme_submission.py b/jiant/scripts/benchmarks/xtreme/xtreme_submission.py
index 45d723b02..d07c92194 100644
--- a/jiant/scripts/benchmarks/xtreme/xtreme_submission.py
+++ b/jiant/scripts/benchmarks/xtreme/xtreme_submission.py
@@ -45,7 +45,7 @@ class RunConfiguration(zconf.RunConfig):
 
     # === Nuisance Parameters === #
     # Required for quickly setting up runner
-    # Remove/refactor with config refactor (Issue #66)
+    # Remove/refactor with config refactor (issue #1176)
     learning_rate = zconf.attr(default=1e-5, type=float)
     adam_epsilon = zconf.attr(default=1e-8, type=float)
     max_grad_norm = zconf.attr(default=1.0, type=float)
diff --git a/jiant/shared/runner.py b/jiant/shared/runner.py
index f1c67c6b3..4ac21feac 100644
--- a/jiant/shared/runner.py
+++ b/jiant/shared/runner.py
@@ -31,7 +31,7 @@ def complex_backpropagate(
 def get_train_dataloader_from_cache(
     train_cache: caching.ChunkedFilesDataCache, task, train_batch_size: int
 ):
-    # TODO: Expose buffer_size parameter (Issue #50)
+    # TODO: Expose buffer_size parameter (issue #1183)
     dataset = train_cache.get_iterable_dataset(buffer_size=10000, shuffle=True)
     train_dataloader = torch_utils.DataLoaderWithLength(
         dataset=dataset, batch_size=train_batch_size, collate_fn=task.collate_fn,
diff --git a/jiant/tasks/evaluate/core.py b/jiant/tasks/evaluate/core.py
index 65733904d..2bae0b809 100644
--- a/jiant/tasks/evaluate/core.py
+++ b/jiant/tasks/evaluate/core.py
@@ -908,7 +908,7 @@ def compute_metrics_from_preds_and_labels(cls, preds, labels):
 
 
 def get_evaluation_scheme_for_task(task) -> BaseEvaluationScheme:
-    # TODO: move logic to task? (Issue #52)
+    # TODO: move logic to task? (issue #1182)
     if isinstance(
         task,
         (
diff --git a/jiant/tasks/lib/templates/hacky_tokenization_matching.py b/jiant/tasks/lib/templates/hacky_tokenization_matching.py
index ca1fdd7b9..96ed3be64 100644
--- a/jiant/tasks/lib/templates/hacky_tokenization_matching.py
+++ b/jiant/tasks/lib/templates/hacky_tokenization_matching.py
@@ -1,4 +1,4 @@
-"""TODO: Remove when Tokenizers gets better (Issue #43)"""
+"""TODO: Remove when Tokenizers gets better (issue #1189)"""
 import transformers
 
 from jiant.tasks.utils import ExclusiveSpan
@@ -83,7 +83,7 @@ def roberta_flat_strip(tokens, return_indices=False):
 
 
 def xlm_roberta_flat_strip(tokens, return_indices=False):
-    # TODO: Refactor to use general SentencePiece function (Issue #53)
+    # TODO: Refactor to use general SentencePiece function (issue #1181)
     return albert_flat_strip(tokens=tokens, return_indices=return_indices)
 
 
diff --git a/jiant/tasks/retrieval.py b/jiant/tasks/retrieval.py
index 6e6aca8d2..51d3855d9 100644
--- a/jiant/tasks/retrieval.py
+++ b/jiant/tasks/retrieval.py
@@ -147,7 +147,7 @@ def create_task_from_config(config: dict, base_path: Optional[str] = None, verbo
     task_class = get_task_class(config["task"])
     for k in config["paths"].keys():
         path = config["paths"][k]
-        # TODO: Refactor paths (Issue #54)
+        # TODO: Refactor paths (issue #1180)
         if isinstance(path, str) and not os.path.isabs(path):
             assert base_path
             config["paths"][k] = os.path.join(base_path, path)
diff --git a/jiant/utils/torch_utils.py b/jiant/utils/torch_utils.py
index 98b7b65b0..81404f4c1 100644
--- a/jiant/utils/torch_utils.py
+++ b/jiant/utils/torch_utils.py
@@ -94,7 +94,7 @@ def get_only_requires_grad(parameters, requires_grad=True):
     elif isinstance(parameters, dict):
         return {n: p for n, p in parameters if p.requires_grad == requires_grad}
     else:
-        # TODO: Support generators (Issue #56)
+        # TODO: Support generators (issue #1178)
         raise RuntimeError("generators not yet supported")
 
 
@@ -111,7 +111,7 @@ def __getitem__(self, item):
 
 class DataLoaderWithLength(DataLoader):
     def __len__(self):
-        # TODO: Revert after https://github.com/pytorch/pytorch/issues/36176 addressed (Issue #55)
+        # TODO: Revert after https://github.com/pytorch/pytorch/issues/36176 addressed (issue #1179)
         # try:
         #     return super().__len__()
         # except TypeError as e:
diff --git a/jiant/utils/zconf/core.py b/jiant/utils/zconf/core.py
index 68b0d3690..70eee36ae 100644
--- a/jiant/utils/zconf/core.py
+++ b/jiant/utils/zconf/core.py
@@ -58,7 +58,7 @@ def argparse_attr(
 
 
 def update_parser(parser, class_with_attributes: Any):
-    # TODO: Write more checks/tests for the parser creation in general (Issue #57)
+    # TODO: Write more checks/tests for the parser creation in general (issue #1177)
     for attribute in class_with_attributes.__attrs_attrs__:
         if "argparse_kwargs" in attribute.metadata:
             argparse_kwargs = attribute.metadata["argparse_kwargs"]