From e0a2da2dc782f8402ca7314d20dd9c054dae44be Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Tue, 17 Dec 2019 18:24:39 -0500
Subject: [PATCH 01/12] Adding BERT for MS-MARCO passage re-ranking pretrained
 model

---
 texar/torch/modules/pretrained/bert.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index 5f3de240e..eaf78ceea 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -34,6 +34,7 @@
 _BIOBERT_PATH = "https://github.com/naver/biobert-pretrained/releases/download/"
 _SCIBERT_PATH = "https://s3-us-west-2.amazonaws.com/ai2-s2-research/" \
                 "scibert/tensorflow_models/"
+_BERT_MSMARCO_PATH = "https://drive.google.com/file/d/"
 
 
 class PretrainedBERTMixin(PretrainedMixin, ABC):
@@ -97,6 +98,16 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
         * ``scibert-basevocab-cased``: Cased version of the model trained on
           the original BERT vocabulary.
 
+    * **BERT for MS-MARCO**: proposed in (`Nogueira et al`. 2019)
+      `Passage Re-ranking with BERT`_. A BERT model fine-tuned on MS-MARCO
+      (Nguyen et al., 2016) dataset. It's the best performing model (on Jan 8th
+      2019) on MS-MARCO Passage re-ranking task. Two models are included:
+
+        * ``bert-msmarco-base``: Original BERT base model fine-tuned on
+          MS-MARCO.
+        * ``bert-msmarco-large``: Original BERT large model fine-tuned on
+          MS-MARCO.
+
     We provide the following BERT classes:
 
       * :class:`~texar.torch.modules.BERTEncoder` for text encoding.
@@ -111,6 +122,9 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
 
     .. _`SciBERT: A Pretrained Language Model for Scientific Text`:
         https://arxiv.org/abs/1903.10676
+
+    .. _`BERT for MS-MARCO: Passage re-ranking with BERT`:
+        https://arxiv.org/abs/1901.04085
     """
 
     _MODEL_NAME = "BERT"
@@ -150,6 +164,12 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
             _SCIBERT_PATH + 'scibert_basevocab_uncased.tar.gz',
         'scibert-basevocab-cased':
             _SCIBERT_PATH + 'scibert_basevocab_cased.tar.gz',
+
+        # BERT for MS-MARCO
+        'bert-msmarco-base':
+            _BERT_MSMARCO_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX/view',
+        'bert-msmarco-large':
+            _BERT_MSMARCO_PATH + '1crlASTMlsihALlkabAQP6JTYIZwC1Wm8/view'
     }
     _MODEL2CKPT = {
         # Standard BERT
@@ -172,6 +192,10 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
         'scibert-scivocab-cased': 'bert_model.ckpt',
         'scibert-basevocab-uncased': 'bert_model.ckpt',
         'scibert-basevocab-cased': 'bert_model.ckpt',
+
+        # BERT for MSMARCO
+        'bert-msmarco-base': 'model.ckpt-100000',
+        'bert-msmarco-large': 'model.ckpt-100000',
     }
 
     @classmethod

From 90a06a18900f087f66f4d9396f32ef5d3d1fa2fa Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Fri, 20 Dec 2019 11:05:41 -0500
Subject: [PATCH 02/12] Adding logits layer weights and bias

---
 texar/torch/modules/pretrained/bert.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index eaf78ceea..2271d7f4e 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -167,9 +167,9 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
 
         # BERT for MS-MARCO
         'bert-msmarco-base':
-            _BERT_MSMARCO_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX/view',
+            _BERT_MSMARCO_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX/',
         'bert-msmarco-large':
-            _BERT_MSMARCO_PATH + '1crlASTMlsihALlkabAQP6JTYIZwC1Wm8/view'
+            _BERT_MSMARCO_PATH + '1crlASTMlsihALlkabAQP6JTYIZwC1Wm8/'
     }
     _MODEL2CKPT = {
         # Standard BERT
@@ -325,7 +325,9 @@ def _init_from_checkpoint(self, pretrained_model_name: str,
         }
         pooler_map = {
             'bert/pooler/dense/bias': 'pooler.0.bias',
-            'bert/pooler/dense/kernel': 'pooler.0.weight'
+            'bert/pooler/dense/kernel': 'pooler.0.weight',
+            'output_bias': '_logits_layer.bias',
+            'output_weights': '_logits_layer.weight',
         }
         tf_path = os.path.abspath(os.path.join(
             cache_dir, self._MODEL2CKPT[pretrained_model_name]))

From 014c558b0d1f0d6f1a5558357400206240df9a1d Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Fri, 20 Dec 2019 12:03:20 -0500
Subject: [PATCH 03/12] Making the PretrainedMixin work for both encoder and
 classifier

---
 .../modules/classifiers/bert_classifier.py    |  4 +++
 texar/torch/modules/pretrained/bert.py        | 26 ++++++++++++++++---
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/texar/torch/modules/classifiers/bert_classifier.py b/texar/torch/modules/classifiers/bert_classifier.py
index 5303e7259..a0dc2f8e8 100644
--- a/texar/torch/modules/classifiers/bert_classifier.py
+++ b/texar/torch/modules/classifiers/bert_classifier.py
@@ -71,6 +71,8 @@ def __init__(self,
 
         super().__init__(hparams=hparams)
 
+        self.load_pretrained_config(pretrained_model_name, cache_dir)
+
         # Create the underlying encoder
         encoder_hparams = dict_fetch(hparams,
                                      self._ENCODER_CLASS.default_hparams())
@@ -120,6 +122,8 @@ def __init__(self,
                          (self.num_classes <= 0 and
                           self._hparams.encoder.dim == 1)
 
+        self.init_pretrained_weights(class_type='classifier')
+
     @staticmethod
     def default_hparams():
         r"""Returns a dictionary of hyperparameters with default values.
diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index 2271d7f4e..5341fd875 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -326,12 +326,20 @@ def _init_from_checkpoint(self, pretrained_model_name: str,
         pooler_map = {
             'bert/pooler/dense/bias': 'pooler.0.bias',
             'bert/pooler/dense/kernel': 'pooler.0.weight',
+        }
+        classifier_map = {
             'output_bias': '_logits_layer.bias',
             'output_weights': '_logits_layer.weight',
         }
+        global_prefix_map = {
+            'classifier': '_encoder.'
+        }
         tf_path = os.path.abspath(os.path.join(
             cache_dir, self._MODEL2CKPT[pretrained_model_name]))
 
+        class_type = kwargs.get('class_type', 'encoder')
+        global_prefix = global_prefix_map.get(class_type, '')
+
         # Load weights from TF model
         init_vars = tf.train.list_variables(tf_path)
         tfnames, arrays = [], []
@@ -351,13 +359,14 @@ def _init_from_checkpoint(self, pretrained_model_name: str,
                 continue
 
             if name in global_tensor_map:
-                v_name = global_tensor_map[name]
+                v_name = global_prefix + global_tensor_map[name]
                 pointer = self._name_to_variable(v_name)
                 assert pointer.shape == array.shape
                 pointer.data = torch.from_numpy(array)
                 idx += 1
             elif name in pooler_map:
-                pointer = self._name_to_variable(pooler_map[name])
+                pointer = self._name_to_variable(global_prefix +
+                                                 pooler_map[name])
                 if name.endswith('bias'):
                     assert pointer.shape == array.shape
                     pointer.data = torch.from_numpy(array)
@@ -367,6 +376,13 @@ def _init_from_checkpoint(self, pretrained_model_name: str,
                     assert pointer.shape == array_t.shape
                     pointer.data = torch.from_numpy(array_t)
                     idx += 1
+            elif name in classifier_map:
+                if class_type != 'classifier':
+                    continue
+                pointer = self._name_to_variable(classifier_map[name])
+                assert pointer.shape == array.shape
+                pointer.data = torch.from_numpy(array)
+                idx += 1
             else:
                 # here name is the TensorFlow variable name
                 name_tmp = name.split("/")
@@ -375,12 +391,14 @@ def _init_from_checkpoint(self, pretrained_model_name: str,
                 name_tmp = "/".join(name_tmp[3:])
                 if name_tmp in layer_tensor_map:
                     v_name = layer_tensor_map[name_tmp].format(layer_no)
-                    pointer = self._name_to_variable(py_prefix + v_name)
+                    pointer = self._name_to_variable(global_prefix +
+                                                     py_prefix + v_name)
                     assert pointer.shape == array.shape
                     pointer.data = torch.from_numpy(array)
                 elif name_tmp in layer_transpose_map:
                     v_name = layer_transpose_map[name_tmp].format(layer_no)
-                    pointer = self._name_to_variable(py_prefix + v_name)
+                    pointer = self._name_to_variable(global_prefix +
+                                                     py_prefix + v_name)
                     array_t = np.transpose(array)
                     assert pointer.shape == array_t.shape
                     pointer.data = torch.from_numpy(array_t)

From 8938520c6032984a788cdd3ba8a2ab5e1669db7a Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Fri, 20 Dec 2019 13:31:27 -0500
Subject: [PATCH 04/12] Adding tokenizer part

---
 texar/torch/data/tokenizers/bert_tokenizer.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/texar/torch/data/tokenizers/bert_tokenizer.py b/texar/torch/data/tokenizers/bert_tokenizer.py
index 1d0f244ff..c5824989c 100644
--- a/texar/torch/data/tokenizers/bert_tokenizer.py
+++ b/texar/torch/data/tokenizers/bert_tokenizer.py
@@ -74,6 +74,10 @@ class BERTTokenizer(PretrainedBERTMixin, TokenizerBase):
         'scibert-scivocab-cased': 512,
         'scibert-basevocab-uncased': 512,
         'scibert-basevocab-cased': 512,
+
+        # BERT for MS-MARCO
+        'bert-msmarco-base': 512,
+        'bert-msmarco-large': 512,
     }
     _VOCAB_FILE_NAMES = {'vocab_file': 'vocab.txt'}
     _VOCAB_FILE_MAP = {
@@ -98,6 +102,10 @@ class BERTTokenizer(PretrainedBERTMixin, TokenizerBase):
             'scibert-scivocab-cased': 'vocab.txt',
             'scibert-basevocab-uncased': 'vocab.txt',
             'scibert-basevocab-cased': 'vocab.txt',
+
+            # BERT for MS-MARCO
+            'bert-msmarco-base': 'vocab.txt',
+            'bert-msmarco-large': 'vocab.txt',
         }
     }
 

From 703896f54b54ed56665d624b8d360b4fe495320e Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Fri, 20 Dec 2019 14:19:12 -0500
Subject: [PATCH 05/12] Rename docstring link target to the paper title

---
 texar/torch/modules/pretrained/bert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index 5341fd875..5da028057 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -123,7 +123,7 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
     .. _`SciBERT: A Pretrained Language Model for Scientific Text`:
         https://arxiv.org/abs/1903.10676
 
-    .. _`BERT for MS-MARCO: Passage re-ranking with BERT`:
+    .. _`Passage re-ranking with BERT`:
         https://arxiv.org/abs/1901.04085
     """
 

From e4a97381ecc1a2a582b6f404ca976813e7ed6678 Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Fri, 20 Dec 2019 14:43:00 -0500
Subject: [PATCH 06/12] Capitalize "Re-ranking" in docstring link target

---
 texar/torch/modules/pretrained/bert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index 5da028057..088f8864e 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -123,7 +123,7 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
     .. _`SciBERT: A Pretrained Language Model for Scientific Text`:
         https://arxiv.org/abs/1903.10676
 
-    .. _`Passage re-ranking with BERT`:
+    .. _`Passage Re-ranking with BERT`:
         https://arxiv.org/abs/1901.04085
     """
 

From 67b1951a4cfa5e3dbeff6d5a8267b8695b48b45a Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Fri, 20 Dec 2019 15:17:22 -0500
Subject: [PATCH 07/12] Add backticks to Nguyen et al. citation in docstring

---
 texar/torch/modules/pretrained/bert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index 088f8864e..c327601c6 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -100,7 +100,7 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
 
     * **BERT for MS-MARCO**: proposed in (`Nogueira et al`. 2019)
       `Passage Re-ranking with BERT`_. A BERT model fine-tuned on MS-MARCO
-      (Nguyen et al., 2016) dataset. It's the best performing model (on Jan 8th
+      (`Nguyen et al`., 2016) dataset. It's the best performing model (on Jan 8th
       2019) on MS-MARCO Passage re-ranking task. Two models are included:
 
         * ``bert-msmarco-base``: Original BERT base model fine-tuned on

From e8663fd1f22b37e16f478573fa4ffa988929e152 Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Fri, 20 Dec 2019 15:34:03 -0500
Subject: [PATCH 08/12] Lint: rewrap over-long docstring line

---
 texar/torch/modules/pretrained/bert.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index c327601c6..ba3016a9f 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -100,8 +100,8 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
 
     * **BERT for MS-MARCO**: proposed in (`Nogueira et al`. 2019)
       `Passage Re-ranking with BERT`_. A BERT model fine-tuned on MS-MARCO
-      (`Nguyen et al`., 2016) dataset. It's the best performing model (on Jan 8th
-      2019) on MS-MARCO Passage re-ranking task. Two models are included:
+      (`Nguyen et al`., 2016) dataset. It's the best performing model (on Jan
+      8th 2019) on MS-MARCO Passage re-ranking task. Two models are included:
 
         * ``bert-msmarco-base``: Original BERT base model fine-tuned on
           MS-MARCO.

From 31d3e07a478f9fe5b30132bb57fccbf1ea27104f Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Mon, 23 Dec 2019 14:10:24 -0500
Subject: [PATCH 09/12] Renaming MS-MARCO models to bert-msmarco-nogueira19-*

---
 texar/torch/data/tokenizers/bert_tokenizer.py |  8 ++++----
 texar/torch/modules/pretrained/bert.py        | 14 +++++++-------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/texar/torch/data/tokenizers/bert_tokenizer.py b/texar/torch/data/tokenizers/bert_tokenizer.py
index c5824989c..f11dab9be 100644
--- a/texar/torch/data/tokenizers/bert_tokenizer.py
+++ b/texar/torch/data/tokenizers/bert_tokenizer.py
@@ -76,8 +76,8 @@ class BERTTokenizer(PretrainedBERTMixin, TokenizerBase):
         'scibert-basevocab-cased': 512,
 
         # BERT for MS-MARCO
-        'bert-msmarco-base': 512,
-        'bert-msmarco-large': 512,
+        'bert-msmarco-nogueira19-base': 512,
+        'bert-msmarco-nogueira19-large': 512,
     }
     _VOCAB_FILE_NAMES = {'vocab_file': 'vocab.txt'}
     _VOCAB_FILE_MAP = {
@@ -104,8 +104,8 @@ class BERTTokenizer(PretrainedBERTMixin, TokenizerBase):
             'scibert-basevocab-cased': 'vocab.txt',
 
             # BERT for MS-MARCO
-            'bert-msmarco-base': 'vocab.txt',
-            'bert-msmarco-large': 'vocab.txt',
+            'bert-msmarco-nogueira19-base': 'vocab.txt',
+            'bert-msmarco-nogueira19-large': 'vocab.txt',
         }
     }
 
diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index ba3016a9f..1b18a722d 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -34,7 +34,7 @@
 _BIOBERT_PATH = "https://github.com/naver/biobert-pretrained/releases/download/"
 _SCIBERT_PATH = "https://s3-us-west-2.amazonaws.com/ai2-s2-research/" \
                 "scibert/tensorflow_models/"
-_BERT_MSMARCO_PATH = "https://drive.google.com/file/d/"
+_BERT_MSMARCO_NOGUEIRA19_PATH = "https://drive.google.com/file/d/"
 
 
 class PretrainedBERTMixin(PretrainedMixin, ABC):
@@ -103,9 +103,9 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
       (`Nguyen et al`., 2016) dataset. It's the best performing model (on Jan
       8th 2019) on MS-MARCO Passage re-ranking task. Two models are included:
 
-        * ``bert-msmarco-base``: Original BERT base model fine-tuned on
+        * ``bert-msmarco-nogueira19-base``: Original BERT base model fine-tuned on
           MS-MARCO.
-        * ``bert-msmarco-large``: Original BERT large model fine-tuned on
+        * ``bert-msmarco-nogueira19-large``: Original BERT large model fine-tuned on
           MS-MARCO.
 
     We provide the following BERT classes:
@@ -167,9 +167,9 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
 
         # BERT for MS-MARCO
         'bert-msmarco-base':
-            _BERT_MSMARCO_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX/',
+            _BERT_MSMARCO_NOGUEIRA19_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX/',
         'bert-msmarco-large':
-            _BERT_MSMARCO_PATH + '1crlASTMlsihALlkabAQP6JTYIZwC1Wm8/'
+            _BERT_MSMARCO_NOGUEIRA19_PATH + '1crlASTMlsihALlkabAQP6JTYIZwC1Wm8/'
     }
     _MODEL2CKPT = {
         # Standard BERT
@@ -194,8 +194,8 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
         'scibert-basevocab-cased': 'bert_model.ckpt',
 
         # BERT for MSMARCO
-        'bert-msmarco-base': 'model.ckpt-100000',
-        'bert-msmarco-large': 'model.ckpt-100000',
+        'bert-msmarco-nogueira19-base': 'model.ckpt-100000',
+        'bert-msmarco-nogueira19-large': 'model.ckpt-100000',
     }
 
     @classmethod

From 824d947cc4b38306ca4afe675fe9ea1f200d0f83 Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Mon, 23 Dec 2019 14:23:46 -0500
Subject: [PATCH 10/12] Renaming MS-MARCO URL keys missed in patch 09

---
 texar/torch/modules/pretrained/bert.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index 1b18a722d..3a9be7563 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -166,9 +166,9 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
             _SCIBERT_PATH + 'scibert_basevocab_cased.tar.gz',
 
         # BERT for MS-MARCO
-        'bert-msmarco-base':
+        'bert-msmarco-nogueira19-base':
             _BERT_MSMARCO_NOGUEIRA19_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX/',
-        'bert-msmarco-large':
+        'bert-msmarco-nogueira19-large':
             _BERT_MSMARCO_NOGUEIRA19_PATH + '1crlASTMlsihALlkabAQP6JTYIZwC1Wm8/'
     }
     _MODEL2CKPT = {

From 7d56607f3cd3e90c4207fa6ebc77c64c73c57156 Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Mon, 23 Dec 2019 15:56:24 -0500
Subject: [PATCH 11/12] Avoiding duplicate downloads

---
 texar/torch/modules/classifiers/bert_classifier.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/texar/torch/modules/classifiers/bert_classifier.py b/texar/torch/modules/classifiers/bert_classifier.py
index a0dc2f8e8..9cc0a1b6f 100644
--- a/texar/torch/modules/classifiers/bert_classifier.py
+++ b/texar/torch/modules/classifiers/bert_classifier.py
@@ -76,9 +76,10 @@ def __init__(self,
         # Create the underlying encoder
         encoder_hparams = dict_fetch(hparams,
                                      self._ENCODER_CLASS.default_hparams())
+        encoder_hparams['pretrained_model_name'] = None
 
         self._encoder = self._ENCODER_CLASS(
-            pretrained_model_name=pretrained_model_name,
+            pretrained_model_name=None,
             cache_dir=cache_dir,
             hparams=encoder_hparams)
 

From 6e3920a2a86c0f340c52289edf849789d6a95e2c Mon Sep 17 00:00:00 2001
From: Atif Ahmed <atif.ahmed@petuum.com>
Date: Thu, 26 Dec 2019 13:58:22 -0500
Subject: [PATCH 12/12] Lint: rewrap docstring, trim base model URL

---
 texar/torch/modules/pretrained/bert.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/texar/torch/modules/pretrained/bert.py b/texar/torch/modules/pretrained/bert.py
index 3a9be7563..07db3da44 100644
--- a/texar/torch/modules/pretrained/bert.py
+++ b/texar/torch/modules/pretrained/bert.py
@@ -103,10 +103,10 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
       (`Nguyen et al`., 2016) dataset. It's the best performing model (on Jan
       8th 2019) on MS-MARCO Passage re-ranking task. Two models are included:
 
-        * ``bert-msmarco-nogueira19-base``: Original BERT base model fine-tuned on
-          MS-MARCO.
-        * ``bert-msmarco-nogueira19-large``: Original BERT large model fine-tuned on
-          MS-MARCO.
+        * ``bert-msmarco-nogueira19-base``: Original BERT base model fine-tuned
+          on MS-MARCO.
+        * ``bert-msmarco-nogueira19-large``: Original BERT large model
+          fine-tuned on MS-MARCO.
 
     We provide the following BERT classes:
 
@@ -167,7 +167,7 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
 
         # BERT for MS-MARCO
         'bert-msmarco-nogueira19-base':
-            _BERT_MSMARCO_NOGUEIRA19_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX/',
+            _BERT_MSMARCO_NOGUEIRA19_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX',
         'bert-msmarco-nogueira19-large':
             _BERT_MSMARCO_NOGUEIRA19_PATH + '1crlASTMlsihALlkabAQP6JTYIZwC1Wm8/'
     }