dmlc · leezu · Jan 14, 2020 · Oct 25, 2019 · Oct 25, 2019 · Oct 25, 2019
@@ -236,13 +236,33 @@ Results using `xlnet_12_768_12`:
 +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
 | QQP             |Accuracy             |90                     |`log <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_QQP.log>`__                    |`command <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_QQP.sh>`__                                      |
 +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| MNLI            |Accuracy             |87/86                  |`log <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_MNLI.log>`__                   |`command <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_MNLI.sh>`__                                     |
+| MNLI            |Accuracy(m/mm)       |87/86                  |`log <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_MNLI.log>`__                   |`command <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_MNLI.sh>`__                                     |
 +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
 | QNLI            |Accuracy             |88                     |`log <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_QNLI.log>`__                   |`command <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_QNLI.sh>`__                                     |
 +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
 | RTE             |Accuracy             |74                     |`log <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_RTE.log>`__                    |`command <https://github.com/dmlc/web-data/tree/master/gluonnlp/logs/language_model/xlnet_l12_h768_a12_finetuned_RTE.sh>`__                                      |
 +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
+Results using `xlnet_24_1024_16`, fine-tuned with the hyperparameters reported
+by the paper authors:
+
++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|Task Name        |Metrics              |Results on Dev Set     |log                                                                                                                                         |command                                                                                                                                                          |
++=================+=====================+=======================+============================================================================================================================================+=================================================================================================================================================================+
+| CoLA            |Matthews Corr.       |67                     |`log <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_CoLA.log>`__                  |`command <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_CoLA.sh>`__                                    |
++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| SST-2           |Accuracy             |94                     |`log <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_SST.log>`__                   |`command <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_SST.sh>`__                                     |
++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| MRPC            |Accuracy/F1          |90.2/93                |`log <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_MRPC.log>`__                  |`command <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_MRPC.sh>`__                                    |
++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| STS-B           |Pearson Corr.        |91.37                  |`log <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_STS-B.log>`__                 |`command <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_STS-B.sh>`__                                   |
++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| QQP             |Accuracy             |91.94                  |`log <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_QQP.log>`__                   |`command <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_QQP.sh>`__                                     |
++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| MNLI            |Accuracy(m/mm)       |89.93/89.91            |`log <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_MNLI.log>`__                  |`command <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_MNLI.sh>`__                                    |
++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| RTE             |Accuracy             |84.12                  |`log <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_RTE.log>`__                   |`command <https://github.com/dmlc/web-data/blob/master/gluonnlp/logs/language_model/xlnet_l24_h1024_a16_finetuned_RTE.sh>`__                                     |
++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 
 
@@ -17,7 +17,7 @@ def __init__(self, xl, units=768, num_classes=2, dropout=0.0,
             self.classifier = nn.HybridSequential(prefix=prefix)
             if dropout:
                 self.classifier.add(nn.Dropout(rate=dropout))
-            self.classifier.add(nn.Dense(units=num_classes))
+            self.classifier.add(nn.Dense(units=num_classes, flatten=False))
             self.pooler = nn.Dense(units=units, flatten=False, activation='tanh', prefix=prefix)
 
     def __call__(self, inputs, token_types, valid_length=None, mems=None):