Commit 9b7a56d

Remove tensor2tensor dependency and update requirements, SRL evaluation bug fix
jgung committed Dec 27, 2020
1 parent 7a51c4e commit 9b7a56d
Showing 8 changed files with 395 additions and 35 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -51,7 +51,7 @@ and test data. Note that the official CoNLL 2003 evaluation script requires Perl
 
 You can then begin training with the following command:
 ```bash
-python tfnlp.trainer.py --job-dir data/experiments/conll-03 \
+python tfnlp/trainer.py --job-dir data/experiments/conll-03 \
 --train path/to/conll03/eng.train \
 --valid path/to/conll03/eng.testa \
 --test path/to/conll03/eng.testb \
@@ -102,7 +102,7 @@ and optional test files `test-wsj.conll` and `test-brown.conll`.
 
 You can then begin training using the following command:
 ```bash
-python tfnlp.trainer.py --job-dir data/experiments/conll-05 \
+python tfnlp/trainer.py --job-dir data/experiments/conll-05 \
 --train path/to/conll05/train-set.conll \
 --valid path/to/conll05/dev-set.conll \
 --test path/to/conll05/test-wsj.conll \
@@ -130,7 +130,7 @@ into the Stanford Dependency format.
 To train on the English CoNLL-2009 dependency data (using provided predicted POS tags),
 you can use the following command:
 ```bash
-python tfnlp.trainer.py --job-dir data/experiments/conll-09-en \
+python tfnlp/trainer.py --job-dir data/experiments/conll-09-en \
 --train path/to/CoNLL2009-ST-English/CoNLL2009-ST-English-train.txt \
 --valid path/to/CoNLL2009-ST-English-development.txt \
 --config data/config/parsing/parser-config.json \
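Note on the fix above: `python tfnlp.trainer.py` mixes module dot-notation with a file suffix, so the interpreter cannot find the script. With the corrected path, either invocation style below should work; the `-m` form is an assumption that `tfnlp` is importable as a package from the repo root.

```bash
# file-path invocation, as in the corrected README
python tfnlp/trainer.py --job-dir data/experiments/conll-03 ...

# module-style invocation (assumes the repo root is on PYTHONPATH)
python -m tfnlp.trainer --job-dir data/experiments/conll-03 ...
```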
15 changes: 6 additions & 9 deletions requirements-cpu.txt
@@ -1,11 +1,8 @@
-tensorflow==1.15.2
-tensorflow_hub==0.7.0
-tensor2tensor==1.15.5
-tensorflow-probability==0.8.0
-albert-tensorflow
-bert-tensorflow
+tensorflow==1.15.*
+tensorflow_hub==0.10.0
+tensorflow-probability==0.7.0
+albert-tensorflow==1.1
+bert-tensorflow==1.0.1
 sentencepiece
-sklearn
 numpy
-nltk
-six>=1.13.0
+nltk
15 changes: 6 additions & 9 deletions requirements.txt
@@ -1,11 +1,8 @@
-tensorflow-gpu==1.15.2
-tensorflow_hub==0.7.0
-tensor2tensor==1.15.5
-tensorflow-probability==0.8.0
-albert-tensorflow
-bert-tensorflow
+tensorflow-gpu==1.15.*
+tensorflow_hub==0.10.0
+tensorflow-probability==0.7.0
+albert-tensorflow==1.1
+bert-tensorflow==1.0.1
 sentencepiece
-sklearn
 numpy
-nltk
-six>=1.13.0
+nltk
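A note on the updated pins: TensorFlow 1.15 only ships wheels for Python 3.5–3.7, so a working install might look like the sketch below (the environment setup is an assumption, not part of this commit).

```bash
# TF 1.15 has no wheels for Python > 3.7
python3.7 -m venv venv && source venv/bin/activate
pip install -r requirements.txt        # GPU build
# pip install -r requirements-cpu.txt  # CPU-only alternative
```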
7 changes: 3 additions & 4 deletions setup.py
@@ -1,11 +1,10 @@
 from setuptools import find_packages, setup
 
 REQUIRED_PACKAGES = [
-    "tensorflow-gpu==1.15.0",
-    "tensorflow-hub==0.7.0",
-    "tensor2tensor==1.15.5",
+    "tensorflow-gpu==1.15.*",
+    "tensorflow-hub==0.10.0",
     "bert-tensorflow==1.0.1",
-    "tensorflow-probability==0.8.0",
+    "tensorflow-probability==0.7.0",
     "numpy>=1.14.2",
     "nltk>=3.2.5",
 ]
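With `REQUIRED_PACKAGES` now matching requirements.txt, a standard setuptools install should resolve the same pins:

```bash
pip install .  # installs tfnlp plus REQUIRED_PACKAGES from setup.py
```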
11 changes: 8 additions & 3 deletions tfnlp/cli/evaluators.py
@@ -189,8 +189,10 @@ def start(self):
         self.indices = []
 
     def accumulate(self, instance, result):
-        self.labels.append([label for label in result[self.target_key] if label != BERT_SUBLABEL])
-        self.gold.append([label for label in instance[self.labels_key] if label != BERT_SUBLABEL])
+        gold = [label for label in instance[self.labels_key] if label != BERT_SUBLABEL]
+        predicted = [label for label in result[self.target_key] if label != BERT_SUBLABEL][:len(gold)]
+        self.labels.append(predicted)
+        self.gold.append(gold)
         self.indices.append(instance[constants.SENTENCE_INDEX])
 
     def evaluate(self, identifier=None):
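This is the SRL evaluation bug fix from the commit title: after filtering BERT sublabels, the predicted sequence could still be longer than the gold sequence (e.g. trailing padding labels), misaligning the token-level comparison. A minimal illustration with invented values:

```python
# Hypothetical example of the length mismatch fixed above.
gold = ['B-ARG0', 'I-ARG0', 'B-V']              # 3 gold labels
result = ['B-ARG0', 'I-ARG0', 'B-V', 'O', 'O']  # 5 predicted labels (padded)

predicted = result[:len(gold)]      # truncate predictions to the gold length
assert len(predicted) == len(gold)  # sequences now align for evaluation
```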
@@ -342,7 +344,10 @@ def start(self):
 
     def accumulate(self, instance, result):
         super().accumulate(instance, result)
-        self.markers.append(instance[constants.MARKER_KEY])
+        idx = instance[constants.MARKER_KEY]
+        if isinstance(idx, list):
+            idx = idx.index('1')
+        self.markers.append(idx)
 
     def evaluate(self, identifier='.'):
         write_props_to_file(self.output_path + '.gold.txt', self.gold, self.markers, self.indices)
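The second fix normalizes the predicate marker: `MARKER_KEY` can evidently hold either a bare index or a one-hot list of strings, while `write_props_to_file` needs the index. A small sketch with invented values:

```python
# MARKER_KEY may arrive as an index or as a one-hot list such as below.
marker = ['0', '0', '1', '0']
idx = marker.index('1') if isinstance(marker, list) else marker
print(idx)  # -> 2, the predicate position
```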
8 changes: 4 additions & 4 deletions tfnlp/layers/layers.py
@@ -2,7 +2,7 @@
 import tensorflow as tf
 import tensorflow_estimator as tfe
 import tensorflow_hub as hub
-from tensor2tensor.layers.common_attention import add_timing_signal_1d, attention_bias_ignore_padding, multihead_attention
+
 from tensorflow.compat.v1 import get_variable
 from tensorflow.compat.v1 import logging
 from tensorflow.compat.v1 import variable_scope
@@ -21,6 +21,7 @@
 from tensorflow.python.ops.rnn_cell_impl import DropoutWrapper, LSTMStateTuple, LayerRNNCell
 
 from tfnlp.common import constants
+from tfnlp.layers.transformers import add_timing_signal_1d, attention_bias_ignore_padding, multihead_attention
 
 ELMO_URL = "https://tfhub.dev/google/elmo/2"
 
@@ -546,14 +547,13 @@ def _residual(_x, _y):
         x = _layer_norm(inputs)
 
         # multi-head self-attention
-        y = multihead_attention(query_antecedent=x, memory_antecedent=None,
+        y = multihead_attention(query_antecedent=x,
                                 bias=attention_bias,
                                 total_key_depth=self_attention_dim,
                                 total_value_depth=self_attention_dim,
                                 output_depth=self_attention_dim,
                                 num_heads=config.num_heads,
-                                dropout_rate=config.attention_dropout if training else 0,
-                                attention_type="dot_product")
+                                dropout_rate=config.attention_dropout if training else 0)
         x = _residual(x, y)
 
         with variable_scope("ffnn"):
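The vendored `tfnlp.layers.transformers` module now supplies the three helpers previously imported from tensor2tensor. For reference, here is a sketch of what `add_timing_signal_1d` computes, following the original tensor2tensor definition (the vendored copy may differ in detail):

```python
import math
import tensorflow as tf

def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4):
    """Add sinusoidal position encodings to x of shape [batch, length, channels]."""
    length, channels = tf.shape(x)[1], tf.shape(x)[2]
    position = tf.cast(tf.range(length), tf.float32)
    num_timescales = channels // 2
    # geometric progression of wavelengths between min_timescale and max_timescale
    log_increment = (math.log(max_timescale / min_timescale) /
                     tf.maximum(tf.cast(num_timescales, tf.float32) - 1, 1.0))
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), tf.float32) * -log_increment)
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
    signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
    signal = tf.pad(signal, [[0, 0], [0, tf.math.mod(channels, 2)]])  # pad odd channel counts
    return x + tf.reshape(signal, [1, length, channels])
```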
(Diff for the remaining changed files, including the new tfnlp/layers/transformers.py module, was not loaded.)
