From 0e458e9e820835884ade573a3cd4c69982b25a2a Mon Sep 17 00:00:00 2001
From: Daniel Hershcovich <danielh@cs.huji.ac.il>
Date: Sun, 26 Jul 2015 12:39:38 +0300
Subject: [PATCH 1/8] Add missing space in message

---
 parser/lstm-parse.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/parser/lstm-parse.cc b/parser/lstm-parse.cc
index 44b95a8..1eb0548 100644
--- a/parser/lstm-parse.cc
+++ b/parser/lstm-parse.cc
@@ -531,7 +531,7 @@ int main(int argc, char** argv) {
 
   if (conf.count("words")) {
     pretrained[kUNK] = vector<float>(PRETRAINED_DIM, 0);
-    cerr << "Loading from " << conf["words"].as<string>() << " with" << PRETRAINED_DIM << " dimensions\n";
+    cerr << "Loading from " << conf["words"].as<string>() << " with " << PRETRAINED_DIM << " dimensions\n";
     ifstream in(conf["words"].as<string>().c_str());
     string line;
     getline(in, line);

From fef8696c75db00bb09d6fbaa53b54cda7c360522 Mon Sep 17 00:00:00 2001
From: Daniel Hershcovich <danielh@cs.huji.ac.il>
Date: Tue, 28 Jul 2015 14:02:32 +0300
Subject: [PATCH 2/8] Fix clab/lstm-parser#4: support reading zipped word
 vectors

---
 CMakeLists.txt       |  2 +-
 parser/lstm-parse.cc | 48 ++++++++++++++++++++++++++++++--------------
 2 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1687a20..12ac42e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ if(DEFINED ENV{BOOST_ROOT})
   set(Boost_NO_SYSTEM_PATHS ON)
 endif()
 set(Boost_REALPATH ON)
-find_package(Boost COMPONENTS program_options serialization REQUIRED)
+find_package(Boost COMPONENTS program_options serialization iostreams REQUIRED)
 include_directories(${Boost_INCLUDE_DIR})
 set(LIBS ${LIBS} ${Boost_LIBRARIES})
 
diff --git a/parser/lstm-parse.cc b/parser/lstm-parse.cc
index 1eb0548..53a77d9 100644
--- a/parser/lstm-parse.cc
+++ b/parser/lstm-parse.cc
@@ -15,8 +15,11 @@
 #include <unistd.h>
 #include <signal.h>
 
+#include <boost/algorithm/string/predicate.hpp>
 #include <boost/archive/text_oarchive.hpp>
 #include <boost/archive/text_iarchive.hpp>
+#include <boost/iostreams/filter/zlib.hpp>
+#include <boost/iostreams/filtering_streambuf.hpp>
 #include <boost/program_options.hpp>
 
 #include "cnn/training.h"
@@ -481,6 +484,21 @@ void output_conll(const vector<unsigned>& sentence, const vector<unsigned>& pos,
   cout << endl;
 }
 
+// Reads "word v1 ... vN" lines (N = PRETRAINED_DIM) from `in` and stores each vector in
+// `pretrained` under the id from corpus.get_or_add_word(); incomplete lines are skipped.
+void init_pretrained(istream &in) {
+  string line, word;
+  while (getline(in, line)) {
+    if (word.empty() && line.find('.') == std::string::npos)
+      continue; // first line contains vocabulary size and dimensions
+    istringstream lin(line);
+    vector<float> v(PRETRAINED_DIM, 0); // fresh per line: no values carried between words
+    lin >> word;
+    for (unsigned i = 0; i < PRETRAINED_DIM; ++i) lin >> v[i];
+    if (lin) pretrained[corpus.get_or_add_word(word)] = v; // blank/short line: stream failed, skip
+  }
+}
+
 
 int main(int argc, char** argv) {
   cnn::Initialize(argc, argv);
@@ -525,24 +543,24 @@ int main(int argc, char** argv) {
   const string fname = os.str();
   cerr << "Writing parameters to file: " << fname << endl;
   bool softlinkCreated = false;
-  corpus.load_correct_actions(conf["training_data"].as<string>());	
+  corpus.load_correct_actions(conf["training_data"].as<string>());
   const unsigned kUNK = corpus.get_or_add_word(cpyp::Corpus::UNK);
   kROOT_SYMBOL = corpus.get_or_add_word(ROOT_SYMBOL);
 
   if (conf.count("words")) {
     pretrained[kUNK] = vector<float>(PRETRAINED_DIM, 0);
-    cerr << "Loading from " << conf["words"].as<string>() << " with " << PRETRAINED_DIM << " dimensions\n";
-    ifstream in(conf["words"].as<string>().c_str());
-    string line;
-    getline(in, line);
-    vector<float> v(PRETRAINED_DIM, 0);
-    string word;
-    while (getline(in, line)) {
-      istringstream lin(line);
-      lin >> word;
-      for (unsigned i = 0; i < PRETRAINED_DIM; ++i) lin >> v[i];
-      unsigned id = corpus.get_or_add_word(word);
-      pretrained[id] = v;
+    const string& words_fname = conf["words"].as<string>();
+    cerr << "Loading from " << words_fname << " with " << PRETRAINED_DIM << " dimensions\n";
+    if (boost::algorithm::ends_with(words_fname, ".gz")) {
+      ifstream file(words_fname.c_str(), ios_base::in | ios_base::binary);
+      boost::iostreams::filtering_streambuf<boost::iostreams::input> zip;
+      zip.push(boost::iostreams::zlib_decompressor());
+      zip.push(file);
+      istream in(&zip);
+      init_pretrained(in);
+    } else {
+      ifstream in(words_fname.c_str());
+      init_pretrained(in); // read as normal text
     }
   }
 
@@ -611,7 +629,7 @@ int main(int argc, char** argv) {
              for (auto& w : tsentence)
                if (singletons.count(w) && cnn::rand01() < unk_prob) w = kUNK;
            }
-	   const vector<unsigned>& sentencePos=corpus.sentencesPos[order[si]]; 
+	   const vector<unsigned>& sentencePos=corpus.sentencesPos[order[si]];
 	   const vector<unsigned>& actions=corpus.correct_act_sent[order[si]];
            ComputationGraph hg;
            parser.log_prob_parser(&hg,sentence,tsentence,sentencePos,actions,corpus.actions,corpus.intToWords,&right);
@@ -644,7 +662,7 @@ int main(int argc, char** argv) {
         auto t_start = std::chrono::high_resolution_clock::now();
         for (unsigned sii = 0; sii < dev_size; ++sii) {
            const vector<unsigned>& sentence=corpus.sentencesDev[sii];
-	   const vector<unsigned>& sentencePos=corpus.sentencesPosDev[sii]; 
+	   const vector<unsigned>& sentencePos=corpus.sentencesPosDev[sii];
 	   const vector<unsigned>& actions=corpus.correct_act_sentDev[sii];
            vector<unsigned> tsentence=sentence;
            for (auto& w : tsentence)

From db29f8557353428f79a84304384d0ebddc89bedc Mon Sep 17 00:00:00 2001
From: Daniel Hershcovich <danielh@cs.huji.ac.il>
Date: Tue, 28 Jul 2015 14:07:03 +0300
Subject: [PATCH 3/8] Remove unused imports

---
 parser/lstm-parse.cc | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/parser/lstm-parse.cc b/parser/lstm-parse.cc
index 53a77d9..5b4f695 100644
--- a/parser/lstm-parse.cc
+++ b/parser/lstm-parse.cc
@@ -2,16 +2,11 @@
 #include <algorithm>
 #include <sstream>
 #include <iostream>
-#include <vector>
-#include <limits>
-#include <cmath>
 #include <chrono>
-#include <ctime>
 
 #include <unordered_map>
 #include <unordered_set>
 
-#include <execinfo.h>
 #include <unistd.h>
 #include <signal.h>
 
@@ -25,9 +20,7 @@
 #include "cnn/training.h"
 #include "cnn/cnn.h"
 #include "cnn/expr.h"
-#include "cnn/nodes.h"
 #include "cnn/lstm.h"
-#include "cnn/rnn.h"
 #include "c2.h"
 
 cpyp::Corpus corpus;

From f3b4cca6e7a302c308a10cdd089f68da067a543a Mon Sep 17 00:00:00 2001
From: Daniel Hershcovich <danielh@cs.huji.ac.il>
Date: Tue, 28 Jul 2015 14:08:28 +0300
Subject: [PATCH 4/8] Fix indentation and trailing whitespace

---
 parser/lstm-parse.cc | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/parser/lstm-parse.cc b/parser/lstm-parse.cc
index 5b4f695..f30651b 100644
--- a/parser/lstm-parse.cc
+++ b/parser/lstm-parse.cc
@@ -439,19 +439,19 @@ unsigned compute_correct(const map<int,int>& ref, const map<int,int>& hyp, unsig
 }
 
 void output_conll(const vector<unsigned>& sentence, const vector<unsigned>& pos,
-                  const vector<string>& sentenceUnkStrings, 
-                  const map<unsigned, string>& intToWords, 
-                  const map<unsigned, string>& intToPos, 
+                  const vector<string>& sentenceUnkStrings,
+                  const map<unsigned, string>& intToWords,
+                  const map<unsigned, string>& intToPos,
                   const map<int,int>& hyp, const map<int,string>& rel_hyp) {
   for (unsigned i = 0; i < (sentence.size()-1); ++i) {
     auto index = i + 1;
-    assert(i < sentenceUnkStrings.size() && 
+    assert(i < sentenceUnkStrings.size() &&
            ((sentence[i] == corpus.get_or_add_word(cpyp::Corpus::UNK) &&
              sentenceUnkStrings[i].size() > 0) ||
             (sentence[i] != corpus.get_or_add_word(cpyp::Corpus::UNK) &&
              sentenceUnkStrings[i].size() == 0 &&
              intToWords.find(sentence[i]) != intToWords.end())));
-    string wit = (sentenceUnkStrings[i].size() > 0)? 
+    string wit = (sentenceUnkStrings[i].size() > 0)?
       sentenceUnkStrings[i] : intToWords.find(sentence[i])->second;
     auto pit = intToPos.find(pos[i]);
     assert(hyp.find(i) != hyp.end());
@@ -463,10 +463,10 @@ void output_conll(const vector<unsigned>& sentence, const vector<unsigned>& pos,
     size_t first_char_in_rel = hyp_rel.find('(') + 1;
     size_t last_char_in_rel = hyp_rel.rfind(')') - 1;
     hyp_rel = hyp_rel.substr(first_char_in_rel, last_char_in_rel - first_char_in_rel + 1);
-    cout << index << '\t'       // 1. ID 
+    cout << index << '\t'       // 1. ID
          << wit << '\t'         // 2. FORM
-         << "_" << '\t'         // 3. LEMMA 
-         << "_" << '\t'         // 4. CPOSTAG 
+         << "_" << '\t'         // 3. LEMMA
+         << "_" << '\t'         // 4. CPOSTAG
          << pit->second << '\t' // 5. POSTAG
          << "_" << '\t'         // 6. FEATS
          << hyp_head << '\t'    // 7. HEAD
@@ -496,7 +496,7 @@ void init_pretrained(istream &in) {
 int main(int argc, char** argv) {
   cnn::Initialize(argc, argv);
 
-  cerr << "COMMAND:"; 
+  cerr << "COMMAND:";
   for (unsigned i = 0; i < static_cast<unsigned>(argc); ++i) cerr << ' ' << argv[i];
   cerr << endl;
   unsigned status_every_i_iterations = 100;
@@ -622,8 +622,8 @@ int main(int argc, char** argv) {
              for (auto& w : tsentence)
                if (singletons.count(w) && cnn::rand01() < unk_prob) w = kUNK;
            }
-	   const vector<unsigned>& sentencePos=corpus.sentencesPos[order[si]];
-	   const vector<unsigned>& actions=corpus.correct_act_sent[order[si]];
+           const vector<unsigned>& sentencePos=corpus.sentencesPos[order[si]];
+           const vector<unsigned>& actions=corpus.correct_act_sent[order[si]];
            ComputationGraph hg;
            parser.log_prob_parser(&hg,sentence,tsentence,sentencePos,actions,corpus.actions,corpus.intToWords,&right);
            double lp = as_scalar(hg.incremental_forward());
@@ -655,15 +655,15 @@ int main(int argc, char** argv) {
         auto t_start = std::chrono::high_resolution_clock::now();
         for (unsigned sii = 0; sii < dev_size; ++sii) {
            const vector<unsigned>& sentence=corpus.sentencesDev[sii];
-	   const vector<unsigned>& sentencePos=corpus.sentencesPosDev[sii];
-	   const vector<unsigned>& actions=corpus.correct_act_sentDev[sii];
+           const vector<unsigned>& sentencePos=corpus.sentencesPosDev[sii];
+           const vector<unsigned>& actions=corpus.correct_act_sentDev[sii];
            vector<unsigned> tsentence=sentence;
            for (auto& w : tsentence)
              if (training_vocab.count(w) == 0) w = kUNK;
 
            ComputationGraph hg;
-	   vector<unsigned> pred = parser.log_prob_parser(&hg,sentence,tsentence,sentencePos,vector<unsigned>(),corpus.actions,corpus.intToWords,&right);
-	   double lp = 0;
+           vector<unsigned> pred = parser.log_prob_parser(&hg,sentence,tsentence,sentencePos,vector<unsigned>(),corpus.actions,corpus.intToWords,&right);
+           double lp = 0;
            llh -= lp;
            trs += actions.size();
            map<int,int> ref = parser.compute_heads(sentence.size(), actions, corpus.actions);
@@ -683,9 +683,9 @@ int main(int argc, char** argv) {
           // easier to refer to it in a shell script.
           if (!softlinkCreated) {
             string softlink = " latest_model";
-            if (system((string("rm -f ") + softlink).c_str()) == 0 && 
+            if (system((string("rm -f ") + softlink).c_str()) == 0 &&
                 system((string("ln -s ") + fname + softlink).c_str()) == 0) {
-              cerr << "Created " << softlink << " as a soft link to " << fname 
+              cerr << "Created " << softlink << " as a soft link to " << fname
                    << " for convenience." << endl;
             }
             softlinkCreated = true;
@@ -704,8 +704,8 @@ int main(int argc, char** argv) {
     unsigned corpus_size = corpus.nsentencesDev;
     for (unsigned sii = 0; sii < corpus_size; ++sii) {
       const vector<unsigned>& sentence=corpus.sentencesDev[sii];
-      const vector<unsigned>& sentencePos=corpus.sentencesPosDev[sii]; 
-      const vector<string>& sentenceUnkStr=corpus.sentencesStrDev[sii]; 
+      const vector<unsigned>& sentencePos=corpus.sentencesPosDev[sii];
+      const vector<string>& sentenceUnkStr=corpus.sentencesStrDev[sii];
       const vector<unsigned>& actions=corpus.correct_act_sentDev[sii];
       vector<unsigned> tsentence=sentence;
       for (auto& w : tsentence)

From 5f93b46db95f0e7fbb399a89483bb57050baad4b Mon Sep 17 00:00:00 2001
From: Daniel Hershcovich <danielh@cs.huji.ac.il>
Date: Tue, 28 Jul 2015 14:46:59 +0300
Subject: [PATCH 5/8] clab/lstm-parser#3: allow limiting number of iterations

---
 parser/lstm-parse.cc | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/parser/lstm-parse.cc b/parser/lstm-parse.cc
index f30651b..adde5c9 100644
--- a/parser/lstm-parse.cc
+++ b/parser/lstm-parse.cc
@@ -70,6 +70,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
         ("rel_dim", po::value<unsigned>()->default_value(10), "relation dimension")
         ("lstm_input_dim", po::value<unsigned>()->default_value(60), "LSTM input dimension")
         ("train,t", "Should training be run?")
+        ("maxit,M", po::value<unsigned>()->default_value(8000), "Maximum number of training iterations")
         ("words,w", po::value<string>(), "Pretrained word embeddings")
         ("help,h", "Help");
   po::options_description dcmdline_options;
@@ -522,6 +523,8 @@ int main(int argc, char** argv) {
   }
   const double unk_prob = conf["unk_prob"].as<double>();
   assert(unk_prob >= 0.); assert(unk_prob <= 1.);
+  const unsigned maxit = conf["maxit"].as<unsigned>();
+  cerr << "Maximum number of iterations: " << maxit << "\n";
   ostringstream os;
   os << "parser_" << (USE_POS ? "pos" : "nopos")
      << '_' << LAYERS
@@ -603,11 +606,10 @@ int main(int argc, char** argv) {
     double right = 0;
     double llh = 0;
     bool first = true;
-    int iter = -1;
+    unsigned iter = 0;
     time_t time_start = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
     cerr << "TRAINING STARTED AT: " << put_time(localtime(&time_start), "%c %Z") << endl;
-    while(!requested_stop) {
-      ++iter;
+    while(!requested_stop && iter < maxit) {
       for (unsigned sii = 0; sii < status_every_i_iterations; ++sii) {
            if (si == corpus.nsentences) {
              si = 0;
@@ -692,6 +694,10 @@ int main(int argc, char** argv) {
           }
         }
       }
+      ++iter;
+    }
+    if (iter >= maxit) {
+      cerr << "\nMaximum number of iterations reached (" << iter << "), terminating optimization...\n";
     }
   } // should do training?
   if (true) { // do test evaluation

From e8432811caf8472ee0934c36ff9a36fe96dd30af Mon Sep 17 00:00:00 2001
From: Daniel Hershcovich <danielh@cs.huji.ac.il>
Date: Tue, 28 Jul 2015 15:04:49 +0300
Subject: [PATCH 6/8] Fix clab/lstm-parser#3: allow limiting optimization by
 dev uas tolerance

---
 parser/lstm-parse.cc | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/parser/lstm-parse.cc b/parser/lstm-parse.cc
index adde5c9..e6d2f69 100644
--- a/parser/lstm-parse.cc
+++ b/parser/lstm-parse.cc
@@ -71,6 +71,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
         ("lstm_input_dim", po::value<unsigned>()->default_value(60), "LSTM input dimension")
         ("train,t", "Should training be run?")
         ("maxit,M", po::value<unsigned>()->default_value(8000), "Maximum number of training iterations")
+        ("tolerance", po::value<double>()->default_value(0.0), "Tolerance on dev uas for stopping training")
         ("words,w", po::value<string>(), "Pretrained word embeddings")
         ("help,h", "Help");
   po::options_description dcmdline_options;
@@ -525,6 +526,8 @@ int main(int argc, char** argv) {
   assert(unk_prob >= 0.); assert(unk_prob <= 1.);
   const unsigned maxit = conf["maxit"].as<unsigned>();
   cerr << "Maximum number of iterations: " << maxit << "\n";
+  const double tolerance = conf["tolerance"].as<double>();
+  cerr << "Optimization tolerance: " << tolerance << "\n";
   ostringstream os;
   os << "parser_" << (USE_POS ? "pos" : "nopos")
      << '_' << LAYERS
@@ -607,9 +610,12 @@ int main(int argc, char** argv) {
     double llh = 0;
     bool first = true;
     unsigned iter = 0;
+    double uas = -1;
+    double prev_uas = -1;
     time_t time_start = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
     cerr << "TRAINING STARTED AT: " << put_time(localtime(&time_start), "%c %Z") << endl;
-    while(!requested_stop && iter < maxit) {
+    while(!requested_stop && iter < maxit &&
+        (uas < 0 || prev_uas < 0 || abs(prev_uas - uas) > tolerance)) {
       for (unsigned sii = 0; sii < status_every_i_iterations; ++sii) {
            if (si == corpus.nsentences) {
              si = 0;
@@ -675,7 +681,9 @@ int main(int argc, char** argv) {
            total_heads += sentence.size() - 1;
         }
         auto t_end = std::chrono::high_resolution_clock::now();
-        cerr << "  **dev (iter=" << iter << " epoch=" << (tot_seen / corpus.nsentences) << ")\tllh=" << llh << " ppl: " << exp(llh / trs) << " err: " << (trs - right) / trs << " uas: " << (correct_heads / total_heads) << "\t[" << dev_size << " sents in " << std::chrono::duration<double, std::milli>(t_end-t_start).count() << " ms]" << endl;
+        prev_uas = uas;
+        uas = correct_heads / total_heads;
+        cerr << "  **dev (iter=" << iter << " epoch=" << (tot_seen / corpus.nsentences) << ")\tllh=" << llh << " ppl: " << exp(llh / trs) << " err: " << (trs - right) / trs << " uas: " << uas << "\t[" << dev_size << " sents in " << std::chrono::duration<double, std::milli>(t_end-t_start).count() << " ms]" << endl;
         if (correct_heads > best_correct_heads) {
           best_correct_heads = correct_heads;
           ofstream out(fname);
@@ -698,6 +706,8 @@ int main(int argc, char** argv) {
     }
     if (iter >= maxit) {
       cerr << "\nMaximum number of iterations reached (" << iter << "), terminating optimization...\n";
+    } else if (!requested_stop) {
+      cerr << "\nScore tolerance reached (" << tolerance << "), terminating optimization...\n";
     }
   } // should do training?
   if (true) { // do test evaluation

From 0b39d5702927029977f69ae17d91ef19c371acfd Mon Sep 17 00:00:00 2001
From: Daniel Hershcovich <danielh@cs.huji.ac.il>
Date: Tue, 28 Jul 2015 15:59:39 +0300
Subject: [PATCH 7/8] Fix clab/lstm-parser#2: calculate and print las on test

---
 parser/lstm-parse.cc | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/parser/lstm-parse.cc b/parser/lstm-parse.cc
index e6d2f69..b96732e 100644
--- a/parser/lstm-parse.cc
+++ b/parser/lstm-parse.cc
@@ -428,7 +428,8 @@ void signal_callback_handler(int /* signum */) {
   requested_stop = true;
 }
 
-unsigned compute_correct(const map<int,int>& ref, const map<int,int>& hyp, unsigned len) {
+template<typename T>
+unsigned compute_correct(const map<int,T>& ref, const map<int,T>& hyp, unsigned len) {
   unsigned res = 0;
   for (unsigned i = 0; i < len; ++i) {
     auto ri = ref.find(i);
@@ -440,6 +441,24 @@ unsigned compute_correct(const map<int,int>& ref, const map<int,int>& hyp, unsig
   return res;
 }
 
+// Counts the positions i in [0, len) at which hyp1 agrees with ref1 AND hyp2 agrees
+// with ref2. Every i must be present as a key in all four maps (enforced by assert).
+template<typename T1, typename T2>
+unsigned compute_correct(const map<int,T1>& ref1, const map<int,T1>& hyp1,
+                         const map<int,T2>& ref2, const map<int,T2>& hyp2, unsigned len) {
+  unsigned matches = 0;
+  for (unsigned pos = 0; pos != len; ++pos) {
+    const auto ref_a = ref1.find(pos), hyp_a = hyp1.find(pos);
+    const auto ref_b = ref2.find(pos), hyp_b = hyp2.find(pos);
+    assert(ref_a != ref1.end());
+    assert(hyp_a != hyp1.end());
+    assert(ref_b != ref2.end());
+    assert(hyp_b != hyp2.end());
+    matches += (ref_a->second == hyp_a->second && ref_b->second == hyp_b->second) ? 1u : 0u;
+  }
+  return matches;
+}
+
 void output_conll(const vector<unsigned>& sentence, const vector<unsigned>& pos,
                   const vector<string>& sentenceUnkStrings,
                   const map<unsigned, string>& intToWords,
@@ -714,7 +733,8 @@ int main(int argc, char** argv) {
     double llh = 0;
     double trs = 0;
     double right = 0;
-    double correct_heads = 0;
+    double correct_heads_unlabeled = 0;
+    double correct_heads_labeled = 0;
     double total_heads = 0;
     auto t_start = std::chrono::high_resolution_clock::now();
     unsigned corpus_size = corpus.nsentencesDev;
@@ -736,11 +756,12 @@ int main(int argc, char** argv) {
       map<int,int> ref = parser.compute_heads(sentence.size(), actions, corpus.actions, &rel_ref);
       map<int,int> hyp = parser.compute_heads(sentence.size(), pred, corpus.actions, &rel_hyp);
       output_conll(sentence, sentencePos, sentenceUnkStr, corpus.intToWords, corpus.intToPos, hyp, rel_hyp);
-      correct_heads += compute_correct(ref, hyp, sentence.size() - 1);
+      correct_heads_unlabeled += compute_correct(ref, hyp, sentence.size() - 1);
+      correct_heads_labeled += compute_correct(ref, hyp, rel_ref, rel_hyp, sentence.size() - 1);
       total_heads += sentence.size() - 1;
     }
     auto t_end = std::chrono::high_resolution_clock::now();
-    cerr << "TEST llh=" << llh << " ppl: " << exp(llh / trs) << " err: " << (trs - right) / trs << " uas: " << (correct_heads / total_heads) << "\t[" << corpus_size << " sents in " << std::chrono::duration<double, std::milli>(t_end-t_start).count() << " ms]" << endl;
+    cerr << "TEST llh=" << llh << " ppl: " << exp(llh / trs) << " err: " << (trs - right) / trs << " uas: " << (correct_heads_unlabeled / total_heads) << " las: " << (correct_heads_labeled / total_heads) << "\t[" << corpus_size << " sents in " << std::chrono::duration<double, std::milli>(t_end-t_start).count() << " ms]" << endl;
   }
   for (unsigned i = 0; i < corpus.actions.size(); ++i) {
     //cerr << corpus.actions[i] << '\t' << parser.p_r->values[i].transpose() << endl;

From 090e81127d9b48eb7ba46c16c1c1ef39199149dd Mon Sep 17 00:00:00 2001
From: Daniel Hershcovich <danielh@cs.huji.ac.il>
Date: Sun, 9 Aug 2015 11:29:29 +0300
Subject: [PATCH 8/8] Change default tolerance to -1, meaning no
 tolerance-based stopping

---
 parser/lstm-parse.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/parser/lstm-parse.cc b/parser/lstm-parse.cc
index b96732e..a3816ce 100644
--- a/parser/lstm-parse.cc
+++ b/parser/lstm-parse.cc
@@ -71,7 +71,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
         ("lstm_input_dim", po::value<unsigned>()->default_value(60), "LSTM input dimension")
         ("train,t", "Should training be run?")
         ("maxit,M", po::value<unsigned>()->default_value(8000), "Maximum number of training iterations")
-        ("tolerance", po::value<double>()->default_value(0.0), "Tolerance on dev uas for stopping training")
+        ("tolerance", po::value<double>()->default_value(-1.0), "Tolerance on dev uas for stopping training")
         ("words,w", po::value<string>(), "Pretrained word embeddings")
         ("help,h", "Help");
   po::options_description dcmdline_options;
@@ -546,7 +546,9 @@ int main(int argc, char** argv) {
   const unsigned maxit = conf["maxit"].as<unsigned>();
   cerr << "Maximum number of iterations: " << maxit << "\n";
   const double tolerance = conf["tolerance"].as<double>();
-  cerr << "Optimization tolerance: " << tolerance << "\n";
+  if (tolerance > 0.0) {
+    cerr << "Optimization tolerance: " << tolerance << "\n";
+  }
   ostringstream os;
   os << "parser_" << (USE_POS ? "pos" : "nopos")
      << '_' << LAYERS
@@ -634,7 +636,7 @@ int main(int argc, char** argv) {
     time_t time_start = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
     cerr << "TRAINING STARTED AT: " << put_time(localtime(&time_start), "%c %Z") << endl;
     while(!requested_stop && iter < maxit &&
-        (uas < 0 || prev_uas < 0 || abs(prev_uas - uas) > tolerance)) {
+        (tolerance < 0 || uas < 0 || prev_uas < 0 || abs(prev_uas - uas) > tolerance)) {
       for (unsigned sii = 0; sii < status_every_i_iterations; ++sii) {
            if (si == corpus.nsentences) {
              si = 0;