diff --git a/lttoolbox/Makefile.am b/lttoolbox/Makefile.am
index 78bb527b..4f031cd0 100644
--- a/lttoolbox/Makefile.am
+++ b/lttoolbox/Makefile.am
@@ -4,7 +4,7 @@ h_sources = alphabet.h att_compiler.h buffer.h compiler.h compression.h \
match_exe.h match_node.h match_state.h my_stdio.h node.h \
pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h string_utils.h \
transducer.h trans_exe.h xml_parse_util.h xml_walk_util.h exception.h tmx_compiler.h \
- ustring.h
+ ustring.h sorted_vector.hpp
cc_sources = alphabet.cc att_compiler.cc compiler.cc compression.cc entry_token.cc \
expander.cc file_utils.cc fst_processor.cc input_file.cc lt_locale.cc match_exe.cc \
match_node.cc match_state.cc node.cc pattern_list.cc \
diff --git a/lttoolbox/sorted_vector.hpp b/lttoolbox/sorted_vector.hpp
new file mode 100644
index 00000000..e914ab74
--- /dev/null
+++ b/lttoolbox/sorted_vector.hpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2022 Apertium
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#ifndef c6d28b7452ec699b_SORTED_VECTOR_HPP
+#define c6d28b7452ec699b_SORTED_VECTOR_HPP
+#include <algorithm>
+#include <functional>
+#include <set>
+#include <vector>
+
+namespace detail {
+	/**
+	 * Reports whether [first, last) is already ordered under comp.
+	 *
+	 * @param first start of the range to inspect
+	 * @param last  one-past-the-end of the range
+	 * @param comp  strict weak ordering; comp(a, b) is true when a sorts before b
+	 * @return true if no element sorts before its predecessor (empty and
+	 *         single-element ranges included), false otherwise
+	 */
+	template<typename ForwardIt, typename Comp>
+	bool is_sorted(ForwardIt first, ForwardIt last, Comp comp) {
+		return std::is_sorted(first, last, comp);
+	}
+}
+
+/**
+ * Set-like container backed by a std::vector that is kept ordered by Comp.
+ *
+ * insert() never stores two equivalent elements, where "equivalent" means that
+ * neither element sorts before the other. assign(const_iterator, const_iterator)
+ * and get() bypass that bookkeeping, so their callers must supply data that is
+ * already ordered and free of duplicates.
+ */
+template<typename T, typename Comp = std::less<T>>
+class sorted_vector {
+public:
+	typedef typename std::vector<T> container;
+	typedef typename container::iterator iterator;
+	typedef typename container::const_iterator const_iterator;
+	typedef typename container::const_reverse_iterator const_reverse_iterator;
+	typedef typename container::size_type size_type;
+	typedef T value_type;
+	typedef T key_type;
+
+	sorted_vector() {}
+
+	// Builds the container from a std::set. Left implicit: callers may be passing
+	// a std::set where a sorted_vector is expected (NOTE(review): confirm).
+	sorted_vector(const std::set<T>& o) {
+		insert(o.begin(), o.end());
+	}
+
+	// Adds t unless an equivalent element is already present. Returns the position
+	// of t (or of the element that blocked it) and whether t was actually added.
+	std::pair<iterator, bool> insert(T t) {
+		if (elements.empty()) {
+			elements.push_back(t);
+			return std::make_pair(elements.begin(), true);
+		}
+		iterator it = std::lower_bound(elements.begin(), elements.end(), t, comp);
+		// Remember the index: inserting may reallocate and invalidate 'it'.
+		size_t at = std::distance(elements.begin(), it);
+		if (it == elements.end() || !equivalent(*it, t)) {
+			elements.insert(it, t);
+			return std::make_pair(elements.begin() + at, true);
+		}
+		return std::make_pair(elements.begin() + at, false);
+	}
+
+	// Merges [b, e) into the container; the range may be unsorted and may repeat values.
+	template<typename It>
+	void insert(It b, It e) {
+		if (b == e) {
+			return; // nothing to add, so skip rebuilding the whole vector
+		}
+		size_t d = std::distance(b, e);
+		if (d == 1) {
+			insert(*b);
+			return;
+		}
+
+		// Per-thread scratch buffers, reused across calls instead of reallocating.
+		static thread_local container merged;
+		merged.resize(0);
+		merged.reserve(elements.size() + d);
+
+		if (detail::is_sorted(b, e, comp)) {
+			std::merge(elements.begin(), elements.end(), b, e, std::back_inserter(merged), comp);
+		}
+		else {
+			static thread_local container sorted;
+			sorted.assign(b, e);
+			std::sort(sorted.begin(), sorted.end(), comp);
+			std::merge(elements.begin(), elements.end(), sorted.begin(), sorted.end(), std::back_inserter(merged), comp);
+		}
+
+		merged.swap(elements);
+		// De-duplicate by Comp-equivalence rather than operator==, matching insert(T) and find().
+		auto it = std::unique(elements.begin(), elements.end(), [this](const T& x, const T& y) {
+			return equivalent(x, y);
+		});
+		elements.erase(it, elements.end());
+	}
+
+	// Same as insert(t): the element is placed in order, not appended.
+	void push_back(T t) {
+		insert(t);
+	}
+
+	// Removes the element equivalent to t. Returns true if one was removed.
+	bool erase(T t) {
+		// Nothing to remove when the container is empty or t lies outside [front, back].
+		if (elements.empty() || comp(elements.back(), t) || comp(t, elements.front())) {
+			return false;
+		}
+		auto it = lower_bound(t);
+		if (it != elements.end() && equivalent(*it, t)) {
+			elements.erase(it);
+			return true;
+		}
+		return false;
+	}
+
+	// Removes the element at 'it' and returns the position that follows it.
+	const_iterator erase(const_iterator it) {
+		// Measure from cbegin(): std::distance requires both arguments to have the same type.
+		size_type o = std::distance(elements.cbegin(), it);
+		return elements.erase(elements.begin() + o);
+	}
+
+	// Removes every element equivalent to a value in [b, e).
+	template<typename It>
+	void erase(It b, It e) {
+		for (; b != e; ++b) {
+			erase(*b);
+		}
+	}
+
+	// Returns the position of the element equivalent to t, or end() if there is none.
+	const_iterator find(T t) const {
+		if (elements.empty() || comp(elements.back(), t) || comp(t, elements.front())) {
+			return elements.end();
+		}
+		auto it = lower_bound(t);
+		if (it != elements.end() && !equivalent(*it, t)) {
+			return elements.end();
+		}
+		return it;
+	}
+
+	// Returns 1 if an element equivalent to t is present, 0 otherwise.
+	size_t count(T t) const {
+		return (find(t) != end());
+	}
+
+	iterator begin() { return elements.begin(); }
+	iterator end() { return elements.end(); }
+	const_iterator begin() const { return elements.begin(); }
+	const_iterator end() const { return elements.end(); }
+	const_iterator cbegin() const { return elements.cbegin(); }
+	const_iterator cend() const { return elements.cend(); }
+	const_reverse_iterator rbegin() const { return elements.rbegin(); }
+	const_reverse_iterator rend() const { return elements.rend(); }
+
+	// Returns a copy of the first element; the container must not be empty.
+	T front() const {
+		return elements.front();
+	}
+
+	// Returns a copy of the last element; the container must not be empty.
+	T back() const {
+		return elements.back();
+	}
+
+	iterator lower_bound(T t) {
+		return std::lower_bound(elements.begin(), elements.end(), t, comp);
+	}
+
+	const_iterator lower_bound(T t) const {
+		return std::lower_bound(elements.begin(), elements.end(), t, comp);
+	}
+
+	const_iterator upper_bound(T t) const {
+		return std::upper_bound(elements.begin(), elements.end(), t, comp);
+	}
+
+	// Returns true if this container and 'other' hold at least one equivalent element.
+	bool intersects(const sorted_vector& other) const {
+		auto ti = begin();
+		auto oi = other.begin();
+		auto te = end();
+		auto oe = other.end();
+		// Walk both ordered ranges in step, advancing whichever side holds the smaller element.
+		while (ti != te && oi != oe) {
+			if (comp(*ti, *oi)) {
+				++ti;
+			}
+			else if (comp(*oi, *ti)) {
+				++oi;
+			}
+			else {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	size_type size() const { return elements.size(); }
+	size_type capacity() const { return elements.capacity(); }
+	bool empty() const { return elements.empty(); }
+
+	// Replaces the contents with [b, e), sorting and de-duplicating as needed.
+	template<typename It>
+	void assign(It b, It e) {
+		clear();
+		insert(b, e);
+	}
+
+	// Replaces the contents with [b, e) verbatim; nothing is sorted or de-duplicated.
+	void assign(const_iterator b, const_iterator e) {
+		elements.assign(b, e);
+	}
+
+	void swap(sorted_vector& other) {
+		elements.swap(other.elements);
+	}
+
+	void clear() {
+		elements.clear();
+	}
+
+	// Re-sorts the stored elements with this container's comparator; duplicates are kept.
+	void sort() {
+		std::sort(elements.begin(), elements.end(), comp);
+	}
+
+	void pop_back() {
+		elements.pop_back();
+	}
+
+	// Exposes the underlying vector; changes made through it are not re-ordered or de-duplicated.
+	container& get() {
+		return elements;
+	}
+
+	// Lexicographic comparison of the stored elements using their operator<.
+	bool operator<(const sorted_vector& o) const {
+		return elements < o.elements;
+	}
+
+private:
+	// True when neither element sorts before the other under comp.
+	bool equivalent(const T& a, const T& b) const {
+		return !comp(a, b) && !comp(b, a);
+	}
+
+	container elements;
+	Comp comp;
+};
+
+#endif
diff --git a/lttoolbox/transducer.cc b/lttoolbox/transducer.cc
index 7f80fdad..0ea3e602 100644
--- a/lttoolbox/transducer.cc
+++ b/lttoolbox/transducer.cc
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#include
#include
@@ -314,16 +315,16 @@ Transducer::isEmptyIntersection(std::set const &s1, std::set const &s2
void
Transducer::determinize(int const epsilon_tag)
{
- std::vector > R(2);
- std::vector> Q_prime;
- std::map, int> Q_prime_inv;
+ std::vector> R(2);
+ std::vector> Q_prime;
+ std::map, int> Q_prime_inv;
std::map > > transitions_prime;
// We're almost certainly going to need the closure of (nearly) every
// state, and we're often going to need the closure several times,
// so it's faster to precompute (though it does slow things down a bit).
- std::vector> all_closures;
+ std::vector> all_closures;
all_closures.reserve(transitions.size());
for (size_t i = 0; i < transitions.size(); i++) {
all_closures.push_back(closure(i, epsilon_tag));
@@ -345,7 +346,7 @@ Transducer::determinize(int const epsilon_tag)
int t = 0;
- std::set finals_state;
+ sorted_vector finals_state;
for(auto& it : finals) {
finals_state.insert(it.first);
}
@@ -357,8 +358,7 @@ Transducer::determinize(int const epsilon_tag)
for(auto& it : R[t])
{
- if(!isEmptyIntersection(Q_prime[it], finals_state))
- {
+ if (Q_prime[it].intersects(finals_state)) {
double w = default_weight;
auto it3 = finals.find(it);
if (it3 != finals.end()) {
@@ -367,7 +367,7 @@ Transducer::determinize(int const epsilon_tag)
finals_prime.insert(std::make_pair(it, w));
}
- std::map, std::set > mymap;
+ std::map, sorted_vector > mymap;
for(auto& it2 : Q_prime[it])
{