diff --git a/lttoolbox/Makefile.am b/lttoolbox/Makefile.am index 78bb527b..4f031cd0 100644 --- a/lttoolbox/Makefile.am +++ b/lttoolbox/Makefile.am @@ -4,7 +4,7 @@ h_sources = alphabet.h att_compiler.h buffer.h compiler.h compression.h \ match_exe.h match_node.h match_state.h my_stdio.h node.h \ pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h string_utils.h \ transducer.h trans_exe.h xml_parse_util.h xml_walk_util.h exception.h tmx_compiler.h \ - ustring.h + ustring.h sorted_vector.hpp cc_sources = alphabet.cc att_compiler.cc compiler.cc compression.cc entry_token.cc \ expander.cc file_utils.cc fst_processor.cc input_file.cc lt_locale.cc match_exe.cc \ match_node.cc match_state.cc node.cc pattern_list.cc \ diff --git a/lttoolbox/sorted_vector.hpp b/lttoolbox/sorted_vector.hpp new file mode 100644 index 00000000..e914ab74 --- /dev/null +++ b/lttoolbox/sorted_vector.hpp @@ -0,0 +1,277 @@ +/* + * Copyright (C) 2022 Apertium + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses/>.
#pragma once
#ifndef c6d28b7452ec699b_SORTED_VECTOR_HPP
#define c6d28b7452ec699b_SORTED_VECTOR_HPP
#include <algorithm>
#include <cstddef>
#include <functional>
#include <iterator>
#include <set>
#include <utility>
#include <vector>

namespace detail {
	// Returns true when no element of [first, last) compares less (per comp)
	// than the element before it. Empty and single-element ranges are sorted.
	// Kept as a thin wrapper over std::is_sorted so existing callers of
	// detail::is_sorted keep working.
	template<typename ForwardIt, typename Comp>
	bool is_sorted(ForwardIt first, ForwardIt last, Comp comp) {
		return std::is_sorted(first, last, comp);
	}
}

// Set-like container that stores its elements in a std::vector kept ordered
// by Comp, with at most one element per equivalence class.
//
// Two values a and b are "equivalent" when neither comp(a, b) nor comp(b, a)
// holds. Every operation of this class uses that definition of equality;
// operator== of T is never required.
//
// The ordering invariant can be broken by callers through get(), the
// non-const begin()/end(), or assign(const_iterator, const_iterator);
// sort() restores the ordering (it does not remove duplicates).
template<typename T, typename Comp = std::less<T>>
class sorted_vector {
public:
	typedef typename std::vector<T> container;
	typedef typename container::iterator iterator;
	typedef typename container::const_iterator const_iterator;
	typedef typename container::const_reverse_iterator const_reverse_iterator;
	typedef typename container::size_type size_type;
	typedef T value_type;
	typedef T key_type;

	sorted_vector() {}

	// Builds the container from the contents of a std::set.
	// Intentionally not explicit: callers rely on implicit conversion from
	// std::set (e.g. pushing a std::set into a vector of sorted_vector).
	sorted_vector(const std::set<T>& o) {
		insert(o.begin(), o.end());
	}

	// Inserts t unless an equivalent element is already stored.
	// Returns an iterator to the stored element (new or pre-existing) and a
	// flag that is true only when an insertion took place.
	std::pair<iterator, bool> insert(T t) {
		iterator pos = std::lower_bound(elements.begin(), elements.end(), t, comp);
		// lower_bound guarantees !comp(*pos, t), so only the other direction
		// needs checking to decide equivalence.
		if (pos != elements.end() && !comp(t, *pos)) {
			return std::make_pair(pos, false);
		}
		pos = elements.insert(pos, t);
		return std::make_pair(pos, true);
	}

	// Inserts every element of [b, e), dropping elements equivalent to one
	// already stored or to an earlier one in the merged sequence.
	// The input does not have to be sorted; unsorted input is copied and
	// sorted first.
	template<typename It>
	void insert(It b, It e) {
		const size_type incoming = static_cast<size_type>(std::distance(b, e));
		if (incoming == 0) {
			// Nothing to merge; avoid rebuilding the whole vector.
			return;
		}
		if (incoming == 1) {
			insert(*b);
			return;
		}

		// Scratch buffers are thread_local statics so their allocations are
		// reused across calls on the same thread.
		static thread_local container merged;
		merged.resize(0);
		merged.reserve(elements.size() + incoming);

		if (detail::is_sorted(b, e, comp)) {
			std::merge(elements.begin(), elements.end(), b, e, std::back_inserter(merged), comp);
		}
		else {
			static thread_local container sorted;
			sorted.assign(b, e);
			std::sort(sorted.begin(), sorted.end(), comp);
			std::merge(elements.begin(), elements.end(), sorted.begin(), sorted.end(), std::back_inserter(merged), comp);
		}

		merged.swap(elements);
		// The merged sequence is ordered, so neighbours lhs, rhs satisfy
		// !comp(rhs, lhs); they are equivalent exactly when !comp(lhs, rhs).
		// Using comp here (not operator==) matches what insert(T) rejects.
		// std::merge is stable, so a previously stored element is the one kept.
		iterator dup = std::unique(elements.begin(), elements.end(),
			[this](const T& lhs, const T& rhs) { return !comp(lhs, rhs); });
		elements.erase(dup, elements.end());
	}

	// Alias for insert(t); the element is placed at its sorted position,
	// not necessarily at the back.
	void push_back(T t) {
		insert(t);
	}

	// Removes the element equivalent to t. Returns true if one was removed.
	bool erase(T t) {
		if (out_of_range(t)) {
			return false;
		}
		iterator pos = lower_bound(t);
		if (pos != elements.end() && equivalent(*pos, t)) {
			elements.erase(pos);
			return true;
		}
		return false;
	}

	// Removes the element at it and returns an iterator to the element that
	// followed it.
	const_iterator erase(const_iterator it) {
		// Measure from cbegin() so both arguments of std::distance have the
		// same iterator type.
		const size_type offset = static_cast<size_type>(std::distance(elements.cbegin(), it));
		return elements.erase(elements.begin() + offset);
	}

	// Removes, one by one, the stored elements equivalent to the values in
	// [b, e). The range must not point into this container, since each
	// removal invalidates such iterators.
	template<typename It>
	void erase(It b, It e) {
		for (; b != e; ++b) {
			erase(*b);
		}
	}

	// Returns an iterator to the element equivalent to t, or end() if none.
	const_iterator find(T t) const {
		if (out_of_range(t)) {
			return elements.end();
		}
		const_iterator pos = lower_bound(t);
		if (pos != elements.end() && !equivalent(*pos, t)) {
			return elements.end();
		}
		return pos;
	}

	// Returns 1 when an element equivalent to t is stored, otherwise 0.
	size_t count(T t) const {
		return (find(t) != end());
	}

	iterator begin() {
		return elements.begin();
	}

	iterator end() {
		return elements.end();
	}

	const_iterator begin() const {
		return elements.begin();
	}

	const_iterator end() const {
		return elements.end();
	}

	const_iterator cbegin() const {
		return elements.cbegin();
	}

	const_iterator cend() const {
		return elements.cend();
	}

	const_reverse_iterator rbegin() const {
		return elements.rbegin();
	}

	const_reverse_iterator rend() const {
		return elements.rend();
	}

	// Copy of the first element in comp order. The container must not be empty.
	T front() const {
		return elements.front();
	}

	// Copy of the last element in comp order. The container must not be empty.
	T back() const {
		return elements.back();
	}

	// First element that does not compare less than t, or end().
	iterator lower_bound(T t) {
		return std::lower_bound(elements.begin(), elements.end(), t, comp);
	}

	const_iterator lower_bound(T t) const {
		return std::lower_bound(elements.begin(), elements.end(), t, comp);
	}

	// First element that t compares less than, or end().
	const_iterator upper_bound(T t) const {
		return std::upper_bound(elements.begin(), elements.end(), t, comp);
	}

	// Returns true when this container and other share at least one
	// equivalent element. Walks both sorted sequences in lock-step.
	bool intersects(const sorted_vector& other) const {
		const_iterator mine = begin();
		const_iterator theirs = other.begin();
		const const_iterator mine_end = end();
		const const_iterator theirs_end = other.end();
		while (mine != mine_end && theirs != theirs_end) {
			if (comp(*mine, *theirs)) {
				++mine;
			}
			else if (comp(*theirs, *mine)) {
				++theirs;
			}
			else {
				// Neither is less than the other: equivalent under comp.
				return true;
			}
		}
		return false;
	}

	size_type size() const {
		return elements.size();
	}

	size_type capacity() const {
		return elements.capacity();
	}

	bool empty() const {
		return elements.empty();
	}

	// Replaces the contents with the elements of [b, e); the input may be
	// unsorted and may contain duplicates.
	template<typename It>
	void assign(It b, It e) {
		clear();
		insert(b, e);
	}

	// Replaces the contents with a verbatim copy of [b, e). No sorting or
	// de-duplication is performed, so the range is expected to already be
	// ordered and duplicate-free (e.g. taken from another sorted_vector).
	void assign(const_iterator b, const_iterator e) {
		elements.assign(b, e);
	}

	void swap(sorted_vector& other) {
		elements.swap(other.elements);
	}

	void clear() {
		elements.clear();
	}

	// Re-sorts the stored elements with the container's comparator, for use
	// after the underlying vector was modified directly. Does not remove
	// duplicates.
	void sort() {
		std::sort(elements.begin(), elements.end(), comp);
	}

	// Removes the last element. The container must not be empty.
	void pop_back() {
		elements.pop_back();
	}

	// Direct access to the underlying vector. Modifications through this
	// reference bypass the ordering and uniqueness guarantees.
	container& get() {
		return elements;
	}

	// Lexicographic comparison of the underlying vectors (uses operator< of
	// T), which allows sorted_vector to be used as a std::map key.
	bool operator<(const sorted_vector& o) const {
		return elements < o.elements;
	}

private:
	// True when a and b are equivalent under comp.
	bool equivalent(const T& a, const T& b) const {
		return !comp(a, b) && !comp(b, a);
	}

	// Cheap rejection test: true when the container is empty or t orders
	// strictly before the first / after the last stored element.
	bool out_of_range(const T& t) const {
		return elements.empty() || comp(elements.back(), t) || comp(t, elements.front());
	}

	container elements;
	Comp comp;
};

#endif
- std::vector> all_closures; + std::vector> all_closures; all_closures.reserve(transitions.size()); for (size_t i = 0; i < transitions.size(); i++) { all_closures.push_back(closure(i, epsilon_tag)); @@ -345,7 +346,7 @@ Transducer::determinize(int const epsilon_tag) int t = 0; - std::set finals_state; + sorted_vector finals_state; for(auto& it : finals) { finals_state.insert(it.first); } @@ -357,8 +358,7 @@ Transducer::determinize(int const epsilon_tag) for(auto& it : R[t]) { - if(!isEmptyIntersection(Q_prime[it], finals_state)) - { + if (Q_prime[it].intersects(finals_state)) { double w = default_weight; auto it3 = finals.find(it); if (it3 != finals.end()) { @@ -367,7 +367,7 @@ Transducer::determinize(int const epsilon_tag) finals_prime.insert(std::make_pair(it, w)); } - std::map, std::set > mymap; + std::map, sorted_vector > mymap; for(auto& it2 : Q_prime[it]) {