From b4e768cfb0ac2ccd26d00d99d3c7ab478c415a64 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 22 Nov 2024 13:56:03 -0800 Subject: [PATCH] adding plugin for local search strings --- src/ast/sls/CMakeLists.txt | 1 + src/ast/sls/sls_seq_plugin.cpp | 306 +++++++++++++++++++++++++++++++++ src/ast/sls/sls_seq_plugin.h | 81 +++++++++ src/util/zstring.h | 2 +- 4 files changed, 389 insertions(+), 1 deletion(-) create mode 100644 src/ast/sls/sls_seq_plugin.cpp create mode 100644 src/ast/sls/sls_seq_plugin.h diff --git a/src/ast/sls/CMakeLists.txt b/src/ast/sls/CMakeLists.txt index a63fc099463..b393d90ed74 100644 --- a/src/ast/sls/CMakeLists.txt +++ b/src/ast/sls/CMakeLists.txt @@ -15,6 +15,7 @@ z3_add_component(ast_sls sls_context.cpp sls_datatype_plugin.cpp sls_euf_plugin.cpp + sls_seq_plugin.cpp sls_smt_plugin.cpp sls_smt_solver.cpp COMPONENT_DEPENDENCIES diff --git a/src/ast/sls/sls_seq_plugin.cpp b/src/ast/sls/sls_seq_plugin.cpp new file mode 100644 index 00000000000..aa47f61f965 --- /dev/null +++ b/src/ast/sls/sls_seq_plugin.cpp @@ -0,0 +1,306 @@ +/*++ +Copyright (c) 2024 Microsoft Corporation + +Module Name: + + sls_seq_plugin.cpp + +Abstract: + + Sequence/String SLS + +Author: + + Nikolaj Bjorner (nbjorner) 2024-11-22 + +--*/ + +#include "ast/sls/sls_seq_plugin.h" +#include "ast/sls/sls_context.h" + + +namespace sls { + + seq_plugin::seq_plugin(context& c): + plugin(c), + seq(c.get_manager()) + {} + + void seq_plugin::propagate_literal(sat::literal lit) { + } + + expr_ref seq_plugin::get_value(expr* e) { + return expr_ref(m); + } + + bool seq_plugin::propagate() { + return false; + } + + bool seq_plugin::is_sat() { + return true; + } + + void seq_plugin::register_term(expr* e) { + } + + std::ostream& seq_plugin::display(std::ostream& out) const { + return out; + } + + bool seq_plugin::set_value(expr* e, expr* v) { + return false; + } + + zstring& seq_plugin::strval0(expr* e) { + SASSERT(seq.is_string(e->get_sort())); + unsigned id = e->get_id(); + m_values.reserve(id + 1); + if (!m_values[id]) + m_values.set(id, alloc(eval, m)); + return m_values[id]->val0.svalue; + } + + bool seq_plugin::bval1(expr* e) { + SASSERT(is_app(e)); + if (to_app(e)->get_family_id() == seq.get_family_id()) + return bval1_seq(to_app(e)); + + NOT_IMPLEMENTED_YET(); + return false; + } + + bool seq_plugin::bval1_seq(app* e) { + expr* a, *b; + switch (e->get_decl_kind()) { + case OP_SEQ_CONTAINS: { + VERIFY(seq.str.is_contains(e, a, b)); + if (seq.is_string(a->get_sort())) + return strval0(a).contains(strval0(b)); + else { + NOT_IMPLEMENTED_YET(); + } + break; + } + case OP_SEQ_PREFIX: + case OP_SEQ_SUFFIX: + case OP_SEQ_NTH: + case OP_SEQ_NTH_I: + case OP_SEQ_NTH_U: + case OP_SEQ_IN_RE: + case OP_SEQ_FOLDL: + case OP_SEQ_FOLDLI: + case OP_STRING_LT: + case OP_STRING_LE: + case OP_STRING_IS_DIGIT: + NOT_IMPLEMENTED_YET(); + break; + default: + break; + } + return false; + } + + zstring const& seq_plugin::strval1(expr* e) { + SASSERT(is_app(e)); + SASSERT(seq.is_string(e->get_sort())); + if (to_app(e)->get_family_id() == seq.get_family_id()) { + switch (to_app(e)->get_decl_kind()) { + case OP_SEQ_UNIT: + case OP_SEQ_EMPTY: + case OP_SEQ_CONCAT: + case OP_SEQ_EXTRACT: + case OP_SEQ_REPLACE: + case OP_SEQ_AT: + case OP_SEQ_NTH: + case OP_SEQ_NTH_I: + case OP_SEQ_NTH_U: + case OP_SEQ_LENGTH: + case OP_SEQ_INDEX: + case OP_SEQ_LAST_INDEX: + case OP_SEQ_TO_RE: + case OP_SEQ_IN_RE: + case OP_SEQ_REPLACE_RE_ALL: + case OP_SEQ_REPLACE_RE: + case OP_SEQ_REPLACE_ALL: + case OP_SEQ_MAP: + case OP_SEQ_MAPI: + case OP_SEQ_FOLDL: + case OP_SEQ_FOLDLI: + case OP_RE_PLUS: + case OP_RE_STAR: + case OP_RE_OPTION: + case OP_RE_RANGE: + case OP_RE_CONCAT: + case OP_RE_UNION: + case OP_RE_DIFF: + case OP_RE_INTERSECT: + case OP_RE_LOOP: + case OP_RE_POWER: + case OP_RE_COMPLEMENT: + case OP_RE_EMPTY_SET: + case OP_RE_FULL_SEQ_SET: + case OP_RE_FULL_CHAR_SET: + case OP_RE_OF_PRED: + case OP_RE_REVERSE: + case OP_RE_DERIVATIVE: + case OP_STRING_CONST: + case OP_STRING_ITOS: + case OP_STRING_STOI: + case OP_STRING_UBVTOS: + case OP_STRING_SBVTOS: + case OP_STRING_LT: + case OP_STRING_LE: + case OP_STRING_IS_DIGIT: + case OP_STRING_TO_CODE: + case OP_STRING_FROM_CODE: + NOT_IMPLEMENTED_YET(); + break; + default: + UNREACHABLE(); + break; + } + } + auto const& v = strval0(e); + m_values[e->get_id()]->val1.svalue = v; + return m_values[e->get_id()]->val1.svalue; + } + + void seq_plugin::repair_up(app* e) { + } + + bool seq_plugin::repair_down(app* e) { + switch (e->get_decl_kind()) { + case OP_SEQ_UNIT: + case OP_SEQ_EMPTY: + case OP_SEQ_CONCAT: + case OP_SEQ_PREFIX: + case OP_SEQ_SUFFIX: + case OP_SEQ_CONTAINS: + return repair_contains(e); + case OP_SEQ_EXTRACT: + case OP_SEQ_REPLACE: + case OP_SEQ_AT: + case OP_SEQ_NTH: + case OP_SEQ_NTH_I: + case OP_SEQ_NTH_U: + case OP_SEQ_LENGTH: + case OP_SEQ_INDEX: + case OP_SEQ_LAST_INDEX: + case OP_SEQ_TO_RE: + case OP_SEQ_IN_RE: + case OP_SEQ_REPLACE_RE_ALL: + case OP_SEQ_REPLACE_RE: + case OP_SEQ_REPLACE_ALL: + case OP_SEQ_MAP: + case OP_SEQ_MAPI: + case OP_SEQ_FOLDL: + case OP_SEQ_FOLDLI: + case OP_RE_PLUS: + case OP_RE_STAR: + case OP_RE_OPTION: + case OP_RE_RANGE: + case OP_RE_CONCAT: + case OP_RE_UNION: + case OP_RE_DIFF: + case OP_RE_INTERSECT: + case OP_RE_LOOP: + case OP_RE_POWER: + case OP_RE_COMPLEMENT: + case OP_RE_EMPTY_SET: + case OP_RE_FULL_SEQ_SET: + case OP_RE_FULL_CHAR_SET: + case OP_RE_OF_PRED: + case OP_RE_REVERSE: + case OP_RE_DERIVATIVE: + case OP_STRING_CONST: + case OP_STRING_ITOS: + case OP_STRING_STOI: + case OP_STRING_UBVTOS: + case OP_STRING_SBVTOS: + case OP_STRING_LT: + case OP_STRING_LE: + case OP_STRING_IS_DIGIT: + case OP_STRING_TO_CODE: + case OP_STRING_FROM_CODE: + default: + break; + } + return false; + } + + bool seq_plugin::repair_contains(expr* e) { + expr* a, *b; + VERIFY(seq.str.is_contains(e, a, b)); + zstring sa = strval0(a); + zstring sb = strval0(b); + unsigned lena = sa.length(); + unsigned lenb = sb.length(); + + m_str_updates.reset(); + if (ctx.is_true(e)) { + // add b to a in front + // add b to a in back + // add part of b to a front/back + // take random subsequence of a and set it to b + // reduce size of b + + m_str_updates.push_back({ a, sb + sa, 1 }); + m_str_updates.push_back({ a, sa + sb, 1 }); + if (lena > 1) { + unsigned mid = ctx.rand(lena-2) + 1; + zstring sa1 = sa.extract(0, mid); + zstring sa2 = sa.extract(mid, lena - mid); + m_str_updates.push_back({ a, sa1 + sb + sa2, 1}); + } + if (lenb > 0) { + m_str_updates.push_back({ b, sb.extract(0, lenb-1), 1}); + m_str_updates.push_back({ b, sb.extract(1, lenb-1), 1}); + } + } + else { + // remove occurrences of b in a, if b is non-empty + // append character to b + // set b to be a + character + // + } + return apply_str_update(); + } + + bool seq_plugin::apply_str_update() { + double sum_scores = 0; + for (auto const& [e, val, score] : m_str_updates) + sum_scores += score; + + while (!m_str_updates.empty()) { + unsigned i = m_str_updates.size(); + double lim = sum_scores * ((double)ctx.rand() / random_gen().max_value()); + do { + lim -= m_str_updates[--i].m_score; + } + while (lim >= 0 && i > 0); + + auto [e, value, score] = m_str_updates[i]; + + if (update(e, value)) + return true; + + sum_scores -= score; + m_str_updates[i] = m_str_updates.back(); + m_str_updates.pop_back(); + } + + return false; + } + + bool seq_plugin::update(expr* e, zstring const& value) { + strval0(e) = value; + ctx.new_value_eh(e); + return true; + } + + + void seq_plugin::repair_literal(sat::literal lit) { + } + +} diff --git a/src/ast/sls/sls_seq_plugin.h b/src/ast/sls/sls_seq_plugin.h new file mode 100644 index 00000000000..59d875d8aa2 --- /dev/null +++ b/src/ast/sls/sls_seq_plugin.h @@ -0,0 +1,81 @@ +/*++ +Copyright (c) 2024 Microsoft Corporation + +Module Name: + + sls_seq_plugin.h + +Abstract: + + Sequence/String SLS + +Author: + + Nikolaj Bjorner (nbjorner) 2024-11-22 + +--*/ +#pragma once + +#include "ast/sls/sls_context.h" +#include "ast/seq_decl_plugin.h" + + +namespace sls { + + class seq_plugin : public plugin { + struct value { + value(ast_manager& m): evalue(m) {} + zstring svalue; + expr_ref evalue; + }; + + struct eval { + eval(ast_manager& m): + val0(m), val1(m) {} + value val0; + value val1; + }; + + seq_util seq; + scoped_ptr_vector m_values; + + struct str_update { + expr* e; + zstring value; + unsigned m_score; + }; + vector m_str_updates; + bool apply_str_update(); + bool update(expr* e, zstring const& value); + + bool bval1(expr* e); + bool bval1_seq(app* e); + zstring& strval0(expr* e); + zstring const& strval1(expr* e); + + bool repair_contains(expr* e); + + public: + seq_plugin(context& c); + ~seq_plugin() override {} + expr_ref get_value(expr* e) override; + void initialize() override {} + void start_propagation() override {} + void propagate_literal(sat::literal lit) override; + bool propagate() override; + bool is_sat() override; + void register_term(expr* e) override; + std::ostream& display(std::ostream& out) const override; + bool set_value(expr* e, expr* v) override; + bool include_func_interp(func_decl* f) const override { return true; } + + void repair_up(app* e) override; + bool repair_down(app* e) override; + void repair_literal(sat::literal lit) override; + + void collect_statistics(statistics& st) const override {} + void reset_statistics() override {} + + }; + +} diff --git a/src/util/zstring.h b/src/util/zstring.h index cb462238a1e..e661b5389c2 100644 --- a/src/util/zstring.h +++ b/src/util/zstring.h @@ -79,7 +79,7 @@ class zstring { bool contains(zstring const& other) const; int indexofu(zstring const& other, unsigned offset) const; int last_indexof(zstring const& other) const; - zstring extract(unsigned lo, unsigned hi) const; + zstring extract(unsigned offset, unsigned length) const; zstring operator+(zstring const& other) const; bool operator==(const zstring& other) const; bool operator!=(const zstring& other) const;