Skip to content

Commit

Permalink
add code docs and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
hendrikmuhs committed Feb 19, 2024
1 parent 83c97bc commit 2a1b7ad
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 5 deletions.
40 changes: 35 additions & 5 deletions python/src/pxds/dictionary.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@ cdef extern from "keyvi/dictionary/dictionary.h" namespace "keyvi::dictionary":

cdef cppclass Dictionary:
# wrap-doc:
# Keyvi dictionary, basically a set of key values. Keyvi dictionaries are immutable containers,
# created by a previous compile run. Immutability has performance benefits. If you are looking for
# an updateable container, have a look at keyvi index.
# Keyvi dictionary, basically a set of key values. Keyvi dictionaries
# are immutable containers, created by a previous compile run.
# Immutability has performance benefits. If you are looking for an
# updateable container, have a look at keyvi index.
#
# Keyvi dictionaries allow multiple types of approximate and completion matches due to its internal
# FST based data structure.
# Keyvi dictionaries allow multiple types of approximate and completion
# matches due to their internal FST-based data structure.
Dictionary (libcpp_utf8_string filename) except +
Dictionary (libcpp_utf8_string filename, loading_strategy_types) except +
bool Contains (libcpp_utf8_string key) # wrap-ignore
Expand All @@ -41,9 +42,38 @@ cdef extern from "keyvi/dictionary/dictionary.h" namespace "keyvi::dictionary":
_MatchIteratorPair GetFuzzy (libcpp_utf8_string key, int32_t max_edit_distance) except + # wrap-as:match_fuzzy
_MatchIteratorPair GetFuzzy (libcpp_utf8_string key, int32_t max_edit_distance, size_t minimum_exact_prefix) except + # wrap-as:match_fuzzy
_MatchIteratorPair GetPrefixCompletion (libcpp_utf8_string key) except + # wrap-as:complete_prefix
# wrap-doc:
# complete the given key to full matches by matching the given key as
# prefix. In case the used dictionary supports inner weights, the
# completer traverses the dictionary according to weights. If weights
# are not available the dictionary gets traversed in byte-order.
_MatchIteratorPair GetPrefixCompletion (libcpp_utf8_string key, size_t top_n) except + # wrap-as:complete_prefix
# wrap-doc:
# complete the given key to full matches by matching the given key as
# prefix. This version of prefix completion ensures the return of the
# top n completions. Due to depth-first traversal the traverser
# immediately yields results when it visits them. The results are
# neither in order nor limited to n. It is up to the caller to re-sort
# and truncate the list of results.
# Only the number of top completions is guaranteed.
_MatchIteratorPair GetPrefixCompletion (libcpp_utf8_string key, match_filter filter, void* filter_data) # wrap-ignore
_MatchIteratorPair GetPrefixCompletion (libcpp_utf8_string key, match_filter filter) # wrap-as:complete_prefix
# wrap-doc:
# complete the given key to full matches by matching the given key as
# prefix. This version of prefix completions allows the definition of a
# custom filter method. The filter method retrieves the match and must
# return a tuple of bool and int:
#
# def my_filter(match):
# ...
# accept_match = True
# min_weight = 42
# return accept_match, min_weight
#
# Only if the filter accepts the match, it is passed downstream.
# min_weight controls the internal traverser. Only branches with a
# weight greater than or equal to min_weight are visited, others are
# skipped.
_MatchIteratorPair GetAllItems () # wrap-ignore
_MatchIteratorPair Lookup(libcpp_utf8_string key) # wrap-as:search
_MatchIteratorPair LookupText(libcpp_utf8_string text) # wrap-as:search_tokenized
Expand Down
49 changes: 49 additions & 0 deletions python/tests/dictionary/prefix_completion_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# Usage: py.test tests

import sys
import os

from keyvi.compiler import CompletionDictionaryCompiler

# Directory containing this test file.
root = os.path.dirname(os.path.abspath(__file__))
# Put the parent directory on sys.path so the import below can resolve;
# presumably that is where the shared test_tools module lives (verify).
sys.path.append(os.path.join(root, "../"))
from test_tools import tmp_dictionary


def test_prefix_simple():
    """Exercise ``complete_prefix`` with a plain key, a top-n limit and a filter."""
    weighted_keys = (
        ("eric", 33),
        ("jeff", 33),
        ("eric bla", 233),
        ("eric blu", 113),
        ("eric ble", 413),
        ("eric blx", 223),
        ("eric bllllx", 193),
        ("eric bxxxx", 23),
        ("eric boox", 143),
    )
    compiler = CompletionDictionaryCompiler({"memory_limit_mb": "10"})
    for key, weight in weighted_keys:
        compiler.Add(key, weight)

    def matched_strings(matches):
        # Reduce an iterable of matches to the strings they matched.
        return [match.matched_string for match in matches]

    def ends_with_x(m):
        # Filter result is a pair: (accept this match?, weight value).
        return m.matched_string.endswith("x"), 40

    with tmp_dictionary(compiler, "completion.kv") as dictionary:
        # Plain prefix completion: every key starting with "eric".
        everything = matched_strings(dictionary.complete_prefix("eric"))
        assert everything == [
            "eric",
            "eric ble",
            "eric bla",
            "eric blx",
            "eric bllllx",
            "eric blu",
            "eric boox",
            "eric bxxxx",
        ]

        # Completion limited to the top 2 entries.
        top_two = matched_strings(dictionary.complete_prefix("eric", 2))
        assert top_two == [
            "eric",
            "eric ble",
        ]

        # Completion through a custom filter callable.
        filtered = matched_strings(dictionary.complete_prefix("eric", ends_with_x))
        assert filtered == [
            "eric blx",
            "eric bllllx",
            "eric boox",
        ]
        # same with lambda, not working yet:
        # assert [m.matched_string for m in d.complete_prefix("eric", lambda m: (m.matched_string.endswith('x'), 40))] == ['eric blx', 'eric bllllx', 'eric boox']

0 comments on commit 2a1b7ad

Please sign in to comment.