Skip to content

Commit

Permalink
Merge pull request #50 from heiglandreas/allowOwnDictionaries
Browse files Browse the repository at this point in the history
Allow own dictionaries
  • Loading branch information
heiglandreas authored Jun 27, 2020
2 parents a4a2372 + 34aed98 commit ea34c3b
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 11 deletions.
33 changes: 29 additions & 4 deletions docs/examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ add the provided autoloader either using your own autoloader or by
invoking the ``Hyphenator``'s own autoloader

::

<?php
require_once 'path/to/Org/Heigl/Hyphenator/Hyphenator.php';
\Org\Heigl\Hyphenator\Hyphenator::registerAutoload();
Expand All @@ -18,7 +18,7 @@ Simple Example
==============

::

<?php
use \Org\Heigl\Hyphenator as h;
$hyphenator = h\Hyphenator::factory();
Expand Down Expand Up @@ -47,7 +47,7 @@ Invoke the ``Hyphenator`` manually
==================================

::

<?php
use \Org\Heigl\Hyphenator as h;
$o = new h\Options();
Expand All @@ -68,7 +68,7 @@ Get the hyphenation of a single word as array
=============================================

::

<?php
use \Org\Heigl\Hyphenator as h;
$o = new h\Options();
Expand Down Expand Up @@ -100,3 +100,28 @@ Get the hyphenation of a single word as array
reuse the once created instance.
Reading the largest hyphenation-pattern-file takes up to one
second on a 2.5GHz Intel Core2 Duo using 4GB RAM.

Add your own dictionary rules to the hyphenator
===============================================

::

<?php
use \Org\Heigl\Hyphenator\Hyphenator;
use Org\Heigl\Hyphenator\Dictionary\Dictionary;

$hyphenator = new Hyphenator();
$dictionary = Dictionary::fromFile('/path/to/my/dictionary/file.ini');
$hyphenator->getDictionaries()->add($dictionary);

This adds the hyphenation patterns from the file ``file.ini`` to the patterns the hyphenator already uses.

.. note::

Each entry in the file ``file.ini`` needs to look like this: ``@:[string]="[numerical pattern]"``
where ``[string]`` is the string that will be matched and ``[numerical pattern]`` describes the
hyphenation pattern in digits from 0 to 9. Odd numbers mark positions where a hyphenation is
allowed and even numbers mark positions where a hyphenation is forbidden. When the patterns
are merged, a higher number overwrites a lower number at the same position, so higher numbers
take precedence. The pattern always consists of one digit more than the number of characters
of the string. The first digit marks the position before the first character of the string,
the second digit marks the position between the first and the second character and so on until the
last digit, which marks the position after the last character.
For example, ``@:produktionsstrategie = "000000000009800000000"`` allows a hyphenation between
``produktions`` and ``strategie`` (the ``9``) and forbids one between the ``s`` and the ``t`` of ``strategie`` (the ``8``).
33 changes: 30 additions & 3 deletions src/Dictionary/Dictionary.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@

namespace Org\Heigl\Hyphenator\Dictionary;

use RuntimeException;
use function mb_substr;
use function parse_ini_file;
use function str_replace;

/**
* This class provides a generic dictionary containing hyphenation-patterns
Expand Down Expand Up @@ -91,6 +94,29 @@ public static function factory($locale)
return $dict;
}

/**
 * Named constructor: build a dictionary and load the given locale into it.
 *
 * @param mixed $locale Locale handed unchanged to load(); presumably a
 *                      locale string such as 'de_DE' — confirm against load().
 *
 * @return Dictionary The freshly created dictionary after load() was called on it.
 */
public static function fromLocale($locale): Dictionary
{
    $instance = new self();

    // load() is invoked for its effect on the instance; its return value is not used.
    $instance->load($locale);

    return $instance;
}

/**
 * Named constructor: build a dictionary from an INI file of hyphenation patterns.
 *
 * Every key/value pair returned by parse_ini_file() becomes one dictionary
 * entry. The "@:" marker is stripped from the key; the value is stored as is.
 *
 * NOTE(review): getPatternsForWord() looks entries up by lower-cased substring,
 * so keys containing upper-case characters would presumably never match —
 * confirm whether keys should be normalised here.
 *
 * @param string $file Path to the INI file containing the patterns.
 *
 * @throws RuntimeException When $file is not a readable regular file or
 *                          cannot be parsed as an INI file.
 *
 * @return Dictionary
 */
public static function fromFile(string $file): Dictionary
{
    // The message promises a readability check, so test for that as well
    // as for the path being a regular file.
    if (! is_file($file) || ! is_readable($file)) {
        throw new RuntimeException(sprintf("The file \"%s\" is not readable", $file));
    }

    // parse_ini_file() returns false on failure; iterating over that would
    // only raise a warning and silently yield an empty dictionary.
    $patterns = parse_ini_file($file);
    if (false === $patterns) {
        throw new RuntimeException(sprintf("The file \"%s\" could not be parsed", $file));
    }

    $dictionary = new Dictionary();

    foreach ($patterns as $key => $val) {
        $dictionary->dictionary[str_replace('@:', '', $key)] = $val;
    }

    return $dictionary;
}

/**
* Load a given locale-file as base for the dictionary
*
Expand Down Expand Up @@ -180,10 +206,11 @@ public function getPatternsForWord($word)
for ($i = 0; $i <= $strlen; $i ++) {
for ($j = 2; $j <= ($strlen-$i); $j++) {
$substr = mb_substr($word, $i, $j);
if (! isset($this->dictionary[$substr])) {
$lowerSubstring = mb_strtolower($substr);
if (! isset($this->dictionary[$lowerSubstring])) {
continue;
}
$return[$substr] = $this->dictionary[$substr];
$return[$substr] = $this->dictionary[$lowerSubstring];
}
}

Expand All @@ -198,7 +225,7 @@ public function getPatternsForWord($word)
*
* @return \Org\Heigl\Hyphenator\Dictionary\Dictionary
*/
public function addPAttern($string, $pattern)
public function addPattern($string, $pattern)
{
$this->dictionary[$string] = $pattern;

Expand Down
4 changes: 1 addition & 3 deletions src/Hyphenator.php
Original file line number Diff line number Diff line change
Expand Up @@ -425,9 +425,7 @@ public function filter(TokenRegistry $registry)
*/
public function getPatternForToken(WordToken $token)
{
foreach ($this->getDictionaries() as $dictionary) {
$token->addPattern($dictionary->getPatternsForWord($token->get()));
}
$token->addPattern($this->getDictionaries()->getHyphenationPatterns($token->get()));

return $token;
}
Expand Down
7 changes: 6 additions & 1 deletion tests/HyphenatorFeatureTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ public function testHyphenationOfSingleWordWithDefaultOutput($word, $language, $
$h = new h\Hyphenator();
$h->setOptions($o);

$h->getDictionaries()->add(h\Dictionary\Dictionary::fromFile(__DIR__ . '/share/de_DE.ini'));

// $h->getDictionaries()->getDictionaryWithKey(0)->addPattern('strategie', '9800000000');

$this->assertEquals($expected, $h->hyphenate($word));
}

Expand All @@ -122,6 +126,7 @@ public function hyphenationOfSingleWordWithDefaultOutputProvider()
['urinstinkt ', 'de_DE', 'ur^in^stinkt ', h\Hyphenator::QUALITY_HIGHEST],
['Brücke ', 'de_DE', 'Brü^cke ', h\Hyphenator::QUALITY_NORMAL],
['Röcke ', 'de_DE', 'Rö^cke '],
['Produktionsstrategie ', 'de_DE', 'Pro^duk^ti^ons^stra^te^gie '],
];
}

Expand Down Expand Up @@ -152,7 +157,7 @@ public function hyphenationOfHtmlWithDefaultOutputProvider()
[
'<xml>Otto<br/>Aussichtsturm</html>',
'de_DE',
'<xml>Ot^to<br/>Aus^sicht^sturm</html>',
'<xml>Ot^to<br/>Aus^sicht^s^turm</html>',
h\Hyphenator::QUALITY_NORMAL
],
];
Expand Down
1 change: 1 addition & 0 deletions tests/share/de_DE.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
@:produktionsstrategie = "000000000009800000000"

0 comments on commit ea34c3b

Please sign in to comment.