Skip to content

Commit

Permalink
Refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanPyrohivskyi committed Jan 15, 2025
1 parent 40e4d56 commit f3fde36
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 40 deletions.
15 changes: 6 additions & 9 deletions OsmAnd-java/src/main/java/net/osmand/CollatorStringMatcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,13 @@ public boolean matches(String name) {
return cmatches(collator, name, part, mode);
}

public static boolean cmatches(Collator collator, String fullName, String part, StringMatcherMode mode){
String withoutDiacritic = ArabicNormalizer.normalize(fullName);
boolean matchDiacritic = false;
if (!fullName.equals(withoutDiacritic)) {
matchDiacritic = cmatchInternal(collator, withoutDiacritic, part, mode);
public static boolean cmatches(Collator collator, String fullName, String part, StringMatcherMode mode) {
if (ArabicNormalizer.isSpecialArabic(fullName)) {
fullName = ArabicNormalizer.normalize(fullName);
}
if (ArabicNormalizer.isSpecialArabic(part)) {
part = ArabicNormalizer.normalize(part);
}
return matchDiacritic || cmatchInternal(collator, fullName, part, mode);
}

private static boolean cmatchInternal(Collator collator, String fullName, String part, StringMatcherMode mode){
switch (mode) {
case CHECK_CONTAINS:
return ccontains(collator, fullName, part);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,8 @@
import net.osmand.search.SearchUICore.SearchResultMatcher;
import net.osmand.search.core.SearchPhrase.NameStringMatcher;
import net.osmand.search.core.SearchPhrase.SearchPhraseDataType;
import net.osmand.util.Algorithms;
import net.osmand.util.GeoPointParserUtil;
import net.osmand.util.GeoParsedPoint;
import net.osmand.util.LocationParser;
import net.osmand.util.*;
import net.osmand.util.LocationParser.ParsedOpenLocationCode;
import net.osmand.util.MapUtils;

import java.io.IOException;
import java.text.DecimalFormat;
Expand Down Expand Up @@ -556,6 +552,9 @@ public boolean isCancelled() {
Iterator<BinaryMapIndexReader> offlineIterator = phrase.getRadiusOfflineIndexes(DEFAULT_ADDRESS_BBOX_RADIUS * 5,
SearchPhraseDataType.ADDRESS);
String wordToSearch = phrase.getUnknownWordToSearch();
if (ArabicNormalizer.isSpecialArabic(wordToSearch)) {
wordToSearch = ArabicNormalizer.normalize(wordToSearch);
}
while (offlineIterator.hasNext() && wordToSearch.length() > 0) {
BinaryMapIndexReader r = offlineIterator.next();
currentFile[0] = r;
Expand Down Expand Up @@ -609,6 +608,9 @@ public boolean search(final SearchPhrase phrase, final SearchResultMatcher resul
Iterator<BinaryMapIndexReader> offlineIterator = phrase.getRadiusOfflineIndexes(BBOX_RADIUS,
SearchPhraseDataType.POI);
String searchWord = phrase.getUnknownWordToSearch();
if (ArabicNormalizer.isSpecialArabic(searchWord)) {
searchWord = ArabicNormalizer.normalize(searchWord);
}
final NameStringMatcher nm = phrase.getMainUnknownNameStringMatcher();
QuadRect bbox = phrase.getFileRequest() != null ? phrase.getRadiusBBoxToSearch(BBOX_RADIUS_POI_IN_CITY) : phrase.getRadiusBBoxToSearch(BBOX_RADIUS_INSIDE);
final Set<String> ids = new HashSet<String>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -803,12 +803,7 @@ public boolean matches(Collection<String> map) {

/**
 * Checks a single name against the wrapped matcher.
 *
 * The name is passed through unchanged; this method performs no
 * normalization of its own and simply reports what {@code sm} decides.
 *
 * @param name candidate name to test
 * @return the result of {@code sm.matches(name)}
 */
@Override
public boolean matches(String name) {
	return sm.matches(name);
}

}
Expand Down
72 changes: 52 additions & 20 deletions OsmAnd-java/src/main/java/net/osmand/util/ArabicNormalizer.java
Original file line number Diff line number Diff line change
@@ -1,35 +1,67 @@
package net.osmand.util;

import java.text.Normalizer;
import java.util.regex.Pattern;

/**
 * Light-weight normalizer for Arabic search text.
 *
 * Normalization consists of three steps:
 * <ul>
 *   <li>removing the diacritic marks in the range U+064B..U+0652,</li>
 *   <li>removing the kashida (tatweel, U+0640),</li>
 *   <li>replacing Arabic-Indic digits (U+0660..U+0669) with ASCII digits,
 *       but only when the remaining text starts with a character from the
 *       Unicode ARABIC block.</li>
 * </ul>
 * Letters themselves are never rewritten.
 */
public class ArabicNormalizer {

	private static final char KASHIDA = '\u0640';
	private static final char FIRST_DIACRITIC = '\u064B';
	private static final char LAST_DIACRITIC = '\u0652';
	private static final char ARABIC_ZERO = '\u0660';
	private static final char ARABIC_NINE = '\u0669';

	/**
	 * Tells whether {@link #normalize(String)} could change the given text.
	 *
	 * @param text text to inspect, may be {@code null}
	 * @return {@code true} when the text starts with a character from the
	 *         Unicode ARABIC block and contains at least one diacritic,
	 *         Arabic-Indic digit or kashida; {@code false} otherwise,
	 *         including for {@code null} and empty input
	 */
	public static boolean isSpecialArabic(String text) {
		if (text == null || text.isEmpty() || !isArabic(text.charAt(0))) {
			return false;
		}
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			if (isDiacritic(c) || isArabicDigit(c) || isKashida(c)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Removes diacritics and kashida and converts Arabic-Indic digits.
	 *
	 * @param text text to normalize, may be {@code null}
	 * @return the normalized text; {@code null} or empty input is returned
	 *         as is, and input consisting solely of removable marks yields
	 *         an empty string
	 */
	public static String normalize(String text) {
		if (text == null || text.isEmpty()) {
			return text;
		}
		StringBuilder stripped = new StringBuilder(text.length());
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			if (!isDiacritic(c) && !isKashida(c)) {
				stripped.append(c);
			}
		}
		return replaceDigits(stripped.toString());
	}

	/**
	 * Replaces Arabic-Indic digits with their ASCII counterparts when the
	 * text starts with an Arabic character; other text is returned untouched.
	 */
	private static String replaceDigits(String text) {
		// The empty check matters: stripping marks may leave nothing behind,
		// and charAt(0) on an empty string would throw.
		if (text == null || text.isEmpty() || !isArabic(text.charAt(0))) {
			return text;
		}
		char[] chars = text.toCharArray();
		for (int i = 0; i < chars.length; i++) {
			if (isArabicDigit(chars[i])) {
				chars[i] = (char) ('0' + (chars[i] - ARABIC_ZERO));
			}
		}
		return String.valueOf(chars);
	}

	/** Returns whether the character belongs to the Unicode ARABIC block. */
	private static boolean isArabic(char c) {
		return Character.UnicodeBlock.of(c) == Character.UnicodeBlock.ARABIC;
	}

	/** Returns whether the character is a diacritic in U+064B..U+0652. */
	private static boolean isDiacritic(char c) {
		return c >= FIRST_DIACRITIC && c <= LAST_DIACRITIC;
	}

	/** Returns whether the character is an Arabic-Indic digit (U+0660..U+0669). */
	private static boolean isArabicDigit(char c) {
		return c >= ARABIC_ZERO && c <= ARABIC_NINE;
	}

	/** Returns whether the character is the kashida (U+0640). */
	private static boolean isKashida(char c) {
		return c == KASHIDA;
	}
}

0 comments on commit f3fde36

Please sign in to comment.