From 956a3df9e4767012070e069b732dff95462bff10 Mon Sep 17 00:00:00 2001
From: Benoit Chevallier-Mames <benoit.chevalliermames@zama.ai>
Date: Tue, 13 Aug 2024 13:23:49 +0200
Subject: [PATCH] docs(frontend): adding a use-case for fuzzy encrypted name
 comparison

---
 .../IBAN_name_check.ipynb                     | 356 ++++++++++++++++++
 .../examples/levenshtein_distance/README.md   |  17 +-
 .../levenshtein_distance.py                   | 146 +++++--
 3 files changed, 476 insertions(+), 43 deletions(-)
 create mode 100644 frontends/concrete-python/examples/levenshtein_distance/IBAN_name_check.ipynb

diff --git a/frontends/concrete-python/examples/levenshtein_distance/IBAN_name_check.ipynb b/frontends/concrete-python/examples/levenshtein_distance/IBAN_name_check.ipynb
new file mode 100644
index 0000000000..61f2762637
--- /dev/null
+++ b/frontends/concrete-python/examples/levenshtein_distance/IBAN_name_check.ipynb
@@ -0,0 +1,356 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "bf2a3e8d-bf57-4841-a863-1eab96c91373",
+   "metadata": {},
+   "source": [
+    "# Comparing encrypted IBAN names\n",
+    "\n",
+    "When making a transfer between Bank A and Bank B, Bank B is obliged to check that the IBAN and the name of the recipient match. This is essential to combat fraud (a fraudster impersonating someone else) and to avoid misdirected payments. Bank B would usually not reject a transfer if the name is close enough but doesn’t exactly match the recipient’s actual name. This tolerance is essential to make room for small spelling mistakes, considering the impact of a rejected transfer (days or weeks of delay that can harm a business or a buyer, extra costs to handle the error, …). It is therefore important for Bank A to pre-check the name and inform the sender that the name likely does not match, before initiating the transfer. For privacy reasons, however, it is better to do this pre-check over encrypted names.\n",
+    "\n",
+    "In this short tutorial, we show how to use our TFHE Levenshtein distance computation to perform such a privacy-preserving check, very simply and directly in Python. This tutorial can easily be configured, for example to change the way strings are normalized before encryption and comparison."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dc96a80f-0b14-4e64-a33f-31a60351453d",
+   "metadata": {},
+   "source": [
+    "## Importing our FHE Levenshtein computations\n",
+    "\n",
+    "Have a look at `levenshtein_distance.py` to see how the FHE computations are handled."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "56ba9e20-ca46-4aa6-a0f7-86ca13480a52",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from levenshtein_distance import *\n",
+    "from time import time"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "494cd58c-ea28-4547-92c5-80ed4ba83964",
+   "metadata": {},
+   "source": [
+    "## Define the comparison functions\n",
+    "\n",
+    "FHE computation will happen in `calculate_and_return`, if `fhe_or_simulate` is set to `fhe`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2410b6b3-0c21-4178-b8dc-f734ec0afd40",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def normalized_string(st):\n",
+    "    \"\"\"Normalize a string, so that the distance between the non-normalized\n",
+    "    strings 'John Doe' and 'doe john' is small. This function can be configured depending\n",
+    "    on the needs.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # Force lower case\n",
+    "    st = st.lower()\n",
+    "\n",
+    "    # Replace - and . by spaces\n",
+    "    st = st.replace('-', ' ')\n",
+    "    st = st.replace('.', ' ')\n",
+    "    \n",
+    "    # Sort the words and join\n",
+    "    words = st.split()\n",
+    "    st = \"\".join(sorted(words))\n",
+    "\n",
+    "    return st\n",
+    "\n",
+    "def compare_IBAN_names(string0: str, string1: str, fhe_or_simulate: str):\n",
+    "    \"\"\"Compare two IBAN names: first, normalize the strings, then compute in FHE (look in \n",
+    "    calculate_and_return for FHE details).\"\"\"\n",
+    "    # Normalize strings\n",
+    "    string0 = normalized_string(string0)\n",
+    "    string1 = normalized_string(string1)\n",
+    "    max_string_length = max(len(string0), len(string1))\n",
+    "\n",
+    "    alphabet = Alphabet.init_by_name(\"name\")\n",
+    "    levenshtein_distance = LevenshteinDistance(alphabet, max_string_length, show_mlir = False, show_optimizer = False)\n",
+    "    time_begin = time()\n",
+    "    distance = levenshtein_distance.calculate_and_return(string0, string1, mode=fhe_or_simulate)    \n",
+    "    time_end = time()\n",
+    "    \n",
+    "    max_len = max(len(string0), len(string1))\n",
+    "    similarity = (max_len - distance) / max_len\n",
+    "\n",
+    "    print(f\"Similarity between the two strings is {similarity:.4f}, computed in {time_end - time_begin: .2f} seconds\")\n",
+    "    return similarity"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f9006416-8240-4d8b-be8d-9011547f4719",
+   "metadata": {},
+   "source": [
+    "Set the following option to \"fhe\" to run computations in FHE. If you set it to \"simulate\", only simulation is performed, which is sufficient to debug what happens, but should not be used in production settings. Note that computations in FHE can take a long time, especially if the strings are long."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "3b79df6b-8aff-4bfe-b119-e4aec78e04af",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fhe_or_simulate = \"fhe\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4062d22f-ae05-4493-a1ab-6a6a16bbc1f3",
+   "metadata": {},
+   "source": [
+    "## Make a few comparisons in a private setting\n",
+    "\n",
+    "First, with equal strings, the match is perfect."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "60ccaded-7579-4bd4-a972-7eea98d5d585",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Similarity between the two strings is 1.0000, computed in  149.59 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "string0 = \"John Doe\"\n",
+    "string1 = \"John Doe\"\n",
+    "\n",
+    "assert compare_IBAN_names(string0, string1, fhe_or_simulate = fhe_or_simulate) == 1.0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f92bc91b-cd26-4ced-af4e-49811bea2353",
+   "metadata": {},
+   "source": [
+    "With reversed names, the match is also perfect, thanks to our definition of `normalized_string`. If this property is not desired, `normalized_string` can be changed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "c9658e10-94dd-4e6a-8352-639493ac36f7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Similarity between the two strings is 1.0000, computed in  154.02 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "string0 = \"John Doe\"\n",
+    "string1 = \"Doe John\"\n",
+    "\n",
+    "assert compare_IBAN_names(string0, string1, fhe_or_simulate = fhe_or_simulate) == 1.0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c871b320-f93c-4fdb-9a70-5d423811961e",
+   "metadata": {},
+   "source": [
+    "With a typo, similarity is smaller, but still quite high."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "a822a188-a7ae-466f-8caa-15d91131fc5c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Similarity between the two strings is 0.8571, computed in  133.71 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "string0 = \"John Doe\"\n",
+    "string1 = \"John Do\"\n",
+    "\n",
+    "assert round(compare_IBAN_names(string0, string1, fhe_or_simulate = fhe_or_simulate), 2) == 0.86"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a7c26654-08da-4755-8eba-25aef6d49e2a",
+   "metadata": {},
+   "source": [
+    "With an added letter, it is also high."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "fba38c06-d26a-4dc8-9442-d1f128068d1b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Similarity between the two strings is 0.8750, computed in  166.83 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "string0 = \"John Doe\"\n",
+    "string1 = \"John W Doe\"\n",
+    "\n",
+    "assert round(compare_IBAN_names(string0, string1, fhe_or_simulate = fhe_or_simulate), 2) == 0.88"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fab3e31c-5533-4983-a854-bfb9bb360611",
+   "metadata": {},
+   "source": [
+    "With the way we have normalized strings, we consider '-' and ' ' as equal."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "fc8a70c6-65ee-40c5-98a4-bbc681f2b873",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Similarity between the two strings is 1.0000, computed in  150.00 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "string0 = \"John Doe\"\n",
+    "string1 = \"John-Doe\"\n",
+    "\n",
+    "assert round(compare_IBAN_names(string0, string1, fhe_or_simulate = fhe_or_simulate), 2) == 1.0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7e2db4f0-e1ef-4726-b9d4-d9aedf20ad43",
+   "metadata": {},
+   "source": [
+    "Finally, with totally different names, we can see a very low similarity."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "330de097-fc30-4d46-b2bb-459ab8e00a27",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Similarity between the two strings is 0.1429, computed in  148.66 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "string0 = \"John Doe\"\n",
+    "string1 = \"Gill Cot\"\n",
+    "\n",
+    "assert round(compare_IBAN_names(string0, string1, fhe_or_simulate = fhe_or_simulate), 2) == 0.14"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "001c7c1e-37db-4488-925f-2c46a902d962",
+   "metadata": {},
+   "source": [
+    "Note that, as we sort words in `normalized_string`, a typo in the first letter of a word can have a large impact, since it may change the word order. It is not easy to find a function which accepts word reordering but is, at the same time, not too sensitive to mistakes in the first letters of words. Banks can make their own choices to fit their preferences."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "c5600fde-3c42-4f52-ad0c-fa0ebda9b0cf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Similarity between the two strings is 0.1429, computed in  155.03 seconds\n",
+      "Similarity between the two strings is 0.8571, computed in  148.72 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "# One typo in the first letter\n",
+    "string0 = \"John Doe\"\n",
+    "string1 = \"John Poe\"\n",
+    "\n",
+    "assert round(compare_IBAN_names(string0, string1, fhe_or_simulate = fhe_or_simulate), 2) == 0.14\n",
+    "\n",
+    "# One typo in the last letter\n",
+    "string0 = \"John Doe\"\n",
+    "string1 = \"John Doy\"\n",
+    "\n",
+    "assert round(compare_IBAN_names(string0, string1, fhe_or_simulate = fhe_or_simulate), 2) == 0.86"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a61bc76-7c38-4251-bafd-51d7843dd3c7",
+   "metadata": {},
+   "source": [
+    "## Conclusion\n",
+    "\n",
+    "We have shown how to use Levenshtein distances in FHE to perform IBAN name checks in a private way. Since the code is open-source and written in Python, it is easy for developers to modify it and fine-tune it to their specific needs, e.g., in terms of string normalization."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/frontends/concrete-python/examples/levenshtein_distance/README.md b/frontends/concrete-python/examples/levenshtein_distance/README.md
index b7a686e833..df18885cb8 100644
--- a/frontends/concrete-python/examples/levenshtein_distance/README.md
+++ b/frontends/concrete-python/examples/levenshtein_distance/README.md
@@ -16,18 +16,20 @@ More information can be found for example on the [Wikipedia page](https://en.wik
 
 ## Computing the distance in FHE
 
-It can be interesting to compute this distance over encrypted data, for example in the banking sector.
+It can be interesting to compute this distance over encrypted data, for example in the banking sector (see also our [notebook](IBAN_name_check.ipynb) for IBAN name checks).
+
 We show in [our code](levenshtein_distance.py) how to do that simply, with our FHE modules.
 
 Available options are:
 
 ```
 usage: levenshtein_distance.py [-h] [--show_mlir] [--show_optimizer] [--autotest] [--autoperf] [--distance DISTANCE DISTANCE]
-                               [--alphabet {string,STRING,StRiNg,ACTG}] [--max_string_length MAX_STRING_LENGTH]
+                               [--alphabet {string,STRING,StRiNg,ACTG,name}] [--max_string_length MAX_STRING_LENGTH]
+                               [--normalize_strings_before_distance]
 
 Levenshtein distance in Concrete.
 
-optional arguments:
+options:
   -h, --help            show this help message and exit
   --show_mlir           Show the MLIR
   --show_optimizer      Show the optimizer outputs
@@ -35,10 +37,12 @@ optional arguments:
   --autoperf            Run benchmarks
   --distance DISTANCE DISTANCE
                         Compute a distance
-  --alphabet {string,STRING,StRiNg,ACTG}
+  --alphabet {string,STRING,StRiNg,ACTG,name}
                         Setting the alphabet
   --max_string_length MAX_STRING_LENGTH
                         Setting the maximal size of strings
+  --normalize_strings_before_distance
+                        Normalize strings before computing their distance
 ```
 
 The different alphabets are:
@@ -46,6 +50,7 @@ The different alphabets are:
 - STRING: capitalized letters, ie `[A-Z]*`
 - StRiNg: non capitalized letters and capitalized letters
 - ACTG: `[ACTG]*`, for DNA analysis
+- name: non capitalized letters and capitalized letters, plus spaces, '.' and '-'
 
 It is very easy to add a new alphabet in the code.
 
@@ -155,6 +160,8 @@ Typical performances for alphabet StRiNg, with string of maximal length:
     Computing Levenshtein between strings 'hZyX' and 'vhHH' - OK in 30.11 seconds
     Computing Levenshtein between strings 'sJdj' and 'strn' - OK in 30.48 seconds
 
+FIXME: to be updated, by launching on hpc7a
+
 Successful end
 ```
 
@@ -203,5 +210,7 @@ Typical performances for alphabet StRiNg, with string of maximal length:
     Computing Levenshtein between strings 'QGCj' and 'Lknx' - OK in 29.82 seconds
     Computing Levenshtein between strings 'fKVC' and 'xqaI' - OK in 30.27 seconds
 
+FIXME: to be updated, by launching on hpc7a
+
 Successful end
 ```
diff --git a/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.py b/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.py
index 6174647319..a8038800dc 100644
--- a/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.py
+++ b/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.py
@@ -34,6 +34,11 @@ def anycase():
         """Set any-case alphabet."""
         return Alphabet.lowercase() + Alphabet.uppercase()
 
+    @staticmethod
+    def name():
+        """Set alphabet for names: any-case letters plus '-', '.' and ' '."""
+        return Alphabet.lowercase() + Alphabet.uppercase() + Alphabet("-. ")
+
     @staticmethod
     def dna():
         """Set DNA alphabet."""
@@ -52,7 +57,7 @@ def __add__(self, other: "Alphabet") -> "Alphabet":
     @staticmethod
     def return_available_alphabets() -> list:
         """Return available alphabets."""
-        return ["string", "STRING", "StRiNg", "ACTG"]
+        return ["string", "STRING", "StRiNg", "ACTG", "name"]
 
     @staticmethod
     def init_by_name(alphabet_name: str) -> "Alphabet":
@@ -67,6 +72,8 @@ def init_by_name(alphabet_name: str) -> "Alphabet":
             return Alphabet.uppercase()
         if alphabet_name == "StRiNg":
             return Alphabet.anycase()
+        if alphabet_name == "name":
+            return Alphabet.name()
 
         assert alphabet_name == "ACTG", f"Unknown alphabet {alphabet_name}"
         return Alphabet.dna()
@@ -120,19 +127,29 @@ class LevenshteinDistance:
     alphabet: Alphabet
     module: fhe.module  # type: ignore
 
-    def __init__(self, alphabet: Alphabet, args):
+    def __init__(
+        self, alphabet: Alphabet, max_string_length: int, show_mlir: bool, show_optimizer: bool
+    ):
         self.alphabet = alphabet
 
-        self._compile_module(args)
+        self._compile_module(max_string_length, show_mlir, show_optimizer)
 
     def calculate(self, a: str, b: str, mode: str, show_distance: bool = False):
         """Compute a distance between two strings, either in fhe or in simulate."""
         if mode == "simulate":
-            self._compute_in_simulation([(a, b)])
+            self._compute_in_simulation([(a, b)], show_distance=show_distance)
         else:
             assert mode == "fhe", "Only 'simulate' and 'fhe' mode are available"
             self._compute_in_fhe([(a, b)], show_distance=show_distance)
 
+    def calculate_and_return(self, a: str, b: str, mode: str) -> int:
+        """Return distance between two strings, either in fhe or in simulate."""
+        if mode == "simulate":
+            return self._compute_and_return_in_simulation(a, b)
+
+        assert mode == "fhe", "Only 'simulate' and 'fhe' mode are available"
+        return self._compute_and_return_in_fhe(a, b)
+
     def calculate_list(self, pairs_to_compute_on: list, mode: str):
         """Compute a distance between strings of a list, either in fhe or in simulate."""
         for a, b in pairs_to_compute_on:
@@ -149,7 +166,7 @@ def _encode_and_encrypt_strings(self, a: str, b: str) -> tuple:
 
         return a_enc, b_enc
 
-    def _compile_module(self, args):
+    def _compile_module(self, max_string_length: int, show_mlir: bool, show_optimizer: bool):
         """Compile the FHE module."""
         assert len(self.alphabet.mapping_to_int) > 0, "Mapping not defined"
 
@@ -163,41 +180,63 @@ def _compile_module(self, args):
         inputset_mix = [
             (
                 np.random.randint(2),
-                np.random.randint(args.max_string_length),
-                np.random.randint(args.max_string_length),
-                np.random.randint(args.max_string_length),
-                np.random.randint(args.max_string_length),
+                np.random.randint(max_string_length),
+                np.random.randint(max_string_length),
+                np.random.randint(max_string_length),
+                np.random.randint(max_string_length),
             )
             for _ in range(1000)
         ]
 
         # pylint: disable-next=no-member
-        self.module = LevenshsteinModule.compile(
+        self.module = LevenshsteinModule.compile(  # type: ignore[attr-defined]
             {
                 "equal": inputset_equal,
                 "mix": inputset_mix,
                 "constant": [i for i in range(len(self.alphabet.mapping_to_int))],
             },
-            show_mlir=args.show_mlir,
+            show_mlir=show_mlir,
             p_error=10**-20,
-            show_optimizer=args.show_optimizer,
+            show_optimizer=show_optimizer,
             comparison_strategy_preference=fhe.ComparisonStrategy.ONE_TLU_PROMOTED,
             min_max_strategy_preference=fhe.MinMaxStrategy.ONE_TLU_PROMOTED,
         )
 
-    def _compute_in_simulation(self, list_patterns: list):
+    def _compute_and_return_in_simulation(self, a: str, b: str) -> int:
+        """Check equality between distance in simulation and clear distance, and return."""
+        a_as_int = self.alphabet.encode(a)
+        b_as_int = self.alphabet.encode(b)
+
+        l1_simulate = levenshtein_simulate(self.module, a_as_int, b_as_int)
+        l1_clear = levenshtein_clear(a_as_int, b_as_int)
+
+        assert l1_simulate == l1_clear, f"    {l1_simulate=} and {l1_clear=} are different"
+
+        return int(l1_simulate)
+
+    def _compute_in_simulation(self, list_patterns: list, show_distance: bool = False):
         """Check equality between distance in simulation and clear distance."""
         for a, b in list_patterns:
             print(f"    Computing Levenshtein between strings '{a}' and '{b}'", end="")
 
-            a_as_int = self.alphabet.encode(a)
-            b_as_int = self.alphabet.encode(b)
+            l1_simulate = self._compute_and_return_in_simulation(a, b)
 
-            l1_simulate = levenshtein_simulate(self.module, a_as_int, b_as_int)
-            l1_clear = levenshtein_clear(a_as_int, b_as_int)
+            if not show_distance:
+                print(" - OK")
+            else:
+                print(f" - distance is {l1_simulate}")
 
-            assert l1_simulate == l1_clear, f"    {l1_simulate=} and {l1_clear=} are different"
-            print(" - OK")
+    def _compute_and_return_in_fhe(self, a: str, b: str):
+        """Check equality between distance in FHE and clear distance, and return."""
+        a_enc, b_enc = self._encode_and_encrypt_strings(a, b)
+
+        l1_fhe_enc = levenshtein_fhe(self.module, a_enc, b_enc)
+        l1_fhe = self.module.mix.decrypt(l1_fhe_enc)  # type: ignore
+        l1_clear = levenshtein_clear(a, b)
+
+        assert l1_fhe == l1_clear, f"    {l1_fhe=} and {l1_clear=} are different"
+
+        return l1_fhe
 
     def _compute_in_fhe(self, list_patterns: list, show_distance: bool = False):
         """Check equality between distance in FHE and clear distance."""
@@ -207,18 +246,10 @@ def _compute_in_fhe(self, list_patterns: list, show_distance: bool = False):
         for a, b in list_patterns:
             print(f"    Computing Levenshtein between strings '{a}' and '{b}'", end="")
 
-            a_enc, b_enc = self._encode_and_encrypt_strings(a, b)
-
             time_begin = time.time()
-            l1_fhe_enc = levenshtein_fhe(self.module, a_enc, b_enc)
+            l1_fhe = self._compute_and_return_in_fhe(a, b)
             time_end = time.time()
 
-            l1_fhe = self.module.mix.decrypt(l1_fhe_enc)  # type: ignore
-
-            l1_clear = levenshtein_clear(a, b)
-
-            assert l1_fhe == l1_clear, f"    {l1_fhe=} and {l1_clear=} are different"
-
             if not show_distance:
                 print(f" - OK in {time_end - time_begin:.2f} seconds")
             else:
@@ -277,6 +308,21 @@ def mix(is_equal, if_equal, case_1, case_2, case_3):
     )
 
 
+def normalized_string(st):
+    """Normalize a string, so that the distance between the non-normalized
+    strings 'John Doe' and 'doe john' is small.
+    """
+
+    # Force lower case
+    st = st.lower()
+
+    # Sort the words and join
+    words = st.split()
+    st = "".join(sorted(words))
+
+    return st
+
+
 @lru_cache
 def levenshtein_clear(x: str, y: str):
     """Compute the distance in clear, for reference and comparison."""
@@ -382,6 +428,11 @@ def manage_args():
         default=4,
         help="Setting the maximal size of strings",
     )
+    parser.add_argument(
+        "--normalize_strings_before_distance",
+        action="store_true",
+        help="Normalize strings before computing their distance",
+    )
     args = parser.parse_args()
 
     # At least one option
@@ -402,7 +453,9 @@ def main():
     # Do what the user requested
     if args.autotest:
         alphabet = Alphabet.init_by_name(args.alphabet)
-        levenshtein_distance = LevenshteinDistance(alphabet, args)
+        levenshtein_distance = LevenshteinDistance(
+            alphabet, args.max_string_length, args.show_mlir, args.show_optimizer
+        )
 
         print(f"Making random tests with alphabet {args.alphabet}")
         print(f"Letters are {alphabet.letters}\n")
@@ -415,14 +468,16 @@ def main():
         print("")
 
     if args.autoperf:
-        for alphabet_name in ["ACTG", "string", "STRING", "StRiNg"]:
+        for alphabet_name in ["ACTG", "name", "string", "STRING", "StRiNg"]:
             print(
                 f"Typical performances for alphabet {alphabet_name}, with string of "
                 "maximal length:\n"
             )
 
             alphabet = Alphabet.init_by_name(alphabet_name)
-            levenshtein_distance = LevenshteinDistance(alphabet, args)
+            levenshtein_distance = LevenshteinDistance(
+                alphabet, args.max_string_length, args.show_mlir, args.show_optimizer
+            )
             list_patterns = alphabet.prepare_random_patterns(
                 args.max_string_length, args.max_string_length, 3
             )
@@ -432,21 +487,34 @@ def main():
     if args.distance is not None:
         print(
             f"Running distance between strings '{args.distance[0]}' and '{args.distance[1]}' "
-            f"for alphabet {args.alphabet}:\n"
+            f"for alphabet {args.alphabet}:"
         )
 
-        if max(len(args.distance[0]), len(args.distance[1])) > args.max_string_length:
-            args.max_string_length = max(len(args.distance[0]), len(args.distance[1]))
+        string0 = args.distance[0]
+        string1 = args.distance[1]
+
+        if args.normalize_strings_before_distance:
+            string0 = normalized_string(string0)
+            string1 = normalized_string(string1)
             print(
-                "Warning, --max_string_length was smaller than lengths of the input strings, "
-                "fixing it"
+                f"Normalized strings are '{string0}' and '{string1}' "
+                "(lower case, no space, sorted words)"
             )
 
+        if max(len(string0), len(string1)) > args.max_string_length:
+            args.max_string_length = max(len(string0), len(string1))
+            print(
+                "Warning, --max_string_length was smaller than lengths of "
+                "the input strings, fixing it"
+            )
+
+        print()
+
         alphabet = Alphabet.init_by_name(args.alphabet)
-        levenshtein_distance = LevenshteinDistance(alphabet, args)
-        levenshtein_distance.calculate(
-            args.distance[0], args.distance[1], mode="fhe", show_distance=True
+        levenshtein_distance = LevenshteinDistance(
+            alphabet, args.max_string_length, args.show_mlir, args.show_optimizer
         )
+        levenshtein_distance.calculate(string0, string1, mode="fhe", show_distance=True)
         print("")
 
     print("Successful end\n")