From 66a46dc1e4d076e9a5610a28da48750a5b64eb8f Mon Sep 17 00:00:00 2001
From: Aethor <oow.autre@gmail.com>
Date: Fri, 29 Sep 2023 15:23:36 +0200
Subject: [PATCH 1/3] fix a typo in CoOccurrencesGraphExtractor

---
 docs/pipeline.rst                   |  6 ++--
 renard/pipeline/graph_extraction.py | 50 +++++++++++++++++------------
 renard/pipeline/preconfigured.py    |  6 ++--
 renard_tutorial.py                  |  8 ++---
 4 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/docs/pipeline.rst b/docs/pipeline.rst
index 7991447..b479cc2 100644
--- a/docs/pipeline.rst
+++ b/docs/pipeline.rst
@@ -23,7 +23,7 @@ document. Here is a simple example:
            NLTKTokenizer(),
            NLTKNamedEntityRecognizer(),
            NaiveCharactersExtractor(min_appearance=10),
-           CoOccurrencesGraphExtractor(co_occurences_dist=25)
+           CoOccurrencesGraphExtractor(co_occurrences_dist=25)
        ]
    )
 
@@ -56,7 +56,7 @@ to compute them yourself :
        [
            NLTKNamedEntityRecognizer(),
            NaiveCharactersExtractor(min_appearance=10),
-           CoOccurrencesGraphExtractor(co_occurences_dist=25)
+           CoOccurrencesGraphExtractor(co_occurrences_dist=25)
        ]
    )
 
@@ -201,7 +201,7 @@ time. In Renard, such graphs are representend by a ``List`` of
            NLTKNamedEntityRecognizer(),
            NaiveCharactersExtractor(min_appearance=10),
            CoOccurrencesGraphExtractor(
-	       co_occurences_dist=25,
+	       co_occurrences_dist=25,
 	       dynamic=True,     # note the 'dynamic'
 	       dynamic_window=20 # and the 'dynamic_window' argument
 	   )
diff --git a/renard/pipeline/graph_extraction.py b/renard/pipeline/graph_extraction.py
index bdcca23..b13a1ed 100644
--- a/renard/pipeline/graph_extraction.py
+++ b/renard/pipeline/graph_extraction.py
@@ -80,13 +80,16 @@ class CoOccurrencesGraphExtractor(PipelineStep):
 
     def __init__(
         self,
-        co_occurences_dist: Union[int, Tuple[int, Literal["tokens", "sentences"]]],
+        co_occurrences_dist: Union[int, Tuple[int, Literal["tokens", "sentences"]]],
         dynamic: bool = False,
         dynamic_window: Optional[int] = None,
         dynamic_overlap: int = 0,
+        co_occurences_dist: Optional[
+            Union[int, Tuple[int, Literal["tokens", "sentences"]]]
+        ] = None,
     ) -> None:
         """
-        :param co_occurences_dist: max accepted distance between two
+        :param co_occurrences_dist: max accepted distance between two
             character appearances to form a co-occurence interaction.
 
                 - if an ``int`` is given, the distance is in number of
@@ -98,28 +101,35 @@ def __init__(
 
         :param dynamic:
 
-            - if ``False`` (the default), a static ``nx.graph`` is
-              extracted
+                - if ``False`` (the default), a static ``nx.graph`` is
+                  extracted
 
-            - if ``True``, several ``nx.graph`` are extracted.  In
-              that case, ``dynamic_window`` and
-              ``dynamic_overlap``*can* be specified.  If
-              ``dynamic_window`` is not specified, this step is
-              expecting the text to be cut into chapters', and a graph
-              will be extracted for each 'chapter'.  In that case,
-              ``chapters`` must be passed to the pipeline as a
-              ``List[str]`` at runtime.
+                - if ``True``, several ``nx.graph`` are extracted.  In
+                  that case, ``dynamic_window`` and
+                  ``dynamic_overlap`` *can* be specified.  If
+                  ``dynamic_window`` is not specified, this step is
+                  expecting the text to be cut into 'chapters', and a
+                  graph will be extracted for each 'chapter'.  In that
+                  case, ``chapters`` must be passed to the pipeline as
+                  a ``List[str]`` at runtime.
 
         :param dynamic_window: dynamic window, in number of
             interactions.  a dynamic window of `n` means that each
             returned graph will be formed by `n` interactions.
 
         :param dynamic_overlap: overlap, in number of interactions.
+
+        :param co_occurences_dist: misspelled alias of
+            ``co_occurrences_dist``, kept for backward compatibility.
+            When given, it overrides ``co_occurrences_dist``.
         """
+        # typo retrocompatibility
+        if not co_occurences_dist is None:
+            co_occurrences_dist = co_occurences_dist
 
-        if isinstance(co_occurences_dist, int):
-            co_occurences_dist = (co_occurences_dist, "tokens")
-        self.co_occurences_dist = co_occurences_dist
+        if isinstance(co_occurrences_dist, int):
+            co_occurrences_dist = (co_occurrences_dist, "tokens")
+        self.co_occurrences_dist = co_occurrences_dist
 
         if dynamic:
             if not dynamic_window is None:
@@ -181,25 +191,25 @@ def _mentions_interact(
 
         .. note::
 
-            the attribute ``self.co_occurences_dist`` is used to know wether mentions are in co_occurences
+            the attribute ``self.co_occurrences_dist`` is used to know whether mentions co-occur
 
         :param mention_1:
         :param mention_2:
         :param sentences:
         :return: a boolean indicating wether the two mentions are co-occuring
         """
-        if self.co_occurences_dist[1] == "tokens":
+        if self.co_occurrences_dist[1] == "tokens":
             return (
                 abs(mention_2.start_idx - mention_1.start_idx)
-                <= self.co_occurences_dist[0]
+                <= self.co_occurrences_dist[0]
             )
-        elif self.co_occurences_dist[1] == "sentences":
+        elif self.co_occurrences_dist[1] == "sentences":
             assert not sentences is None
             mention_1_sent = sent_index_for_token_index(mention_1.start_idx, sentences)
             mention_2_sent = sent_index_for_token_index(
                 mention_2.end_idx - 1, sentences
             )
-            return abs(mention_2_sent - mention_1_sent) <= self.co_occurences_dist[0]
+            return abs(mention_2_sent - mention_1_sent) <= self.co_occurrences_dist[0]
         else:
             raise NotImplementedError
 
diff --git a/renard/pipeline/preconfigured.py b/renard/pipeline/preconfigured.py
index 994e4b5..30a54dd 100644
--- a/renard/pipeline/preconfigured.py
+++ b/renard/pipeline/preconfigured.py
@@ -27,8 +27,8 @@ def nltk_pipeline(
     characters_extractor_kwargs = characters_extractor_kwargs or {}
     graph_extractor_kwargs = graph_extractor_kwargs or {}
 
-    if not "co_occurences_dist" in graph_extractor_kwargs:
-        graph_extractor_kwargs["co_occurences_dist"] = (1, "sentences")
+    if not "co_occurrences_dist" in graph_extractor_kwargs:
+        graph_extractor_kwargs["co_occurrences_dist"] = (1, "sentences")
 
     return Pipeline(
         [
@@ -69,7 +69,7 @@ def bert_pipeline(
             NLTKTokenizer(),
             BertNamedEntityRecognizer(),
             GraphRulesCharactersExtractor(),
-            CoOccurrencesGraphExtractor(co_occurences_dist=(1, "sentences")),
+            CoOccurrencesGraphExtractor(co_occurrences_dist=(1, "sentences")),
         ],
         **pipeline_kwargs
     )
diff --git a/renard_tutorial.py b/renard_tutorial.py
index b4c6541..dd09c23 100644
--- a/renard_tutorial.py
+++ b/renard_tutorial.py
@@ -61,7 +61,7 @@
 #         NLTKTokenizer(),                                                 # tokenization
 #         NLTKNamedEntityRecognizer(),                                     # named entity recognition
 #         GraphRulesCharactersExtractor(),                                 # characters extraction
-#         CoOccurrencesGraphExtractor(co_occurences_dist=(1, "sentences")) # graph extraction
+#         CoOccurrencesGraphExtractor(co_occurrences_dist=(1, "sentences")) # graph extraction
 #     ]
 # )
 # ```
@@ -105,7 +105,7 @@
         GraphRulesCharactersExtractor(),
         # an interaction will be a co-occurence in a range of 3
         # sentences or less
-        CoOccurrencesGraphExtractor(co_occurences_dist=(3, "sentences")),
+        CoOccurrencesGraphExtractor(co_occurrences_dist=(3, "sentences")),
     ],
     lang="fra",
 )
@@ -143,7 +143,7 @@
         GraphRulesCharactersExtractor(min_appearances=3),
         # A co-occurence between two characters is counted if its
         # range is lower or equal to 10 sentences
-        CoOccurrencesGraphExtractor(co_occurences_dist=(10, "sentences")),
+        CoOccurrencesGraphExtractor(co_occurrences_dist=(10, "sentences")),
     ],
     lang="fra",
 )
@@ -180,7 +180,7 @@
     [
         GraphRulesCharactersExtractor(min_appearances=3),
         CoOccurrencesGraphExtractor(
-            co_occurences_dist=(20, "sentences"),
+            co_occurrences_dist=(20, "sentences"),
             dynamic=True,  # we want to extract a dynamic graph (i.e. a list of sequential graphs)
             dynamic_window=20,  # the size, in interaction, of each graph
             dynamic_overlap=0,  # overlap between windows

From 58ceac7e1fd78247158e634bd2b42bd55386d08c Mon Sep 17 00:00:00 2001
From: Aethor <oow.autre@gmail.com>
Date: Wed, 8 Nov 2023 10:39:07 +0100
Subject: [PATCH 2/3] fix a typo in CoOccurrencesGraphExtractor

---
 docs/pipeline.rst                   |  9 ++++--
 renard/pipeline/graph_extraction.py | 50 +++++++++++++++++------------
 renard_tutorial.py                  | 10 +++---
 3 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/docs/pipeline.rst b/docs/pipeline.rst
index 092b791..134b9c9 100644
--- a/docs/pipeline.rst
+++ b/docs/pipeline.rst
@@ -22,8 +22,8 @@ document. Here is a simple example:
        [
            NLTKTokenizer(),
            NLTKNamedEntityRecognizer(),
            GraphRulesCharacterUnifier(min_appearances=10),
-           CoOccurrencesGraphExtractor(co_occurences_dist=25)
+           CoOccurrencesGraphExtractor(co_occurrences_dist=25)
        ]
    )
 
@@ -55,8 +55,8 @@ to compute them yourself :
    pipeline = Pipeline(
        [
            NLTKNamedEntityRecognizer(),
            GraphRulesCharacterUnifier(min_appearances=10),
-           CoOccurrencesGraphExtractor(co_occurences_dist=25)
+           CoOccurrencesGraphExtractor(co_occurrences_dist=25)
        ]
    )
 
diff --git a/renard/pipeline/graph_extraction.py b/renard/pipeline/graph_extraction.py
index e70e957..b4cf5d3 100644
--- a/renard/pipeline/graph_extraction.py
+++ b/renard/pipeline/graph_extraction.py
@@ -81,13 +81,16 @@ class CoOccurrencesGraphExtractor(PipelineStep):
 
     def __init__(
         self,
-        co_occurences_dist: Union[int, Tuple[int, Literal["tokens", "sentences"]]],
+        co_occurrences_dist: Union[int, Tuple[int, Literal["tokens", "sentences"]]],
         dynamic: bool = False,
         dynamic_window: Optional[int] = None,
         dynamic_overlap: int = 0,
+        co_occurences_dist: Optional[
+            Union[int, Tuple[int, Literal["tokens", "sentences"]]]
+        ] = None,
     ) -> None:
         """
-        :param co_occurences_dist: max accepted distance between two
+        :param co_occurrences_dist: max accepted distance between two
             character appearances to form a co-occurence interaction.
 
                 - if an ``int`` is given, the distance is in number of
@@ -99,28 +102,35 @@ def __init__(
 
         :param dynamic:
 
-            - if ``False`` (the default), a static ``nx.graph`` is
-              extracted
+                - if ``False`` (the default), a static ``nx.graph`` is
+                  extracted
 
-            - if ``True``, several ``nx.graph`` are extracted.  In
-              that case, ``dynamic_window`` and
-              ``dynamic_overlap``*can* be specified.  If
-              ``dynamic_window`` is not specified, this step is
-              expecting the text to be cut into chapters', and a graph
-              will be extracted for each 'chapter'.  In that case,
-              ``chapters`` must be passed to the pipeline as a
-              ``List[str]`` at runtime.
+                - if ``True``, several ``nx.graph`` are extracted.  In
+                  that case, ``dynamic_window`` and
+                  ``dynamic_overlap`` *can* be specified.  If
+                  ``dynamic_window`` is not specified, this step is
+                  expecting the text to be cut into 'chapters', and a
+                  graph will be extracted for each 'chapter'.  In that
+                  case, ``chapters`` must be passed to the pipeline as
+                  a ``List[str]`` at runtime.
 
         :param dynamic_window: dynamic window, in number of
             interactions.  a dynamic window of `n` means that each
             returned graph will be formed by `n` interactions.
 
         :param dynamic_overlap: overlap, in number of interactions.
+
+        :param co_occurences_dist: misspelled alias of
+            ``co_occurrences_dist``, kept for backward compatibility.
+            When given, it overrides ``co_occurrences_dist``.
         """
+        # typo retrocompatibility
+        if not co_occurences_dist is None:
+            co_occurrences_dist = co_occurences_dist
 
-        if isinstance(co_occurences_dist, int):
-            co_occurences_dist = (co_occurences_dist, "tokens")
-        self.co_occurences_dist = co_occurences_dist
+        if isinstance(co_occurrences_dist, int):
+            co_occurrences_dist = (co_occurrences_dist, "tokens")
+        self.co_occurrences_dist = co_occurrences_dist
 
         if dynamic:
             if not dynamic_window is None:
@@ -181,25 +191,25 @@ def _mentions_interact(
 
         .. note::
 
-            the attribute ``self.co_occurences_dist`` is used to know wether mentions are in co_occurences
+            the attribute ``self.co_occurrences_dist`` is used to know whether mentions co-occur
 
         :param mention_1:
         :param mention_2:
         :param sentences:
         :return: a boolean indicating wether the two mentions are co-occuring
         """
-        if self.co_occurences_dist[1] == "tokens":
+        if self.co_occurrences_dist[1] == "tokens":
             return (
                 abs(mention_2.start_idx - mention_1.start_idx)
-                <= self.co_occurences_dist[0]
+                <= self.co_occurrences_dist[0]
             )
-        elif self.co_occurences_dist[1] == "sentences":
+        elif self.co_occurrences_dist[1] == "sentences":
             assert not sentences is None
             mention_1_sent = sent_index_for_token_index(mention_1.start_idx, sentences)
             mention_2_sent = sent_index_for_token_index(
                 mention_2.end_idx - 1, sentences
             )
-            return abs(mention_2_sent - mention_1_sent) <= self.co_occurences_dist[0]
+            return abs(mention_2_sent - mention_1_sent) <= self.co_occurrences_dist[0]
         else:
             raise NotImplementedError
 
diff --git a/renard_tutorial.py b/renard_tutorial.py
index caa2bcb..4596042 100644
--- a/renard_tutorial.py
+++ b/renard_tutorial.py
@@ -60,8 +60,8 @@
 #     [
 #         NLTKTokenizer(),                                                 # tokenization
 #         NLTKNamedEntityRecognizer(),                                     # named entity recognition
 #         GraphRulesCharacterUnifier(),                                    # characters extraction
-#         CoOccurrencesGraphExtractor(co_occurences_dist=(1, "sentences")) # graph extraction
+#         CoOccurrencesGraphExtractor(co_occurrences_dist=(1, "sentences")) # graph extraction
 #     ]
 # )
 # ```
@@ -105,7 +105,7 @@
         GraphRulesCharacterUnifier(),
         # an interaction will be a co-occurence in a range of 3
         # sentences or less
-        CoOccurrencesGraphExtractor(co_occurences_dist=(3, "sentences")),
+        CoOccurrencesGraphExtractor(co_occurrences_dist=(3, "sentences")),
     ],
     lang="fra",
 )
@@ -143,7 +143,7 @@
         GraphRulesCharacterUnifier(min_appearances=3),
         # A co-occurence between two characters is counted if its
         # range is lower or equal to 10 sentences
-        CoOccurrencesGraphExtractor(co_occurences_dist=(10, "sentences")),
+        CoOccurrencesGraphExtractor(co_occurrences_dist=(10, "sentences")),
     ],
     lang="fra",
 )
@@ -180,7 +180,7 @@
     [
         GraphRulesCharacterUnifier(min_appearances=3),
         CoOccurrencesGraphExtractor(
-            co_occurences_dist=(20, "sentences"),
+            co_occurrences_dist=(20, "sentences"),
             dynamic=True,  # we want to extract a dynamic graph (i.e. a list of sequential graphs)
             dynamic_window=20,  # the size, in interaction, of each graph
             dynamic_overlap=0,  # overlap between windows

From 1e2eb384497e7aa61afe13165fb2058f899248d1 Mon Sep 17 00:00:00 2001
From: Aethor <oow.autre@gmail.com>
Date: Wed, 8 Nov 2023 10:45:00 +0100
Subject: [PATCH 3/3] fix remaining issues with co_occurrences_dist typo

---
 renard/pipeline/graph_extraction.py | 6 +++++-
 renard/pipeline/preconfigured.py    | 8 ++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/renard/pipeline/graph_extraction.py b/renard/pipeline/graph_extraction.py
index b4cf5d3..8498041 100644
--- a/renard/pipeline/graph_extraction.py
+++ b/renard/pipeline/graph_extraction.py
@@ -81,7 +81,9 @@ class CoOccurrencesGraphExtractor(PipelineStep):
 
     def __init__(
         self,
-        co_occurrences_dist: Union[int, Tuple[int, Literal["tokens", "sentences"]]],
+        co_occurrences_dist: Optional[
+            Union[int, Tuple[int, Literal["tokens", "sentences"]]]
+        ] = None,
         dynamic: bool = False,
         dynamic_window: Optional[int] = None,
         dynamic_overlap: int = 0,
@@ -127,6 +129,8 @@ def __init__(
         # typo retrocompatibility
         if not co_occurences_dist is None:
             co_occurrences_dist = co_occurences_dist
+        if co_occurrences_dist is None:
+            raise ValueError("co_occurrences_dist must be specified")
 
         if isinstance(co_occurrences_dist, int):
             co_occurrences_dist = (co_occurrences_dist, "tokens")
diff --git a/renard/pipeline/preconfigured.py b/renard/pipeline/preconfigured.py
index 00b227a..d797231 100644
--- a/renard/pipeline/preconfigured.py
+++ b/renard/pipeline/preconfigured.py
@@ -56,6 +56,10 @@ def nltk_pipeline(
             **pipeline_kwargs
         )
     else:
+
+        if not "co_occurrences_dist" in graph_extractor_kwargs:
+            graph_extractor_kwargs["co_occurrences_dist"] = (1, "sentences")
+
         return Pipeline(
             [
                 NLTKTokenizer(**tokenizer_kwargs),
@@ -113,6 +117,10 @@ def bert_pipeline(
             **pipeline_kwargs
         )
     else:
+
+        if not "co_occurrences_dist" in graph_extractor_kwargs:
+            graph_extractor_kwargs["co_occurrences_dist"] = (1, "sentences")
+
         return Pipeline(
             [
                 NLTKTokenizer(**tokenizer_kwargs),