Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rule based relation #304

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ _build/
*.tar.gz
*.tsv
*.ann
!text.ann

# Editors
.idea
Expand Down
8 changes: 8 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@

- `eds.tables` accepts a minimum_table_size (default 2) argument to reduce pollution
- `RuleBasedQualifier` now exposes a `process` method that only returns qualified entities and tokens without actually tagging them, deferring this task to the `__call__` method.
- Relation implementation in `doc.spans["<label>"][i]._.rel = [{'type':'rel_type', 'target': <span>},]`
- Relation connector with brat2docs and docs2brat in `edsnlp.connectors.brat` compatible with `edsnlp.data.read_*` and `edsnlp.data.write_*` (modified files : `edsnlp.data.converters`, `edsnlp.data.standoff`)
- Rule-based relation model using proximity and/or sentence in `edsnlp.pipes.misc.relations` registered as `eds.relations`
- Documentation using Mkdocs for relations `docs.pipes.misc.relations.md` and `docs.pipes.misc.index.md`
- Tests for relations `tests.pipelines.misc.test_relations` and resources `ressources.relations`
- `data.set_processing(...)` now exposes an `autocast` parameter to disable or tweak the automatic casting of tensors
during processing. Autocasting should result in a slight speedup, but may lead to numerical instability.
- Use `torch.inference_mode` to disable view tracking and version counter bumps during inference.

### Fixed

Expand Down
1 change: 1 addition & 0 deletions docs/pipes/misc/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ For instance, the date detection and normalisation pipeline falls in this catego
| `eds.sections` | Section detection |
| `eds.reason` | Rule-based hospitalisation reason detection |
| `eds.tables` | Tables detection |
| `eds.relations` | Relation extraction |

<!-- --8<-- [end:components] -->
8 changes: 8 additions & 0 deletions docs/pipes/misc/relations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Relations {: #edsnlp.pipes.misc.relations.factory.create_component }

::: edsnlp.pipes.misc.relations.factory.create_component
options:
heading_level: 2
show_bases: false
show_source: false
only_class_level: true
164 changes: 144 additions & 20 deletions edsnlp/data/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ class StandoffDict2DocConverter:
span_attributes : Optional[AttributesMappingArg]
Mapping from BRAT attributes to Span extensions (can be a list too).
By default, all attributes are imported as Span extensions with the same name.
span_rel : Optional[AttributesMappingArg]
Mapping from BRAT relations to Span extensions (can be a list too).
By default, all relations are imported as Span extensions with the name rel.
keep_raw_attribute_values : bool
Whether to keep the raw attribute values (as strings) or to convert them to
Python objects (e.g. booleans).
Expand All @@ -214,6 +217,7 @@ def __init__(
tokenizer: Optional[Tokenizer] = None,
span_setter: SpanSetterArg = {"ents": True, "*": True},
span_attributes: Optional[AttributesMappingArg] = None,
span_rel: Optional[AttributesMappingArg] = None, # to keep ?
keep_raw_attribute_values: bool = False,
bool_attributes: SequenceStr = [],
default_attributes: AttributesMappingArg = {},
Expand All @@ -223,6 +227,7 @@ def __init__(
self.tokenizer = tokenizer or (nlp.tokenizer if nlp is not None else None)
self.span_setter = span_setter
self.span_attributes = span_attributes # type: ignore
self.span_rel = span_rel # to keep ?
self.keep_raw_attribute_values = keep_raw_attribute_values
self.default_attributes = default_attributes
self.notes_as_span_attribute = notes_as_span_attribute
Expand All @@ -244,12 +249,19 @@ def __call__(self, obj):
if not Span.has_extension(dst):
Span.set_extension(dst, default=None)

############## Modifications for relations ###############
dict_entities = {} ## dict for entity storage
for ent in obj.get("entities") or ():
begin = min(f["begin"] for f in ent["fragments"]) # start of the entity
end = max(f["end"] for f in ent["fragments"]) # end of the entity
dict_entities[ent["entity_id"]] = (
ent["label"] + ";" + str(begin) + ";" + str(end)
)
fragments = (
[
{
"begin": min(f["begin"] for f in ent["fragments"]),
"end": max(f["end"] for f in ent["fragments"]),
"begin": begin,
"end": end,
}
]
if not self.split_fragments
Expand All @@ -267,6 +279,11 @@ def __call__(self, obj):
if isinstance(ent["attributes"], list)
else ent["attributes"]
)
attributes = (
{a["label"]: a["value"] for a in ent["attributes"]}
if isinstance(ent["attributes"], list)
else ent["attributes"]
)
if self.notes_as_span_attribute and ent["notes"]:
ent["attributes"][self.notes_as_span_attribute] = "|".join(
note["value"] for note in ent["notes"]
Expand Down Expand Up @@ -302,6 +319,67 @@ def __call__(self, obj):
if span._.get(attr) is None:
span._.set(attr, value)

############## Modifications for relations ###############
# add relations in spans
if self.span_rel is None and not Span.has_extension("rel"):
Span.set_extension("rel", default=[])

for rel in obj.get("relations") or (): # iterates relations
for label in doc.spans: # iterates source labels
for i, spa in enumerate(doc.spans[label]): # iterates source spans
bo = False

# relations
if dict_entities[rel["from_entity_id"]].split(";") == [
label,
str(spa.start_char),
str(spa.end_char),
]: # if the source entity is the same as the span
for label2 in doc.spans: # iterates target labels
for j, spa2 in enumerate(
doc.spans[label2]
): # iterates target label
if dict_entities[rel["to_entity_id"]].split(";") == [
label2,
str(spa2.start_char),
str(spa2.end_char),
]: # if target entity is the same as the span
relation = {
"type": rel["relation_label"],
"target": doc.spans[label2][j],
} # create the relation
doc.spans[label][i]._.rel.append(
relation
) # add the relation to the span
bo = True
break
if bo:
break
bo = False

# inverse relations
if dict_entities[rel["to_entity_id"]].split(";") == [
label,
str(spa.start_char),
str(spa.end_char),
]:
for label2 in doc.spans:
for j, spa2 in enumerate(doc.spans[label2]):
if dict_entities[rel["from_entity_id"]].split(";") == [
label2,
str(spa2.start_char),
str(spa2.end_char),
]:
relation = {
"type": "inv_" + rel["relation_label"],
"target": doc.spans[label2][j],
}
doc.spans[label][i]._.rel.append(relation)
bo = True
break
if bo:
break

return doc


Expand Down Expand Up @@ -346,29 +424,75 @@ def __init__(

def __call__(self, doc):
spans = get_spans(doc, self.span_getter)
entities = [
{
"entity_id": i,
"fragments": [
{
"begin": ent.start_char,
"end": ent.end_char,
}
],
"attributes": {
obj_name: getattr(ent._, ext_name)
for ext_name, obj_name in self.span_attributes.items()
if ent._.has(ext_name)
},
"label": ent.label_,
}
for i, ent in enumerate(sorted(dict.fromkeys(spans)))
]

# mapping between entities and their `entity_id`
entity_map = {
(
ent["fragments"][0]["begin"],
ent["fragments"][0]["end"],
ent["label"],
): ent["entity_id"]
for ent in entities
}

# doesn't include 'inv_' relations
relations = []
relation_idx = 1
for span_label, span_list in doc.spans.items():
for spa in span_list:
source_entity_id = entity_map.get(
(spa.start_char, spa.end_char, spa.label_)
)
for rel in spa._.rel:
if not rel["type"].startswith("inv_"):
target_entity_id = entity_map.get(
(
rel["target"].start_char,
rel["target"].end_char,
rel["target"].label_,
)
)
if (
source_entity_id is not None
and target_entity_id is not None
):
relations.append(
{
"rel_id": relation_idx,
"from_entity_id": source_entity_id,
"relation_type": rel["type"],
"to_entity_id": target_entity_id,
}
)
relation_idx += 1

# final object
obj = {
FILENAME: doc._.note_id,
"doc_id": doc._.note_id,
"text": doc.text,
"entities": [
{
"entity_id": i,
"fragments": [
{
"begin": ent.start_char,
"end": ent.end_char,
}
],
"attributes": {
obj_name: getattr(ent._, ext_name)
for ext_name, obj_name in self.span_attributes.items()
if ent._.has(ext_name)
},
"label": ent.label_,
}
for i, ent in enumerate(sorted(dict.fromkeys(spans)))
],
"entities": entities,
"relations": relations,
}

return obj


Expand Down
28 changes: 14 additions & 14 deletions edsnlp/data/standoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,20 +264,20 @@ def dump_standoff_file(
file=f,
)
attribute_idx += 1

# fmt: off
# if "relations" in doc:
# for i, relation in enumerate(doc["relations"]):
# entity_from = entities_ids[relation["from_entity_id"]]
# entity_to = entities_ids[relation["to_entity_id"]]
# print(
# "R{}\t{} Arg1:{} Arg2:{}\t".format(
# i + 1, str(relation["label"]), entity_from,
# entity_to
# ),
# file=f,
# )
# fmt: on
# Write relation lines (BRAT standoff "R" entries)
relation_idx = 1
if "relations" in doc:
for relation in doc["relations"]:
print(
"R{}\t{} Arg1:{} Arg2:{}".format(
relation_idx,
relation["relation_type"],
entities_ids[relation["from_entity_id"]],
entities_ids[relation["to_entity_id"]],
),
file=f,
)
relation_idx += 1


class StandoffReader(BaseReader):
Expand Down
1 change: 1 addition & 0 deletions edsnlp/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .misc.dates.factory import create_component as dates
from .misc.quantities.factory import create_component as quantities
from .misc.reason.factory import create_component as reason
from .misc.relations.factory import create_component as relations
from .misc.sections.factory import create_component as sections
from .misc.tables.factory import create_component as tables
from .ner.adicap.factory import create_component as adicap
Expand Down
1 change: 1 addition & 0 deletions edsnlp/pipes/misc/relations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .relations import RelationsMatcher
17 changes: 17 additions & 0 deletions edsnlp/pipes/misc/relations/factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from edsnlp.core import registry

from .relations import RelationsMatcher

# Default keyword arguments for the `eds.relations` pipe (RelationsMatcher).
# NOTE(review): `scheme=None` presumably falls back to the package-level
# default scheme — confirm against RelationsMatcher.__init__.
DEFAULT_CONFIG = {
    "scheme": None,
    "use_sentences": False,
    "clean_rel": False,
    "proximity_method": "right",
    "max_dist": 45,
}

# Register the matcher with the EDS-NLP component factory under its
# canonical name; the bare "relations" alias is kept but deprecated.
create_component = registry.factory.register(
    "eds.relations",
    assigns=["doc.spans"],
    deprecated=["relations"],
)(RelationsMatcher)
17 changes: 17 additions & 0 deletions edsnlp/pipes/misc/relations/patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Default relation scheme for the rule-based relation matcher (`eds.relations`).
# Each entry declares, for one relation type, which source spans may be linked
# to which target spans, filtered by span label and by span-attribute values.
# NOTE(review): a `None` entry in an attribute's value list appears to mean
# "attribute unset" — confirm against RelationsMatcher.
scheme = [
    {
        # Source side: drug mentions whose `Tech` attribute is unset.
        "source": [{"label": "Chemical_and_drugs", "attr": {"Tech": [None]}}],
        # Target side: temporal mentions, or drug mentions carrying a
        # technical attribute (dosage, route, strength, form).
        "target": [
            {
                "label": "Temporal",
                "attr": {"AttTemp": [None, "Duration", "Date", "Frequency"]},
            },
            {
                "label": "Chemical_and_drugs",
                "attr": {"Tech": ["dosage", "route", "strength", "form"]},
            },
        ],
        # Relation label, and the label used for the inverse direction.
        "type": "Depend",
        "inv_type": "inv_Depend",
    },
]
Loading
Loading