From b18157aaa57187f031d7a1b751bccd6c38e8b2eb Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 6 Jun 2024 19:13:30 +0300 Subject: [PATCH 01/62] #622 added nested structure for XSD files --- spinta/manifests/xsd/helpers.py | 107 +++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 35 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 6a0e395b9..cd546195b 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -483,9 +483,12 @@ def _properties_from_references( node: _Element, model: XSDModel, source_path: str = "" - ) -> dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]: + ) -> tuple[ + dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], + dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]: properties = {} + root_properties = {} # if len(node) == 1: # # if this model has only one property, which is a reference, we don't create it, but pass it on. # ref_element = node.xpath("./*[@ref]")[0] @@ -495,6 +498,7 @@ def _properties_from_references( # xpath_query = f"//*[@name='{ref}']" # referenced_element = self.root.xpath(xpath_query)[0] # return self._properties_from_references(model=model, source_path=node.get("name")) + root_properties = {} for ref_element in node.xpath("./*[@ref]"): referenced_element = self._get_referenced_node(ref_element) @@ -505,7 +509,6 @@ def _properties_from_references( properties[property_id] = prop else: is_array = False - # try: # TODO fix this because it probably doesn't cover all cases, only something like # https://github.com/atviriduomenys/spinta/issues/613 complex_type = referenced_element.xpath("./*[local-name() = 'complexType']")[0] @@ -522,11 +525,9 @@ def _properties_from_references( referenced_element = new_referenced_element if ref_element.get("name") is not None: source_path += f'/{ref_element.get("name")}' - # except (TypeError, IndexError): - # pass if not (XSDReader.is_array(ref_element) 
or is_array): - referenced_model_names = self._create_model(referenced_element, source_path) + referenced_model_names, new_root_properties = self._create_model(referenced_element, source_path) property_type = "ref" else: referenced_element_properties = { @@ -537,9 +538,11 @@ def _properties_from_references( } } property_type = "backref" - referenced_model_names = self._create_model(referenced_element, source_path, + referenced_model_names, new_root_properties = self._create_model(referenced_element, source_path, additional_properties=referenced_element_properties) + root_properties.update(new_root_properties) + for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) @@ -548,7 +551,7 @@ def _properties_from_references( prop["model"] = f"{referenced_model_name}" properties[property_id] = prop - return properties + return properties, root_properties def _split_choice( self, @@ -560,7 +563,7 @@ def _split_choice( If there are choices in the element, we need to split it and create a separate model per each choice """ - + root_properties = {} model_names = [] node_copy = deepcopy(node) if self._node_has_separate_complex_type(node_copy): @@ -588,13 +591,20 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - model_names.extend(self._create_model(node_copy, source_path, additional_properties)) + returned_model_names, root_properties = self._create_model(node_copy, source_path, + additional_properties) + model_names.extend(returned_model_names) + root_properties.update(root_properties) for node_in_choice in choice_copy: node_in_choice = choice_node_parent.xpath(f"./*[@name=\'{node_in_choice.get('name')}\']")[0] choice_node_parent.remove(node_in_choice) else: choice_node_parent.insert(0, choice) - model_names.extend(self._create_model(node_copy, source_path, additional_properties)) + returned_model_names, 
root_properties = self._create_model(node_copy, source_path, + additional_properties) + model_names.extend(returned_model_names) + root_properties.update(root_properties) + choice_node_parent.remove(choice) return model_names @@ -602,8 +612,9 @@ def _create_model( self, node: _Element, source_path: str = "", + is_root_model: bool = False, additional_properties: dict[str, str | bool | dict[str, str | dict[str, Any]]] = None - ) -> list[str]: + ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: """ Parses an element and makes a model out of it. If it is a complete model, it will be added to the models list. """ @@ -612,6 +623,10 @@ def _create_model( if additional_properties is None: additional_properties = {} + # properties to add to the root model + root_properties = {} + + # properties of this model properties = {} properties.update(additional_properties) @@ -619,14 +634,8 @@ def _create_model( model.set_name(self.deduplicate(to_model_name(node.get("name")))) - # if this is complexType node which has complexContent, with a separate - # node, we need to join the contents of them both - - description = self.get_description(node) properties.update(model.attributes_to_properties(node)) - model_names = [] - if node.xpath(f'./*[local-name() = "complexType"]') or self._node_has_separate_complex_type(node): if self._node_has_separate_complex_type(node): @@ -650,11 +659,10 @@ def _create_model( # if complextype node's property mixed is true, it allows text inside if complex_type_node.get("mixed") == "true": properties.update(model.get_text_property()) - if complex_type_node.xpath(f'./*[local-name() = "complexContent"]'): - # TODO: this is only for the nodes where complex content extension base is abstract. 
- # it's the case for the RC documents, but might be different for other data providers - # https://github.com/atviriduomenys/spinta/issues/604 + # if this is complexType node which has complexContent, with a separate + # node, we need to join the contents of them both + if complex_type_node.xpath(f'./*[local-name() = "complexContent"]'): complex_type_node = complex_type_node.xpath(f'./*[local-name() = "complexContent"]/*[local-name() = "extension"]')[0] complex_content_base_name = complex_type_node.get("base") complex_content_base_node = self._get_separate_complex_type_node_by_type(complex_content_base_name) @@ -736,11 +744,15 @@ def _create_model( # and maxOccurs of it is 1, # then do not create reference, but add to the same - # properties.update( - # self._properties_from_references(sequence_or_all_node, model, new_source_path)) element = sequence_or_all_node.xpath(f'./*[local-name() = "element"]')[0] element = self._get_referenced_node(element) - return self._create_model(element, source_path=new_source_path, additional_properties=additional_properties) + if not is_root_model: + return self._create_model(element, source_path=new_source_path, + additional_properties=additional_properties) + else: + _, new_root_properties = self._create_model(element, source_path=new_source_path, + additional_properties=additional_properties) + root_properties.update(new_root_properties) elif sequence_or_all_node_length > 1 or properties: # properties from simple type or inline elements without references @@ -750,8 +762,9 @@ def _create_model( properties_required=properties_required)) # references - properties.update( - self._properties_from_references(sequence_or_all_node, model, new_source_path)) + properties_from_references, new_root_properties = self._properties_from_references(sequence_or_all_node, model, new_source_path) + properties.update(properties_from_references) + root_properties.update(new_root_properties) # complex type child nodes - to models for child_node in 
sequence_or_all_node: @@ -761,21 +774,48 @@ def _create_model( # TODO: maybe move this to a separate function paths = new_source_path.split("/") if not child_node.get("name") in paths: - self._create_model(child_node, source_path=new_source_path) + _, new_root_properties = self._create_model(child_node, source_path=new_source_path) + root_properties.update(new_root_properties) else: for index, path in enumerate(paths): if path == child_node.get("name"): paths[index] = f"/{path}" new_source_path = "/".join(paths) - if properties: + if properties or is_root_model: + + new_root_properties = {} + if is_root_model: + properties.update(root_properties) + else: + root_properties.update(deepcopy(properties)) + + # if we have additional properties, those are to add `ref` which means that on the other side + # there is a `backref` which means that this is for an array + if additional_properties: + array_sign = "[]" + else: + array_sign = "" + # add the model prefix to every property name and source + for root_property_id, root_property in root_properties.items(): + new_root_property = root_property + + if "external" in root_property: + new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} + + # we don't need to add refs and backrefs, only actual fields + if not new_root_property.get("type") == "ref" and not new_root_property.get("type") == "backref": + root_property_id = f"{to_property_name(model.standalone_name)}{array_sign}.{root_property_id}" + new_root_properties[root_property_id] = new_root_property + model.properties = properties + model.add_external_info(external_name=new_source_path) - model.description = description + model.description = self.get_description(node) self.models.append(model) - return [model.name, ] - return [] + return [model.name, ], new_root_properties + return [], {} def _add_resource_model(self): resource_model = XSDModel(self) @@ -789,16 +829,13 @@ def _add_resource_model(self): 
self.models.append(resource_model) def _parse_root_node(self): - # get properties from elements - # Resource model - special case - for node in self.root: if ( self._is_element(node) and (not self.node_is_simple_type_or_inline(node) or self.node_is_ref(node)) and not self._node_is_referenced(node) ): - self._create_model(node) + self._create_model(node, is_root_model=True) def start(self): self._extract_root() From 9adfb26ae5f9baa0574e58d50630a671a8f7236a Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 6 Jun 2024 21:47:39 +0300 Subject: [PATCH 02/62] additional_properties passing as named patrameter --- spinta/manifests/xsd/helpers.py | 18 ++++++++++-------- tests/manifests/xsd/test_xsd.py | 6 ++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index cd546195b..fe740ac3c 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -558,7 +558,7 @@ def _split_choice( node: _Element, source_path: str, additional_properties: dict[str, dict[str, str | bool | dict[str, str]]] - ) -> list[str]: + ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: """ If there are choices in the element, we need to split it and create a separate model per each choice @@ -591,22 +591,24 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - returned_model_names, root_properties = self._create_model(node_copy, source_path, - additional_properties) + returned_model_names, new_root_properties = self._create_model( + node_copy, source_path, + additional_properties=additional_properties) model_names.extend(returned_model_names) - root_properties.update(root_properties) + root_properties.update(new_root_properties) for node_in_choice in choice_copy: node_in_choice = choice_node_parent.xpath(f"./*[@name=\'{node_in_choice.get('name')}\']")[0] choice_node_parent.remove(node_in_choice) 
else: choice_node_parent.insert(0, choice) - returned_model_names, root_properties = self._create_model(node_copy, source_path, - additional_properties) + returned_model_names, new_root_properties = self._create_model( + node_copy, source_path, + additional_properties=additional_properties) model_names.extend(returned_model_names) - root_properties.update(root_properties) + root_properties.update(new_root_properties) choice_node_parent.remove(choice) - return model_names + return model_names, root_properties def _create_model( self, diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index dfda3aec8..b7b181ea2 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -286,6 +286,9 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | | | | | | text | string | | text() | | | | | | | | | | | asmenys[] | backref | Asmuo | | | | | | | + | | | | | asmuo[].id | string required | | asmuo/@id | | | | | | + | | | | | asmuo[].ak | string required | | asmuo/@ak | | | | | | + | | | | | asmuo[].text | string | | asmuo/text() | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -342,6 +345,9 @@ def test_xsd_resource_model(rc: RawConfig, tmp_path: Path): | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | | | | | | text | string | | text() | | | | | | | | | | | asmenys | ref | Asmenys | | | | | | | + | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris + | | | | | asmenys.text | string | | asmenys/text() | | | | | | + """ path = tmp_path / 'manifest.xsd' From c2464f5434b96f1376139d32ac949a71248c2f18 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 6 Jun 2024 21:47:39 +0300 Subject: [PATCH 03/62] #622 additional_properties passing as named parameter --- spinta/manifests/xsd/helpers.py | 18 ++++++++++-------- tests/manifests/xsd/test_xsd.py | 6 ++++++ 2 files changed, 16 
insertions(+), 8 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index cd546195b..fe740ac3c 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -558,7 +558,7 @@ def _split_choice( node: _Element, source_path: str, additional_properties: dict[str, dict[str, str | bool | dict[str, str]]] - ) -> list[str]: + ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: """ If there are choices in the element, we need to split it and create a separate model per each choice @@ -591,22 +591,24 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - returned_model_names, root_properties = self._create_model(node_copy, source_path, - additional_properties) + returned_model_names, new_root_properties = self._create_model( + node_copy, source_path, + additional_properties=additional_properties) model_names.extend(returned_model_names) - root_properties.update(root_properties) + root_properties.update(new_root_properties) for node_in_choice in choice_copy: node_in_choice = choice_node_parent.xpath(f"./*[@name=\'{node_in_choice.get('name')}\']")[0] choice_node_parent.remove(node_in_choice) else: choice_node_parent.insert(0, choice) - returned_model_names, root_properties = self._create_model(node_copy, source_path, - additional_properties) + returned_model_names, new_root_properties = self._create_model( + node_copy, source_path, + additional_properties=additional_properties) model_names.extend(returned_model_names) - root_properties.update(root_properties) + root_properties.update(new_root_properties) choice_node_parent.remove(choice) - return model_names + return model_names, root_properties def _create_model( self, diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index dfda3aec8..b7b181ea2 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ 
-286,6 +286,9 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | | | | | | text | string | | text() | | | | | | | | | | | asmenys[] | backref | Asmuo | | | | | | | + | | | | | asmuo[].id | string required | | asmuo/@id | | | | | | + | | | | | asmuo[].ak | string required | | asmuo/@ak | | | | | | + | | | | | asmuo[].text | string | | asmuo/text() | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -342,6 +345,9 @@ def test_xsd_resource_model(rc: RawConfig, tmp_path: Path): | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | | | | | | text | string | | text() | | | | | | | | | | | asmenys | ref | Asmenys | | | | | | | + | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris + | | | | | asmenys.text | string | | asmenys/text() | | | | | | + """ path = tmp_path / 'manifest.xsd' From 3d1759ced17eb1bf7b19818a13c2eb051f34b4a5 Mon Sep 17 00:00:00 2001 From: karina Date: Fri, 7 Jun 2024 23:43:49 +0300 Subject: [PATCH 04/62] nested properties git commit -am --- spinta/manifests/xsd/helpers.py | 96 +++++++++++++++++------------ tests/manifests/xsd/test_helpers.py | 66 ++++++++++++++++++-- tests/manifests/xsd/test_xsd.py | 53 +++++++++++++++- 3 files changed, 169 insertions(+), 46 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index fe740ac3c..5e4bae625 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -485,20 +485,10 @@ def _properties_from_references( source_path: str = "" ) -> tuple[ dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], - dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]: + dict[str, dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]]: properties = {} root_properties = {} - # if len(node) == 1: - # # if this model has only one property, which is a reference, we don't create 
it, but pass it on. - # ref_element = node.xpath("./*[@ref]")[0] - # ref = ref_element.get("ref") - # if ":" in ref: - # ref = ref.split(":")[1] - # xpath_query = f"//*[@name='{ref}']" - # referenced_element = self.root.xpath(xpath_query)[0] - # return self._properties_from_references(model=model, source_path=node.get("name")) - root_properties = {} for ref_element in node.xpath("./*[@ref]"): referenced_element = self._get_referenced_node(ref_element) @@ -591,11 +581,12 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - returned_model_names, new_root_properties = self._create_model( - node_copy, source_path, - additional_properties=additional_properties) - model_names.extend(returned_model_names) - root_properties.update(new_root_properties) + returned_model_names, new_root_properties = self._create_model( + node_copy, source_path, + additional_properties=additional_properties) + root_properties.update(new_root_properties) + model_names.extend(returned_model_names) + for node_in_choice in choice_copy: node_in_choice = choice_node_parent.xpath(f"./*[@name=\'{node_in_choice.get('name')}\']")[0] choice_node_parent.remove(node_in_choice) @@ -616,7 +607,7 @@ def _create_model( source_path: str = "", is_root_model: bool = False, additional_properties: dict[str, str | bool | dict[str, str | dict[str, Any]]] = None - ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: + ) -> tuple[list[str], dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]: """ Parses an element and makes a model out of it. If it is a complete model, it will be added to the models list. 
""" @@ -631,13 +622,12 @@ def _create_model( # properties of this model properties = {} properties.update(additional_properties) + properties.update(model.attributes_to_properties(node)) new_source_path = f"{source_path}/{node.get('name')}" model.set_name(self.deduplicate(to_model_name(node.get("name")))) - properties.update(model.attributes_to_properties(node)) - if node.xpath(f'./*[local-name() = "complexType"]') or self._node_has_separate_complex_type(node): if self._node_has_separate_complex_type(node): @@ -649,7 +639,6 @@ def _create_model( choices = complex_type_node.xpath(f'./*[local-name() = "choice"]') # if choices is unbounded, we treat it like sequence if not choices or choices[0].get("maxOccurs") == "unbounded": - # if it's a `choice` node with `unbounded`, we treat it the same as sequence node if choices: choices = complex_type_node.xpath(f'./*[local-name() = "choice"]/*[local-name() = "choice"]') else: @@ -786,29 +775,52 @@ def _create_model( if properties or is_root_model: + # TODO move this nested properties thing to a function + # DEALING WITH NESTED ROOT PROPERTIES --------------------- + # new_root_properties are to pass up to the root model and to add to it new_root_properties = {} + # add the model prefix to every property name and source + for model_name, model_properties in root_properties.items(): + for root_property_id, root_property in model_properties.items(): + + # we don't need to add refs and backrefs, only actual fields + if not root_property.get("type") == "ref" and not root_property.get("type") == "backref": + + # we need to find out the name of the property that corresponds the model, + # because we need to use that if we used it in ref properties, otherwise use + prefix = None + stripped_model_name = model_name.rstrip("[]") + for property_id, prop in properties.items(): + if "model" in prop: + property_model_name = prop.get("model").split("/")[-1] + if property_model_name == stripped_model_name: + prefix = property_id + break + + 
array_sign = "" + if prefix is None: + if model_name.endswith("[]"): + array_sign = "[]" + + prefix = to_property_name(stripped_model_name.split("/")[-1]) + + root_property_id = f"{prefix}{array_sign}.{root_property_id}" + new_root_properties[root_property_id] = root_property + if is_root_model: - properties.update(root_properties) + properties.update(new_root_properties) else: - root_properties.update(deepcopy(properties)) + new_root_properties.update(deepcopy(properties)) - # if we have additional properties, those are to add `ref` which means that on the other side - # there is a `backref` which means that this is for an array - if additional_properties: - array_sign = "[]" - else: - array_sign = "" - # add the model prefix to every property name and source - for root_property_id, root_property in root_properties.items(): - new_root_property = root_property - - if "external" in root_property: - new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} + # adding node source to the source path here, before passing further, + # because we can't retrieve it later otherwise + returned_root_properties = {} + for root_property_id, root_property in new_root_properties.items(): + new_root_property = deepcopy(root_property) - # we don't need to add refs and backrefs, only actual fields - if not new_root_property.get("type") == "ref" and not new_root_property.get("type") == "backref": - root_property_id = f"{to_property_name(model.standalone_name)}{array_sign}.{root_property_id}" - new_root_properties[root_property_id] = new_root_property + if "external" in root_property: + new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} + returned_root_properties[root_property_id] = new_root_property model.properties = properties @@ -816,7 +828,13 @@ def _create_model( model.description = self.get_description(node) self.models.append(model) - return [model.name, ], new_root_properties + if 
additional_properties: + model_name = f"{model.name}[]" + else: + model_name = model.name + # -------------------END DEALING WITH NESTED ROOT PROPERTIES + + return [model.name, ], {model_name: returned_root_properties} return [], {} def _add_resource_model(self): diff --git a/tests/manifests/xsd/test_helpers.py b/tests/manifests/xsd/test_helpers.py index 9307ef720..7c73f47d4 100644 --- a/tests/manifests/xsd/test_helpers.py +++ b/tests/manifests/xsd/test_helpers.py @@ -716,7 +716,7 @@ def test_properties_from_references(): model = XSDModel(xsd, schema) result = xsd._properties_from_references(sequence, model, source_path="tst") - assert result == { + assert result == ({ 'ct_e200_fc_id': { 'description': 'E200 duomenų kompozicijos unikalus identifikatorius', 'enums': {}, @@ -735,7 +735,7 @@ def test_properties_from_references(): 'required': True, 'type': 'integer', }, - } + }, {}) def test_properties_from_references_complex_not_array(): @@ -782,7 +782,7 @@ def test_properties_from_references_complex_not_array(): model = XSDModel(xsd, schema) result = xsd._properties_from_references(sequence, model, source_path="tst") - assert result == { + assert result == ({ 'fiziniai_asmenys': { 'description': '', 'enums': {}, @@ -799,7 +799,32 @@ def test_properties_from_references_complex_not_array(): 'required': True, 'type': 'ref', }, - } + }, + {'test/FiziniaiAsmenys': {'objektu_asmenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'FIZINIAI_ASMENYS/OBJEKTU_ASMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'tekstiniai_duomenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'FIZINIAI_ASMENYS/TEKSTINIAI_DUOMENYS/text()'}, + 'required': False, + 'type': 'string'}}, + 'test/Objektai': {'objektu_asmenys[]': {'description': '', + 'enums': {}, + 'external': {'name': 'OBJEKTAI/OBJEKTU_ASMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'tekstiniai_duomenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 
'OBJEKTAI/TEKSTINIAI_DUOMENYS/text()'}, + 'required': False, + 'type': 'string'}}} + ) + assert xsd.models[0].get_data() == { 'description': 'Pagrindiniai juridinio asmens duomenys.', @@ -910,7 +935,7 @@ def test_properties_from_references_complex_array(): model.set_name("test") result = xsd._properties_from_references(sequence, model, source_path="tst") - assert result == { + assert result == ({ 'fiziniai_asmenys[]': { 'description': '', 'enums': {}, @@ -927,7 +952,36 @@ def test_properties_from_references_complex_array(): 'required': True, 'type': 'backref', }, - } + }, + {'test/FiziniaiAsmenys[]': {'objektu_asmenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'FIZINIAI_ASMENYS/OBJEKTU_ASMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'tekstiniai_duomenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'FIZINIAI_ASMENYS/TEKSTINIAI_DUOMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'test': {'model': 'test/test', + 'type': 'ref'}}, + 'test/Objektai[]': {'objektu_asmenys[]': {'description': '', + 'enums': {}, + 'external': {'name': 'OBJEKTAI/OBJEKTU_ASMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'tekstiniai_duomenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'OBJEKTAI/TEKSTINIAI_DUOMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'test': {'model': 'test/test', + 'type': 'ref'}}}, + ) + assert xsd.models[0].get_data() == { 'description': 'Pagrindiniai juridinio asmens duomenys.', diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index b7b181ea2..1a63243c8 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -483,6 +483,13 @@ def test_xsd_choice(rc: RawConfig, tmp_path: Path): | | | | | text | string | | text() | | | | | | | | | | | parcel[] | backref | Parcel1 | | | | | | | | | | | | parcel1[] | backref | Parcel2 | | | | | | | + | | | | | parcel1[].text | string | | parcel/text() | | | | | | + | | | 
| | parcel1[].parcel_unique_number | integer required | | parcel/parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris + | | | | | parcel2[].text | string | | parcel/text() | | | | | | + | | | | | parcel2[].sign_of_change | integer required | | parcel/sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis + | | enum | | 1 | | | | | | + | | | | 2 | | | | | | + | | | | 3 | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -563,6 +570,14 @@ def test_xsd_choice_max_occurs_unbound(rc: RawConfig, tmp_path: Path): | | | | | text | string | | text() | | | | | | | | | | | parcel[] | backref | Parcel1 | | | | | | | | | | | | parcel1[] | backref | Parcel2 | | | | | | | + | | | | | parcel1[].text | string | | parcel/text() | | | | | | + | | | | | parcel1[].parcel_unique_number | integer required | | parcel/parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris + | | | | | parcel2[].text | string | | parcel/text() | | | | | | + | | | | | parcel2[].sign_of_change | integer required | | parcel/sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis + | | enum | | 1 | | | | | | + | | | | 2 | | | | | | + | | | | 3 | | | | | | + """ path = tmp_path / 'manifest.xsd' @@ -617,6 +632,12 @@ def test_xsd_attributes(rc: RawConfig, tmp_path: Path): | | | | Salygos | | | /SALYGOS | | | | | | | | | | | text | string | | text() | | | | | | | | | | | salyga | ref required | Salyga | | | | | | | + | | | | | salyga.kodas | string | | SALYGA/@kodas | | | | | | + | | | | | salyga.nr | integer | | SALYGA/@nr | | | | | | + | | | | | salyga.text | string | | SALYGA/text() | | | | | | + | | | | | salyga.reiksme | string required | | SALYGA/REIKSME/text() | | | | | | + | | | | | salyga.pavadinimas | string | | SALYGA/PAVADINIMAS/text() | | | | | | + | | | | | salyga.aprasymas | string | | SALYGA/APRASYMAS/text() | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -672,6 +693,10 @@ def test_xsd_model_one_property(rc: RawConfig, tmp_path: Path): | | | 
| | | | | | | | | | | GetTzByTRAResponse | | | /getTzByTRAResponse | | | | | | | | | | | search_parameters | string | | searchParameters/text() | | | | | | + | | | | | extracttz.extract_preparation_time | datetime | | extracttz/extractPreparationTime/text() | | | | | | + | | | | | extracttz.phipoteka | integer | | extracttz/phipoteka/text() | | | | | | + | | | | | klaida.aprasymas | string | | klaida/Aprasymas/text() | | | | | | + """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -805,6 +830,16 @@ def test_xsd_sequence_choice_sequence(rc: RawConfig, tmp_path: Path): | | | | | | | | | | | | | | Data | | | /data | | | | | | | | | | | response_message | string | | responseMessage/text() | | | | | | + | | | | | response_data.documents1.birth_date | string | | responseData/documents/birthDate/text() | | | | | | + | | | | | response_data.documents1.last_name | string | | responseData/documents/lastName/text() | | | | | | + | | | | | response_data.documents1.first_name | string | | responseData/documents/firstName/text() | | | | | | + | | | | | response_data.documents1.code | string | | responseData/documents/code/text() | | | | | | + | | | | | response_data.documents1.iltu_code | string | | responseData/documents/iltu_code/text() | | | | | | + | | | | | response_data.documents2.business_name | string | | responseData/documents/businessName/text() | | | | | | + | | | | | response_data.documents2.code | string | | responseData/documents/code/text() | | | | | | + | | | | | response_data.documents2.iltu_code | string | | responseData/documents/iltu_code/text() | | | | | | + | | | | | response_data.statement_id | string required | | responseData/statementId/text() | | | | | | + | | | | | response_data.title | string required | | responseData/title/text() | | | | | | """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -923,6 +958,7 @@ def test_xsd_recursion(rc: RawConfig, tmp_path: Path): | | | | | | | | | | | | | | Data | | | 
/data | | | | | | | | | | | response_message | string | | responseMessage/text() | | | | | | + | | | | | action.code | string required | | action/code/text() | | | | | | Paslaugos kodas (RC kodas) """ path = tmp_path / 'manifest.xsd' @@ -1020,7 +1056,22 @@ def test_xsd_enumeration(rc: RawConfig, tmp_path: Path): | | | | | | | | | | | | | | Data | | | /data | | | | | | | | | | | response_message | string | | responseMessage/text() | | | | | | - + | | | | | response_data.who_may_consitute | string required | | responseData/who_may_consitute/text() | | | | | | Įgaliojimą gali sudaryti. + | | enum | | fiz | | | | | | + | | | | fiz-notarial | | | | | | + | | | | jur | | | | | | + | | | | jur-notarial | | | | | | + | | | | fiz-jur | | | | | | + | | | | fiz-notarial-jur-notarial | | | | | | + | | | | fiz-notarial-jur | | | | | | + | | | | fiz-jur-notarial | | | | | | + | | | | | response_data.default_description_editable | string required | | responseData/default_description_editable/text() | | | | | | Ar numatytasis aprašymas gali būti redaguojamas? 0 - NE, 1 - TAIP + | | enum | | 0 | | | | | | + | | | | 1 | | | | | | + | | | | | response_data.digital_service | string required | | responseData/digital_service/text() | | | | | | El. paslauga. 
Reikšmės: digital - Tik elektroninė paslauga, analog - Tik neelektroninė paslauga, digital-or-analog - Elektroninė arba neelektroninė paslauga + | | enum | | digital | | | | | | + | | | | analog | | | | | | + | | | | digital-or-analog | | | | | | """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: From a3f45c30e388498b2a05b6539caf60840f24aaa2 Mon Sep 17 00:00:00 2001 From: karina Date: Mon, 10 Jun 2024 15:46:58 +0300 Subject: [PATCH 05/62] removed some comments --- spinta/manifests/xsd/helpers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 5e4bae625..392260e2a 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -434,6 +434,7 @@ def _get_separate_complex_type_node_by_type(self, node_type: str) -> _Element: for node in complex_types: if node.get("name") == node_type: return node + def _get_separate_complex_type_node(self, node: _Element) -> _Element: node_type: str | list = node.get('type') return self._get_separate_complex_type_node_by_type(node_type) @@ -488,7 +489,10 @@ def _properties_from_references( dict[str, dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]]: properties = {} + + # nested properties for the root model root_properties = {} + for ref_element in node.xpath("./*[@ref]"): referenced_element = self._get_referenced_node(ref_element) @@ -553,7 +557,10 @@ def _split_choice( If there are choices in the element, we need to split it and create a separate model per each choice """ + + # nested properties for the root model root_properties = {} + model_names = [] node_copy = deepcopy(node) if self._node_has_separate_complex_type(node_copy): @@ -762,7 +769,6 @@ def _create_model( if child_node.xpath(f'./*[local-name() = "complexType"]') \ or self._node_has_separate_complex_type(child_node): # check for recursion - # TODO: maybe move this to a separate function paths = 
new_source_path.split("/") if not child_node.get("name") in paths: _, new_root_properties = self._create_model(child_node, source_path=new_source_path) @@ -775,7 +781,6 @@ def _create_model( if properties or is_root_model: - # TODO move this nested properties thing to a function # DEALING WITH NESTED ROOT PROPERTIES --------------------- # new_root_properties are to pass up to the root model and to add to it new_root_properties = {} From 27fb2368d3b7756c79269f171f308bb4224095a6 Mon Sep 17 00:00:00 2001 From: karina Date: Tue, 11 Jun 2024 16:31:21 +0300 Subject: [PATCH 06/62] standalone_name changed to base_name for consistency --- spinta/manifests/xsd/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 392260e2a..201056060 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -103,7 +103,7 @@ def __init__(self, xsd: 'XSDReader', node: _Element = None): self.node: _Element = node self.type: str = "model" self.name: str | None = None - self.standalone_name: str | None = None + self.basename: str | None = None self.external: dict | None = None self.properties: dict | None = None self.uri: str | None = None @@ -133,7 +133,7 @@ def add_external_info(self, external_name: str): } def set_name(self, name: str): - self.standalone_name = name + self.basename = name self.name = f"{self.dataset_name}/{name}" def _get_property_type(self, node: _Element) -> str: @@ -525,7 +525,7 @@ def _properties_from_references( property_type = "ref" else: referenced_element_properties = { - to_property_name(model.standalone_name): + to_property_name(model.basename): { "type": "ref", "model": f"{model.name}" From 1402eb0101b92482b1b0c3f9b966a24662fccb08 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 13 Jun 2024 19:13:05 +0300 Subject: [PATCH 07/62] #622 nested properties on the root models only --- spinta/manifests/xsd/helpers.py | 113 
+++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 31 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 201056060..e14d7f427 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -108,6 +108,9 @@ def __init__(self, xsd: 'XSDReader', node: _Element = None): self.properties: dict | None = None self.uri: str | None = None self.description: str | None = None + self.root_properties: dict | None = None + self.parent_model: XSDModel | None = None + self.is_root_model: bool | None = None def get_data(self): model_data: dict = { @@ -387,7 +390,6 @@ def _get_referenced_node(self, node): referenced_node = self.root.xpath(xpath_query)[0] return referenced_node - @staticmethod def node_is_ref(node: _Element) -> bool: if node.get("ref"): @@ -483,6 +485,7 @@ def _properties_from_references( self, node: _Element, model: XSDModel, + parent_model: XSDModel, source_path: str = "" ) -> tuple[ dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], @@ -521,7 +524,11 @@ def _properties_from_references( source_path += f'/{ref_element.get("name")}' if not (XSDReader.is_array(ref_element) or is_array): - referenced_model_names, new_root_properties = self._create_model(referenced_element, source_path) + referenced_model_names, new_root_properties = self._create_model( + referenced_element, + source_path=source_path, + parent_model=model + ) property_type = "ref" else: referenced_element_properties = { @@ -532,8 +539,12 @@ def _properties_from_references( } } property_type = "backref" - referenced_model_names, new_root_properties = self._create_model(referenced_element, source_path, - additional_properties=referenced_element_properties) + referenced_model_names, new_root_properties = self._create_model( + referenced_element, + source_path=source_path, + parent_model=model, + additional_properties=referenced_element_properties + ) root_properties.update(new_root_properties) @@ -551,6 
+562,7 @@ def _split_choice( self, node: _Element, source_path: str, + parent_model: XSDModel, additional_properties: dict[str, dict[str, str | bool | dict[str, str]]] ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: """ @@ -589,7 +601,9 @@ def _split_choice( for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) returned_model_names, new_root_properties = self._create_model( - node_copy, source_path, + node_copy, + source_path=source_path, + parent_model=parent_model, additional_properties=additional_properties) root_properties.update(new_root_properties) model_names.extend(returned_model_names) @@ -600,7 +614,9 @@ def _split_choice( else: choice_node_parent.insert(0, choice) returned_model_names, new_root_properties = self._create_model( - node_copy, source_path, + node_copy, + source_path=source_path, + parent_model=parent_model, additional_properties=additional_properties) model_names.extend(returned_model_names) root_properties.update(new_root_properties) @@ -613,12 +629,14 @@ def _create_model( node: _Element, source_path: str = "", is_root_model: bool = False, + parent_model: XSDModel = None, additional_properties: dict[str, str | bool | dict[str, str | dict[str, Any]]] = None ) -> tuple[list[str], dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]: """ Parses an element and makes a model out of it. If it is a complete model, it will be added to the models list. 
""" model = XSDModel(self) + model.parent_model = parent_model if additional_properties is None: additional_properties = {} @@ -652,7 +670,11 @@ def _create_model( choices = complex_type_node.xpath(f'./*[local-name() = "sequence"]/*[local-name() = "choice"]') if choices: if choices[0].get("maxOccurs") != "unbounded": - return self._split_choice(node, source_path, additional_properties=additional_properties) + return self._split_choice( + node, + source_path=source_path, + parent_model=model, + additional_properties=additional_properties) # if complextype node's property mixed is true, it allows text inside if complex_type_node.get("mixed") == "true": @@ -730,7 +752,10 @@ def _create_model( if not element.get("name") in paths: # this can sometimes happen when choice node has been split or maybe in some other cases too - return self._create_model(element, source_path=new_source_path) + return self._create_model( + element, + source_path=new_source_path, + parent_model=model) else: for index, path in enumerate(paths): if path == element.get("name"): @@ -745,11 +770,17 @@ def _create_model( element = sequence_or_all_node.xpath(f'./*[local-name() = "element"]')[0] element = self._get_referenced_node(element) if not is_root_model: - return self._create_model(element, source_path=new_source_path, - additional_properties=additional_properties) + return self._create_model( + element, + source_path=new_source_path, + parent_model=model, + additional_properties=additional_properties) else: - _, new_root_properties = self._create_model(element, source_path=new_source_path, - additional_properties=additional_properties) + _, new_root_properties = self._create_model( + element, + source_path=new_source_path, + parent_model=model, + additional_properties=additional_properties) root_properties.update(new_root_properties) elif sequence_or_all_node_length > 1 or properties: @@ -760,7 +791,11 @@ def _create_model( properties_required=properties_required)) # references - 
properties_from_references, new_root_properties = self._properties_from_references(sequence_or_all_node, model, new_source_path) + properties_from_references, new_root_properties = self._properties_from_references( + sequence_or_all_node, + model=model, + parent_model=parent_model, + source_path=new_source_path) properties.update(properties_from_references) root_properties.update(new_root_properties) @@ -771,7 +806,11 @@ def _create_model( # check for recursion paths = new_source_path.split("/") if not child_node.get("name") in paths: - _, new_root_properties = self._create_model(child_node, source_path=new_source_path) + _, new_root_properties = self._create_model( + child_node, + source_path=new_source_path, + parent_model=model + ) root_properties.update(new_root_properties) else: for index, path in enumerate(paths): @@ -779,9 +818,11 @@ def _create_model( paths[index] = f"/{path}" new_source_path = "/".join(paths) - if properties or is_root_model: + model.properties = properties - # DEALING WITH NESTED ROOT PROPERTIES --------------------- + # DEALING WITH NESTED ROOT PROPERTIES --------------------- + + if properties: # new_root_properties are to pass up to the root model and to add to it new_root_properties = {} # add the model prefix to every property name and source @@ -789,6 +830,11 @@ def _create_model( for root_property_id, root_property in model_properties.items(): # we don't need to add refs and backrefs, only actual fields + # TODO: I don't know if we need this check. I would assume that we don't. 
+ # But if we delete this check, then it doesn't find the referenced model + # if there is a model in between + # Example: Objektai has faktai[].faktu_naudotojai[].naudotojo_id then + # Faktu_Naudotojai has referencce to Faktai but not Objektai if not root_property.get("type") == "ref" and not root_property.get("type") == "backref": # we need to find out the name of the property that corresponds the model, @@ -812,22 +858,20 @@ def _create_model( root_property_id = f"{prefix}{array_sign}.{root_property_id}" new_root_properties[root_property_id] = root_property - if is_root_model: - properties.update(new_root_properties) - else: - new_root_properties.update(deepcopy(properties)) + model.root_properties = new_root_properties + + new_root_properties.update(deepcopy(properties)) # adding node source to the source path here, before passing further, # because we can't retrieve it later otherwise - returned_root_properties = {} - for root_property_id, root_property in new_root_properties.items(): - new_root_property = deepcopy(root_property) - - if "external" in root_property: - new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} - returned_root_properties[root_property_id] = new_root_property - - model.properties = properties + # returned_root_properties = {} + # for root_property_id, root_property in new_root_properties.items(): + # new_root_property = deepcopy(root_property) + # + # if "external" in root_property: + # new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} + # returned_root_properties[root_property_id] = new_root_property + returned_root_properties = new_root_properties model.add_external_info(external_name=new_source_path) model.description = self.get_description(node) @@ -837,10 +881,10 @@ def _create_model( model_name = f"{model.name}[]" else: model_name = model.name - # -------------------END DEALING WITH NESTED ROOT PROPERTIES return [model.name, ], 
{model_name: returned_root_properties} - return [], {} + + return [], root_properties def _add_resource_model(self): resource_model = XSDModel(self) @@ -849,6 +893,7 @@ def _add_resource_model(self): resource_model.description = "Įvairūs duomenys" resource_model.uri = "http://www.w3.org/2000/01/rdf-schema#Resource" resource_model.properties = resource_model.properties_from_simple_elements(self.root, from_root=True) + resource_model.root_properties = {} if resource_model.properties: resource_model.set_name(self.deduplicate(f"Resource")) self.models.append(resource_model) @@ -961,4 +1006,10 @@ def read_schema( for parsed_model in xsd.models: + # we need to add root properties to properties if it's a root model + if parsed_model.parent_model is None or parsed_model.parent_model not in xsd.models: + parsed_model.properties.update(parsed_model.root_properties) + + parsed_model.properties = dict(sorted(parsed_model.properties.items())) + yield None, parsed_model.get_data() From 4fe01adef215e4a2187219b88b8be0eca57277f2 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 13 Jun 2024 20:10:40 +0300 Subject: [PATCH 08/62] cleanup --- spinta/manifests/xsd/helpers.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index e14d7f427..2f97cea99 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -862,15 +862,6 @@ def _create_model( new_root_properties.update(deepcopy(properties)) - # adding node source to the source path here, before passing further, - # because we can't retrieve it later otherwise - # returned_root_properties = {} - # for root_property_id, root_property in new_root_properties.items(): - # new_root_property = deepcopy(root_property) - # - # if "external" in root_property: - # new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} - # returned_root_properties[root_property_id] = new_root_property 
returned_root_properties = new_root_properties model.add_external_info(external_name=new_source_path) From 85cdef51139fa66f9487e0af81852a0c8fbceae7 Mon Sep 17 00:00:00 2001 From: karina Date: Tue, 18 Jun 2024 20:56:52 +0300 Subject: [PATCH 09/62] #622 no root model ir it doesn't have it's own properties --- spinta/manifests/xsd/helpers.py | 103 ++++++++++++++++++---------- tests/manifests/xsd/test_helpers.py | 10 +-- 2 files changed, 72 insertions(+), 41 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 2f97cea99..01be5758d 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -139,30 +139,6 @@ def set_name(self, name: str): self.basename = name self.name = f"{self.dataset_name}/{name}" - def _get_property_type(self, node: _Element) -> str: - if node.get("ref"): - return "ref" - property_type: str = node.get("type") - if not property_type: - # this is a self defined simple type, so we take it's base as type - restrictions: list = node.xpath(f'./*[local-name() = "simpleType"]/*[local-name() = "restriction"]') - if restrictions: - property_type = restrictions[0].get("base", "") - else: - property_type = "" - # getting rid of the prefix - if ":" in property_type: - property_type = property_type.split(":")[1] - - if property_type in self.xsd.custom_types: - property_type = self.xsd.custom_types.get(property_type).get("base", "") - if property_type in DATATYPES_MAPPING: - property_type = DATATYPES_MAPPING[property_type] - else: - property_type = "string" - - return property_type - def _get_enums(self, node: _Element) -> dict[str, dict[str, Any]]: enums = {} simple_type = node.xpath(f'./*[local-name() = "simpleType"]') @@ -186,7 +162,7 @@ def _node_to_partial_property(self, node: _Element) -> tuple[str, dict[str, str property_name = node.get("name") prop["external"] = {"name": property_name} property_id = to_property_name(property_name) - prop["type"] = self._get_property_type(node) + 
prop["type"] = self.xsd.get_property_type(node) if ";" in prop["type"]: prop_type, target, value = prop["type"].split(";") prop["type"] = prop_type @@ -295,7 +271,7 @@ def properties_from_simple_elements( properties[property_id] = prop return properties - def get_text_property(self, property_type = None) -> dict[str, dict[str, str | dict[str, str]]]: + def get_text_property(self, property_type=None) -> dict[str, dict[str, str | dict[str, str]]]: if property_type is None: property_type = "string" return { @@ -306,17 +282,44 @@ def get_text_property(self, property_type = None) -> dict[str, dict[str, str | d } }} + def has_non_ref_properties(self) -> bool: + return any([prop["type"] not in ("ref", "backerf") for prop in self.properties.values()]) + class XSDReader: def __init__(self, path, dataset_name: str): self._path: str = path - self.models: list[XSDModel] = [] + self.models: dict[str, XSDModel] = {} self.custom_types: dict = {} self._dataset_given_name: str = dataset_name self._set_dataset_and_resource_info() self.deduplicate: Deduplicator = Deduplicator() + def get_property_type(self, node: _Element) -> str: + if node.get("ref"): + return "ref" + property_type: str = node.get("type") + if not property_type: + # this is a self defined simple type, so we take it's base as type + restrictions: list = node.xpath(f'./*[local-name() = "simpleType"]/*[local-name() = "restriction"]') + if restrictions: + property_type = restrictions[0].get("base", "") + else: + property_type = "" + # getting rid of the prefix + if ":" in property_type: + property_type = property_type.split(":")[1] + + if property_type in self.custom_types: + property_type = self.custom_types.get(property_type).get("base", "") + if property_type in DATATYPES_MAPPING: + property_type = DATATYPES_MAPPING[property_type] + else: + property_type = "string" + + return property_type + @staticmethod def get_enums_from_simple_type(node: _Element) -> dict[str, dict[str, Any]]: enums = {} @@ -514,7 +517,10 @@ def 
_properties_from_references( sequence = sequences[0] else: sequence = None - if sequence is not None and len(sequence) == 1 and self.node_is_ref(sequence[0]): + + # we check for the length of sequence, because it can has more than one element, but also length of + # complexType because it can have attributes too. + if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self.node_is_ref(sequence[0]): is_array = XSDReader.is_array(referenced_element) if not is_array: is_array = XSDReader.is_array(complex_type[0][0]) @@ -551,7 +557,7 @@ def _properties_from_references( for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) - prop["external"]["name"] = "" + prop["external"]["name"] = prop["external"]["name"].rstrip("/text()") prop["type"] = property_type prop["model"] = f"{referenced_model_name}" properties[property_id] = prop @@ -829,13 +835,14 @@ def _create_model( for model_name, model_properties in root_properties.items(): for root_property_id, root_property in model_properties.items(): - # we don't need to add refs and backrefs, only actual fields + # we don't need to add refs which don't have source (as they point to the root model then) # TODO: I don't know if we need this check. I would assume that we don't. 
# But if we delete this check, then it doesn't find the referenced model # if there is a model in between # Example: Objektai has faktai[].faktu_naudotojai[].naudotojo_id then # Faktu_Naudotojai has referencce to Faktai but not Objektai - if not root_property.get("type") == "ref" and not root_property.get("type") == "backref": + # if True: + if not (root_property.get("type") == "ref" and "external" not in root_property): # we need to find out the name of the property that corresponds the model, # because we need to use that if we used it in ref properties, otherwise use @@ -866,7 +873,7 @@ def _create_model( model.add_external_info(external_name=new_source_path) model.description = self.get_description(node) - self.models.append(model) + self.models[model.name] = model if additional_properties: model_name = f"{model.name}[]" @@ -887,7 +894,7 @@ def _add_resource_model(self): resource_model.root_properties = {} if resource_model.properties: resource_model.set_name(self.deduplicate(f"Resource")) - self.models.append(resource_model) + self.models[resource_model.name] = resource_model def _parse_root_node(self): for node in self.root: @@ -995,12 +1002,36 @@ def read_schema( yield None, xsd.dataset_and_resource_info - for parsed_model in xsd.models: + new_models = {} + for model_name, parsed_model in xsd.models.items(): + if parsed_model.has_non_ref_properties() and parsed_model.parent_model is not None: + new_models[model_name] = parsed_model + else: + for ref_model in xsd.models.values(): + new_properties = {} + for property_id, prop in ref_model.properties.items(): + if not (prop["type"] == "ref" and prop["model"] == model_name): + new_properties[property_id] = prop + else: + if property_id in ref_model.root_properties: + ref_model.root_properties.pop(property_id) + ref_model.properties = new_properties + + xsd.models = new_models + + for model_name, parsed_model in xsd.models.items(): # we need to add root properties to properties if it's a root model - if 
parsed_model.parent_model is None or parsed_model.parent_model not in xsd.models: + if parsed_model.parent_model is None or parsed_model.parent_model.name not in xsd.models: parsed_model.properties.update(parsed_model.root_properties) - + for prop_id, prop in parsed_model.root_properties.items(): + if prop["type"] == "backref": + backref_model = xsd.models[prop["model"]] + if backref_model != parsed_model: + ref_property = {to_property_name(parsed_model.basename): {"model": parsed_model.name, "type": "ref"}} + backref_model.properties.update(ref_property) parsed_model.properties = dict(sorted(parsed_model.properties.items())) + for model_name, parsed_model in xsd.models.items(): + yield None, parsed_model.get_data() diff --git a/tests/manifests/xsd/test_helpers.py b/tests/manifests/xsd/test_helpers.py index 7c73f47d4..95e169456 100644 --- a/tests/manifests/xsd/test_helpers.py +++ b/tests/manifests/xsd/test_helpers.py @@ -81,7 +81,7 @@ def test_get_property_type(): element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") model = XSDModel(xsd, schema) - result = model._get_property_type(element) + result = model.get_property_type(element) assert result == "string" @@ -104,7 +104,7 @@ def test_get_property_type_ref(): print("ELEMENT:", element) xsd = XSDReader("test.xsd", "dataset1") model = XSDModel(xsd, schema) - result = model._get_property_type(element) + result = model.get_property_type(element) assert result == "ref" @@ -127,7 +127,7 @@ def test_get_property_type_simple_type(): element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") model = XSDModel(xsd, schema) - result = model._get_property_type(element) + result = model.get_property_type(element) assert result == "string" @@ -145,7 +145,7 @@ def test_get_property_type_custom(): xsd = XSDReader("test.xsd", "dataset1") xsd.custom_types = {"some_type": {"base": "string"}} model = XSDModel(xsd, schema) - result = 
model._get_property_type(element) + result = model.get_property_type(element) assert result == "string" @@ -162,7 +162,7 @@ def test_get_property_type_unknown(): element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") model = XSDModel(xsd, schema) - result = model._get_property_type(element) + result = model.get_property_type(element) assert result == "string" From a417166e45e401f4bef83f9973d5f30517c3ba54 Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 19 Jun 2024 11:55:18 +0300 Subject: [PATCH 10/62] #622 if root model has more than one ref, it has to be in --- spinta/manifests/xsd/helpers.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 01be5758d..36cc3810a 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -1002,23 +1002,6 @@ def read_schema( yield None, xsd.dataset_and_resource_info - new_models = {} - for model_name, parsed_model in xsd.models.items(): - if parsed_model.has_non_ref_properties() and parsed_model.parent_model is not None: - new_models[model_name] = parsed_model - else: - for ref_model in xsd.models.values(): - new_properties = {} - for property_id, prop in ref_model.properties.items(): - if not (prop["type"] == "ref" and prop["model"] == model_name): - new_properties[property_id] = prop - else: - if property_id in ref_model.root_properties: - ref_model.root_properties.pop(property_id) - ref_model.properties = new_properties - - xsd.models = new_models - for model_name, parsed_model in xsd.models.items(): # we need to add root properties to properties if it's a root model From 216dd518d188797f42e743e5c7ce7bbf72a05d1d Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 19 Jun 2024 13:51:09 +0300 Subject: [PATCH 11/62] correct source when there's a ref to the ref --- spinta/manifests/xsd/helpers.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git 
a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 36cc3810a..b57f46687 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -518,16 +518,18 @@ def _properties_from_references( else: sequence = None + new_referenced_element = None # we check for the length of sequence, because it can has more than one element, but also length of # complexType because it can have attributes too. if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self.node_is_ref(sequence[0]): + if ref_element.get("name") is not None: + source_path += f'/{ref_element.get("name")}' + source_path += f'/{referenced_element.get("name")}' is_array = XSDReader.is_array(referenced_element) if not is_array: is_array = XSDReader.is_array(complex_type[0][0]) new_referenced_element = self._get_referenced_node(complex_type[0][0]) referenced_element = new_referenced_element - if ref_element.get("name") is not None: - source_path += f'/{ref_element.get("name")}' if not (XSDReader.is_array(ref_element) or is_array): referenced_model_names, new_root_properties = self._create_model( @@ -556,8 +558,11 @@ def _properties_from_references( for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) - prop["external"]["name"] = prop["external"]["name"].rstrip("/text()") + if new_referenced_element is not None: + _, referenced_prop = model.simple_element_to_property(referenced_element) + prop["external"]["name"] += f'/{referenced_prop["external"]["name"].rstrip("/text()")}' + prop["type"] = property_type prop["model"] = f"{referenced_model_name}" properties[property_id] = prop From 546d6ceec5f9c0d58fa213782145c577b31bcee5 Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 19 Jun 2024 20:33:36 +0300 Subject: [PATCH 12/62] #622 reference and then final element working properly --- spinta/manifests/xsd/helpers.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 
insertions(+), 10 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index b57f46687..f4e526a72 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -208,7 +208,8 @@ def attributes_to_properties( def simple_element_to_property( self, element: _Element, - is_array: bool = False + is_array: bool = False, + source_path: str = None ) -> tuple[str, dict[str, str | bool | dict[str, Any]]]: """ simple element is an element which is either @@ -238,6 +239,8 @@ def simple_element_to_property( prop["external"]["name"] = ref property_id = self.deduplicate(to_property_name(ref)) prop["external"]["name"] = f'{prop["external"]["name"]}/text()' + if source_path: + prop["external"]["name"] = f'{source_path}/{prop["external"]["name"]}' if prop.get("type") == "": prop["type"] = "string" if XSDReader.is_array(element) or is_array: @@ -519,7 +522,7 @@ def _properties_from_references( sequence = None new_referenced_element = None - # we check for the length of sequence, because it can has more than one element, but also length of + # we check for the length of sequence, because it can have more than one element, but also length of # complexType because it can have attributes too. 
if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self.node_is_ref(sequence[0]): if ref_element.get("name") is not None: @@ -528,9 +531,21 @@ def _properties_from_references( is_array = XSDReader.is_array(referenced_element) if not is_array: is_array = XSDReader.is_array(complex_type[0][0]) + + previous_referenced_element_name = referenced_element.get("name") new_referenced_element = self._get_referenced_node(complex_type[0][0]) referenced_element = new_referenced_element + if self.node_is_simple_type_or_inline(referenced_element): + property_id, prop = model.simple_element_to_property( + referenced_element, + is_array=is_array, + source_path=previous_referenced_element_name) + if not XSDReader.is_required(ref_element): + prop["required"] = False + properties[property_id] = prop + continue + if not (XSDReader.is_array(ref_element) or is_array): referenced_model_names, new_root_properties = self._create_model( referenced_element, @@ -841,21 +856,17 @@ def _create_model( for root_property_id, root_property in model_properties.items(): # we don't need to add refs which don't have source (as they point to the root model then) - # TODO: I don't know if we need this check. I would assume that we don't. 
- # But if we delete this check, then it doesn't find the referenced model - # if there is a model in between - # Example: Objektai has faktai[].faktu_naudotojai[].naudotojo_id then - # Faktu_Naudotojai has referencce to Faktai but not Objektai - # if True: if not (root_property.get("type") == "ref" and "external" not in root_property): # we need to find out the name of the property that corresponds the model, - # because we need to use that if we used it in ref properties, otherwise use + # because we need to use that if we used it in ref properties, + # otherwise use newly created form model name prefix = None stripped_model_name = model_name.rstrip("[]") for property_id, prop in properties.items(): if "model" in prop: - property_model_name = prop.get("model").split("/")[-1] + # property_model_name = prop.get("model").split("/")[-1] + property_model_name = prop.get("model") if property_model_name == stripped_model_name: prefix = property_id break From 35ccc669fd6053cd6283611f1d32494398dc5c09 Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 19 Jun 2024 22:03:32 +0300 Subject: [PATCH 13/62] #622 checking for root model for adding to xsd.models --- spinta/manifests/xsd/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index f4e526a72..990950150 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -848,7 +848,7 @@ def _create_model( # DEALING WITH NESTED ROOT PROPERTIES --------------------- - if properties: + if properties or is_root_model: # new_root_properties are to pass up to the root model and to add to it new_root_properties = {} # add the model prefix to every property name and source From ba3993fdb65618dbce618bdad609a29ab400ee0c Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 6 Jun 2024 19:13:30 +0300 Subject: [PATCH 14/62] #622 added nested structure for XSD files --- spinta/manifests/xsd/helpers.py | 107 
+++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 35 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 6a0e395b9..cd546195b 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -483,9 +483,12 @@ def _properties_from_references( node: _Element, model: XSDModel, source_path: str = "" - ) -> dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]: + ) -> tuple[ + dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], + dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]: properties = {} + root_properties = {} # if len(node) == 1: # # if this model has only one property, which is a reference, we don't create it, but pass it on. # ref_element = node.xpath("./*[@ref]")[0] @@ -495,6 +498,7 @@ def _properties_from_references( # xpath_query = f"//*[@name='{ref}']" # referenced_element = self.root.xpath(xpath_query)[0] # return self._properties_from_references(model=model, source_path=node.get("name")) + root_properties = {} for ref_element in node.xpath("./*[@ref]"): referenced_element = self._get_referenced_node(ref_element) @@ -505,7 +509,6 @@ def _properties_from_references( properties[property_id] = prop else: is_array = False - # try: # TODO fix this because it probably doesn't cover all cases, only something like # https://github.com/atviriduomenys/spinta/issues/613 complex_type = referenced_element.xpath("./*[local-name() = 'complexType']")[0] @@ -522,11 +525,9 @@ def _properties_from_references( referenced_element = new_referenced_element if ref_element.get("name") is not None: source_path += f'/{ref_element.get("name")}' - # except (TypeError, IndexError): - # pass if not (XSDReader.is_array(ref_element) or is_array): - referenced_model_names = self._create_model(referenced_element, source_path) + referenced_model_names, new_root_properties = self._create_model(referenced_element, source_path) property_type = "ref" else: 
referenced_element_properties = { @@ -537,9 +538,11 @@ def _properties_from_references( } } property_type = "backref" - referenced_model_names = self._create_model(referenced_element, source_path, + referenced_model_names, new_root_properties = self._create_model(referenced_element, source_path, additional_properties=referenced_element_properties) + root_properties.update(new_root_properties) + for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) @@ -548,7 +551,7 @@ def _properties_from_references( prop["model"] = f"{referenced_model_name}" properties[property_id] = prop - return properties + return properties, root_properties def _split_choice( self, @@ -560,7 +563,7 @@ def _split_choice( If there are choices in the element, we need to split it and create a separate model per each choice """ - + root_properties = {} model_names = [] node_copy = deepcopy(node) if self._node_has_separate_complex_type(node_copy): @@ -588,13 +591,20 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - model_names.extend(self._create_model(node_copy, source_path, additional_properties)) + returned_model_names, root_properties = self._create_model(node_copy, source_path, + additional_properties) + model_names.extend(returned_model_names) + root_properties.update(root_properties) for node_in_choice in choice_copy: node_in_choice = choice_node_parent.xpath(f"./*[@name=\'{node_in_choice.get('name')}\']")[0] choice_node_parent.remove(node_in_choice) else: choice_node_parent.insert(0, choice) - model_names.extend(self._create_model(node_copy, source_path, additional_properties)) + returned_model_names, root_properties = self._create_model(node_copy, source_path, + additional_properties) + model_names.extend(returned_model_names) + root_properties.update(root_properties) + choice_node_parent.remove(choice) return model_names @@ 
-602,8 +612,9 @@ def _create_model( self, node: _Element, source_path: str = "", + is_root_model: bool = False, additional_properties: dict[str, str | bool | dict[str, str | dict[str, Any]]] = None - ) -> list[str]: + ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: """ Parses an element and makes a model out of it. If it is a complete model, it will be added to the models list. """ @@ -612,6 +623,10 @@ def _create_model( if additional_properties is None: additional_properties = {} + # properties to add to the root model + root_properties = {} + + # properties of this model properties = {} properties.update(additional_properties) @@ -619,14 +634,8 @@ def _create_model( model.set_name(self.deduplicate(to_model_name(node.get("name")))) - # if this is complexType node which has complexContent, with a separate - # node, we need to join the contents of them both - - description = self.get_description(node) properties.update(model.attributes_to_properties(node)) - model_names = [] - if node.xpath(f'./*[local-name() = "complexType"]') or self._node_has_separate_complex_type(node): if self._node_has_separate_complex_type(node): @@ -650,11 +659,10 @@ def _create_model( # if complextype node's property mixed is true, it allows text inside if complex_type_node.get("mixed") == "true": properties.update(model.get_text_property()) - if complex_type_node.xpath(f'./*[local-name() = "complexContent"]'): - # TODO: this is only for the nodes where complex content extension base is abstract. 
- # it's the case for the RC documents, but might be different for other data providers - # https://github.com/atviriduomenys/spinta/issues/604 + # if this is complexType node which has complexContent, with a separate + # node, we need to join the contents of them both + if complex_type_node.xpath(f'./*[local-name() = "complexContent"]'): complex_type_node = complex_type_node.xpath(f'./*[local-name() = "complexContent"]/*[local-name() = "extension"]')[0] complex_content_base_name = complex_type_node.get("base") complex_content_base_node = self._get_separate_complex_type_node_by_type(complex_content_base_name) @@ -736,11 +744,15 @@ def _create_model( # and maxOccurs of it is 1, # then do not create reference, but add to the same - # properties.update( - # self._properties_from_references(sequence_or_all_node, model, new_source_path)) element = sequence_or_all_node.xpath(f'./*[local-name() = "element"]')[0] element = self._get_referenced_node(element) - return self._create_model(element, source_path=new_source_path, additional_properties=additional_properties) + if not is_root_model: + return self._create_model(element, source_path=new_source_path, + additional_properties=additional_properties) + else: + _, new_root_properties = self._create_model(element, source_path=new_source_path, + additional_properties=additional_properties) + root_properties.update(new_root_properties) elif sequence_or_all_node_length > 1 or properties: # properties from simple type or inline elements without references @@ -750,8 +762,9 @@ def _create_model( properties_required=properties_required)) # references - properties.update( - self._properties_from_references(sequence_or_all_node, model, new_source_path)) + properties_from_references, new_root_properties = self._properties_from_references(sequence_or_all_node, model, new_source_path) + properties.update(properties_from_references) + root_properties.update(new_root_properties) # complex type child nodes - to models for child_node in 
sequence_or_all_node: @@ -761,21 +774,48 @@ def _create_model( # TODO: maybe move this to a separate function paths = new_source_path.split("/") if not child_node.get("name") in paths: - self._create_model(child_node, source_path=new_source_path) + _, new_root_properties = self._create_model(child_node, source_path=new_source_path) + root_properties.update(new_root_properties) else: for index, path in enumerate(paths): if path == child_node.get("name"): paths[index] = f"/{path}" new_source_path = "/".join(paths) - if properties: + if properties or is_root_model: + + new_root_properties = {} + if is_root_model: + properties.update(root_properties) + else: + root_properties.update(deepcopy(properties)) + + # if we have additional properties, those are to add `ref` which means that on the other side + # there is a `backref` which means that this is for an array + if additional_properties: + array_sign = "[]" + else: + array_sign = "" + # add the model prefix to every property name and source + for root_property_id, root_property in root_properties.items(): + new_root_property = root_property + + if "external" in root_property: + new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} + + # we don't need to add refs and backrefs, only actual fields + if not new_root_property.get("type") == "ref" and not new_root_property.get("type") == "backref": + root_property_id = f"{to_property_name(model.standalone_name)}{array_sign}.{root_property_id}" + new_root_properties[root_property_id] = new_root_property + model.properties = properties + model.add_external_info(external_name=new_source_path) - model.description = description + model.description = self.get_description(node) self.models.append(model) - return [model.name, ] - return [] + return [model.name, ], new_root_properties + return [], {} def _add_resource_model(self): resource_model = XSDModel(self) @@ -789,16 +829,13 @@ def _add_resource_model(self): 
self.models.append(resource_model) def _parse_root_node(self): - # get properties from elements - # Resource model - special case - for node in self.root: if ( self._is_element(node) and (not self.node_is_simple_type_or_inline(node) or self.node_is_ref(node)) and not self._node_is_referenced(node) ): - self._create_model(node) + self._create_model(node, is_root_model=True) def start(self): self._extract_root() From 7dd51c8ba9d6d5c05faf1116cfe583c7c0863ec4 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 6 Jun 2024 21:47:39 +0300 Subject: [PATCH 15/62] #622 additional_properties passing as named parameter --- spinta/manifests/xsd/helpers.py | 18 ++++++++++-------- tests/manifests/xsd/test_xsd.py | 6 ++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index cd546195b..fe740ac3c 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -558,7 +558,7 @@ def _split_choice( node: _Element, source_path: str, additional_properties: dict[str, dict[str, str | bool | dict[str, str]]] - ) -> list[str]: + ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: """ If there are choices in the element, we need to split it and create a separate model per each choice @@ -591,22 +591,24 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - returned_model_names, root_properties = self._create_model(node_copy, source_path, - additional_properties) + returned_model_names, new_root_properties = self._create_model( + node_copy, source_path, + additional_properties=additional_properties) model_names.extend(returned_model_names) - root_properties.update(root_properties) + root_properties.update(new_root_properties) for node_in_choice in choice_copy: node_in_choice = choice_node_parent.xpath(f"./*[@name=\'{node_in_choice.get('name')}\']")[0] choice_node_parent.remove(node_in_choice) 
else: choice_node_parent.insert(0, choice) - returned_model_names, root_properties = self._create_model(node_copy, source_path, - additional_properties) + returned_model_names, new_root_properties = self._create_model( + node_copy, source_path, + additional_properties=additional_properties) model_names.extend(returned_model_names) - root_properties.update(root_properties) + root_properties.update(new_root_properties) choice_node_parent.remove(choice) - return model_names + return model_names, root_properties def _create_model( self, diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index dfda3aec8..b7b181ea2 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -286,6 +286,9 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | | | | | | text | string | | text() | | | | | | | | | | | asmenys[] | backref | Asmuo | | | | | | | + | | | | | asmuo[].id | string required | | asmuo/@id | | | | | | + | | | | | asmuo[].ak | string required | | asmuo/@ak | | | | | | + | | | | | asmuo[].text | string | | asmuo/text() | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -342,6 +345,9 @@ def test_xsd_resource_model(rc: RawConfig, tmp_path: Path): | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | | | | | | text | string | | text() | | | | | | | | | | | asmenys | ref | Asmenys | | | | | | | + | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris + | | | | | asmenys.text | string | | asmenys/text() | | | | | | + """ path = tmp_path / 'manifest.xsd' From b22fd99094e820163a281c7cfe26f587a592173e Mon Sep 17 00:00:00 2001 From: karina Date: Fri, 7 Jun 2024 23:43:49 +0300 Subject: [PATCH 16/62] nested properties git commit -am --- spinta/manifests/xsd/helpers.py | 96 +++++++++++++++++------------ tests/manifests/xsd/test_helpers.py | 66 ++++++++++++++++++-- 
tests/manifests/xsd/test_xsd.py | 53 +++++++++++++++- 3 files changed, 169 insertions(+), 46 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index fe740ac3c..5e4bae625 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -485,20 +485,10 @@ def _properties_from_references( source_path: str = "" ) -> tuple[ dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], - dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]: + dict[str, dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]]: properties = {} root_properties = {} - # if len(node) == 1: - # # if this model has only one property, which is a reference, we don't create it, but pass it on. - # ref_element = node.xpath("./*[@ref]")[0] - # ref = ref_element.get("ref") - # if ":" in ref: - # ref = ref.split(":")[1] - # xpath_query = f"//*[@name='{ref}']" - # referenced_element = self.root.xpath(xpath_query)[0] - # return self._properties_from_references(model=model, source_path=node.get("name")) - root_properties = {} for ref_element in node.xpath("./*[@ref]"): referenced_element = self._get_referenced_node(ref_element) @@ -591,11 +581,12 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - returned_model_names, new_root_properties = self._create_model( - node_copy, source_path, - additional_properties=additional_properties) - model_names.extend(returned_model_names) - root_properties.update(new_root_properties) + returned_model_names, new_root_properties = self._create_model( + node_copy, source_path, + additional_properties=additional_properties) + root_properties.update(new_root_properties) + model_names.extend(returned_model_names) + for node_in_choice in choice_copy: node_in_choice = choice_node_parent.xpath(f"./*[@name=\'{node_in_choice.get('name')}\']")[0] choice_node_parent.remove(node_in_choice) @@ -616,7 +607,7 @@ 
def _create_model( source_path: str = "", is_root_model: bool = False, additional_properties: dict[str, str | bool | dict[str, str | dict[str, Any]]] = None - ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: + ) -> tuple[list[str], dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]: """ Parses an element and makes a model out of it. If it is a complete model, it will be added to the models list. """ @@ -631,13 +622,12 @@ def _create_model( # properties of this model properties = {} properties.update(additional_properties) + properties.update(model.attributes_to_properties(node)) new_source_path = f"{source_path}/{node.get('name')}" model.set_name(self.deduplicate(to_model_name(node.get("name")))) - properties.update(model.attributes_to_properties(node)) - if node.xpath(f'./*[local-name() = "complexType"]') or self._node_has_separate_complex_type(node): if self._node_has_separate_complex_type(node): @@ -649,7 +639,6 @@ def _create_model( choices = complex_type_node.xpath(f'./*[local-name() = "choice"]') # if choices is unbounded, we treat it like sequence if not choices or choices[0].get("maxOccurs") == "unbounded": - # if it's a `choice` node with `unbounded`, we treat it the same as sequence node if choices: choices = complex_type_node.xpath(f'./*[local-name() = "choice"]/*[local-name() = "choice"]') else: @@ -786,29 +775,52 @@ def _create_model( if properties or is_root_model: + # TODO move this nested properties thing to a function + # DEALING WITH NESTED ROOT PROPERTIES --------------------- + # new_root_properties are to pass up to the root model and to add to it new_root_properties = {} + # add the model prefix to every property name and source + for model_name, model_properties in root_properties.items(): + for root_property_id, root_property in model_properties.items(): + + # we don't need to add refs and backrefs, only actual fields + if not root_property.get("type") == "ref" and not root_property.get("type") 
== "backref": + + # we need to find out the name of the property that corresponds the model, + # because we need to use that if we used it in ref properties, otherwise use + prefix = None + stripped_model_name = model_name.rstrip("[]") + for property_id, prop in properties.items(): + if "model" in prop: + property_model_name = prop.get("model").split("/")[-1] + if property_model_name == stripped_model_name: + prefix = property_id + break + + array_sign = "" + if prefix is None: + if model_name.endswith("[]"): + array_sign = "[]" + + prefix = to_property_name(stripped_model_name.split("/")[-1]) + + root_property_id = f"{prefix}{array_sign}.{root_property_id}" + new_root_properties[root_property_id] = root_property + if is_root_model: - properties.update(root_properties) + properties.update(new_root_properties) else: - root_properties.update(deepcopy(properties)) + new_root_properties.update(deepcopy(properties)) - # if we have additional properties, those are to add `ref` which means that on the other side - # there is a `backref` which means that this is for an array - if additional_properties: - array_sign = "[]" - else: - array_sign = "" - # add the model prefix to every property name and source - for root_property_id, root_property in root_properties.items(): - new_root_property = root_property - - if "external" in root_property: - new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} + # adding node source to the source path here, before passing further, + # because we can't retrieve it later otherwise + returned_root_properties = {} + for root_property_id, root_property in new_root_properties.items(): + new_root_property = deepcopy(root_property) - # we don't need to add refs and backrefs, only actual fields - if not new_root_property.get("type") == "ref" and not new_root_property.get("type") == "backref": - root_property_id = f"{to_property_name(model.standalone_name)}{array_sign}.{root_property_id}" - 
new_root_properties[root_property_id] = new_root_property + if "external" in root_property: + new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} + returned_root_properties[root_property_id] = new_root_property model.properties = properties @@ -816,7 +828,13 @@ def _create_model( model.description = self.get_description(node) self.models.append(model) - return [model.name, ], new_root_properties + if additional_properties: + model_name = f"{model.name}[]" + else: + model_name = model.name + # -------------------END DEALING WITH NESTED ROOT PROPERTIES + + return [model.name, ], {model_name: returned_root_properties} return [], {} def _add_resource_model(self): diff --git a/tests/manifests/xsd/test_helpers.py b/tests/manifests/xsd/test_helpers.py index 9307ef720..7c73f47d4 100644 --- a/tests/manifests/xsd/test_helpers.py +++ b/tests/manifests/xsd/test_helpers.py @@ -716,7 +716,7 @@ def test_properties_from_references(): model = XSDModel(xsd, schema) result = xsd._properties_from_references(sequence, model, source_path="tst") - assert result == { + assert result == ({ 'ct_e200_fc_id': { 'description': 'E200 duomenų kompozicijos unikalus identifikatorius', 'enums': {}, @@ -735,7 +735,7 @@ def test_properties_from_references(): 'required': True, 'type': 'integer', }, - } + }, {}) def test_properties_from_references_complex_not_array(): @@ -782,7 +782,7 @@ def test_properties_from_references_complex_not_array(): model = XSDModel(xsd, schema) result = xsd._properties_from_references(sequence, model, source_path="tst") - assert result == { + assert result == ({ 'fiziniai_asmenys': { 'description': '', 'enums': {}, @@ -799,7 +799,32 @@ def test_properties_from_references_complex_not_array(): 'required': True, 'type': 'ref', }, - } + }, + {'test/FiziniaiAsmenys': {'objektu_asmenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'FIZINIAI_ASMENYS/OBJEKTU_ASMENYS/text()'}, + 'required': False, + 'type': 
'string'}, + 'tekstiniai_duomenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'FIZINIAI_ASMENYS/TEKSTINIAI_DUOMENYS/text()'}, + 'required': False, + 'type': 'string'}}, + 'test/Objektai': {'objektu_asmenys[]': {'description': '', + 'enums': {}, + 'external': {'name': 'OBJEKTAI/OBJEKTU_ASMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'tekstiniai_duomenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'OBJEKTAI/TEKSTINIAI_DUOMENYS/text()'}, + 'required': False, + 'type': 'string'}}} + ) + assert xsd.models[0].get_data() == { 'description': 'Pagrindiniai juridinio asmens duomenys.', @@ -910,7 +935,7 @@ def test_properties_from_references_complex_array(): model.set_name("test") result = xsd._properties_from_references(sequence, model, source_path="tst") - assert result == { + assert result == ({ 'fiziniai_asmenys[]': { 'description': '', 'enums': {}, @@ -927,7 +952,36 @@ def test_properties_from_references_complex_array(): 'required': True, 'type': 'backref', }, - } + }, + {'test/FiziniaiAsmenys[]': {'objektu_asmenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'FIZINIAI_ASMENYS/OBJEKTU_ASMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'tekstiniai_duomenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'FIZINIAI_ASMENYS/TEKSTINIAI_DUOMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'test': {'model': 'test/test', + 'type': 'ref'}}, + 'test/Objektai[]': {'objektu_asmenys[]': {'description': '', + 'enums': {}, + 'external': {'name': 'OBJEKTAI/OBJEKTU_ASMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'tekstiniai_duomenys[]': {'description': '', + 'enums': {}, + 'external': { + 'name': 'OBJEKTAI/TEKSTINIAI_DUOMENYS/text()'}, + 'required': False, + 'type': 'string'}, + 'test': {'model': 'test/test', + 'type': 'ref'}}}, + ) + assert xsd.models[0].get_data() == { 'description': 'Pagrindiniai juridinio asmens duomenys.', diff --git 
a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index b7b181ea2..1a63243c8 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -483,6 +483,13 @@ def test_xsd_choice(rc: RawConfig, tmp_path: Path): | | | | | text | string | | text() | | | | | | | | | | | parcel[] | backref | Parcel1 | | | | | | | | | | | | parcel1[] | backref | Parcel2 | | | | | | | + | | | | | parcel1[].text | string | | parcel/text() | | | | | | + | | | | | parcel1[].parcel_unique_number | integer required | | parcel/parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris + | | | | | parcel2[].text | string | | parcel/text() | | | | | | + | | | | | parcel2[].sign_of_change | integer required | | parcel/sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis + | | enum | | 1 | | | | | | + | | | | 2 | | | | | | + | | | | 3 | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -563,6 +570,14 @@ def test_xsd_choice_max_occurs_unbound(rc: RawConfig, tmp_path: Path): | | | | | text | string | | text() | | | | | | | | | | | parcel[] | backref | Parcel1 | | | | | | | | | | | | parcel1[] | backref | Parcel2 | | | | | | | + | | | | | parcel1[].text | string | | parcel/text() | | | | | | + | | | | | parcel1[].parcel_unique_number | integer required | | parcel/parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris + | | | | | parcel2[].text | string | | parcel/text() | | | | | | + | | | | | parcel2[].sign_of_change | integer required | | parcel/sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis + | | enum | | 1 | | | | | | + | | | | 2 | | | | | | + | | | | 3 | | | | | | + """ path = tmp_path / 'manifest.xsd' @@ -617,6 +632,12 @@ def test_xsd_attributes(rc: RawConfig, tmp_path: Path): | | | | Salygos | | | /SALYGOS | | | | | | | | | | | text | string | | text() | | | | | | | | | | | salyga | ref required | Salyga | | | | | | | + | | | | | salyga.kodas | string | | SALYGA/@kodas | | | | | | + | | | | | 
salyga.nr | integer | | SALYGA/@nr | | | | | | + | | | | | salyga.text | string | | SALYGA/text() | | | | | | + | | | | | salyga.reiksme | string required | | SALYGA/REIKSME/text() | | | | | | + | | | | | salyga.pavadinimas | string | | SALYGA/PAVADINIMAS/text() | | | | | | + | | | | | salyga.aprasymas | string | | SALYGA/APRASYMAS/text() | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -672,6 +693,10 @@ def test_xsd_model_one_property(rc: RawConfig, tmp_path: Path): | | | | | | | | | | | | | | GetTzByTRAResponse | | | /getTzByTRAResponse | | | | | | | | | | | search_parameters | string | | searchParameters/text() | | | | | | + | | | | | extracttz.extract_preparation_time | datetime | | extracttz/extractPreparationTime/text() | | | | | | + | | | | | extracttz.phipoteka | integer | | extracttz/phipoteka/text() | | | | | | + | | | | | klaida.aprasymas | string | | klaida/Aprasymas/text() | | | | | | + """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -805,6 +830,16 @@ def test_xsd_sequence_choice_sequence(rc: RawConfig, tmp_path: Path): | | | | | | | | | | | | | | Data | | | /data | | | | | | | | | | | response_message | string | | responseMessage/text() | | | | | | + | | | | | response_data.documents1.birth_date | string | | responseData/documents/birthDate/text() | | | | | | + | | | | | response_data.documents1.last_name | string | | responseData/documents/lastName/text() | | | | | | + | | | | | response_data.documents1.first_name | string | | responseData/documents/firstName/text() | | | | | | + | | | | | response_data.documents1.code | string | | responseData/documents/code/text() | | | | | | + | | | | | response_data.documents1.iltu_code | string | | responseData/documents/iltu_code/text() | | | | | | + | | | | | response_data.documents2.business_name | string | | responseData/documents/businessName/text() | | | | | | + | | | | | response_data.documents2.code | string | | responseData/documents/code/text() | | | | | | + | | | | | 
response_data.documents2.iltu_code | string | | responseData/documents/iltu_code/text() | | | | | | + | | | | | response_data.statement_id | string required | | responseData/statementId/text() | | | | | | + | | | | | response_data.title | string required | | responseData/title/text() | | | | | | """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -923,6 +958,7 @@ def test_xsd_recursion(rc: RawConfig, tmp_path: Path): | | | | | | | | | | | | | | Data | | | /data | | | | | | | | | | | response_message | string | | responseMessage/text() | | | | | | + | | | | | action.code | string required | | action/code/text() | | | | | | Paslaugos kodas (RC kodas) """ path = tmp_path / 'manifest.xsd' @@ -1020,7 +1056,22 @@ def test_xsd_enumeration(rc: RawConfig, tmp_path: Path): | | | | | | | | | | | | | | Data | | | /data | | | | | | | | | | | response_message | string | | responseMessage/text() | | | | | | - + | | | | | response_data.who_may_consitute | string required | | responseData/who_may_consitute/text() | | | | | | Įgaliojimą gali sudaryti. + | | enum | | fiz | | | | | | + | | | | fiz-notarial | | | | | | + | | | | jur | | | | | | + | | | | jur-notarial | | | | | | + | | | | fiz-jur | | | | | | + | | | | fiz-notarial-jur-notarial | | | | | | + | | | | fiz-notarial-jur | | | | | | + | | | | fiz-jur-notarial | | | | | | + | | | | | response_data.default_description_editable | string required | | responseData/default_description_editable/text() | | | | | | Ar numatytasis aprašymas gali būti redaguojamas? 0 - NE, 1 - TAIP + | | enum | | 0 | | | | | | + | | | | 1 | | | | | | + | | | | | response_data.digital_service | string required | | responseData/digital_service/text() | | | | | | El. paslauga. 
Reikšmės: digital - Tik elektroninė paslauga, analog - Tik neelektroninė paslauga, digital-or-analog - Elektroninė arba neelektroninė paslauga + | | enum | | digital | | | | | | + | | | | analog | | | | | | + | | | | digital-or-analog | | | | | | """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: From 4f8803dd268a874c8c2f056baaa96b271011aa20 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 6 Jun 2024 21:47:39 +0300 Subject: [PATCH 17/62] additional_properties passing as named patrameter --- spinta/manifests/xsd/helpers.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 5e4bae625..5843301fa 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -581,12 +581,11 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - returned_model_names, new_root_properties = self._create_model( - node_copy, source_path, - additional_properties=additional_properties) - root_properties.update(new_root_properties) - model_names.extend(returned_model_names) - + returned_model_names, new_root_properties = self._create_model( + node_copy, source_path, + additional_properties=additional_properties) + model_names.extend(returned_model_names) + root_properties.update(new_root_properties) for node_in_choice in choice_copy: node_in_choice = choice_node_parent.xpath(f"./*[@name=\'{node_in_choice.get('name')}\']")[0] choice_node_parent.remove(node_in_choice) From c2575ac4dbadf30911122a22bcea1cce2a494985 Mon Sep 17 00:00:00 2001 From: karina Date: Mon, 10 Jun 2024 15:46:58 +0300 Subject: [PATCH 18/62] removed some comments --- spinta/manifests/xsd/helpers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 5843301fa..fd0199990 100644 --- a/spinta/manifests/xsd/helpers.py +++ 
b/spinta/manifests/xsd/helpers.py @@ -434,6 +434,7 @@ def _get_separate_complex_type_node_by_type(self, node_type: str) -> _Element: for node in complex_types: if node.get("name") == node_type: return node + def _get_separate_complex_type_node(self, node: _Element) -> _Element: node_type: str | list = node.get('type') return self._get_separate_complex_type_node_by_type(node_type) @@ -488,7 +489,10 @@ def _properties_from_references( dict[str, dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]]: properties = {} + + # nested properties for the root model root_properties = {} + for ref_element in node.xpath("./*[@ref]"): referenced_element = self._get_referenced_node(ref_element) @@ -553,7 +557,10 @@ def _split_choice( If there are choices in the element, we need to split it and create a separate model per each choice """ + + # nested properties for the root model root_properties = {} + model_names = [] node_copy = deepcopy(node) if self._node_has_separate_complex_type(node_copy): @@ -761,7 +768,6 @@ def _create_model( if child_node.xpath(f'./*[local-name() = "complexType"]') \ or self._node_has_separate_complex_type(child_node): # check for recursion - # TODO: maybe move this to a separate function paths = new_source_path.split("/") if not child_node.get("name") in paths: _, new_root_properties = self._create_model(child_node, source_path=new_source_path) @@ -774,7 +780,6 @@ def _create_model( if properties or is_root_model: - # TODO move this nested properties thing to a function # DEALING WITH NESTED ROOT PROPERTIES --------------------- # new_root_properties are to pass up to the root model and to add to it new_root_properties = {} From b99841d196319772de06c1fd3f999d39775ca1b6 Mon Sep 17 00:00:00 2001 From: karina Date: Tue, 11 Jun 2024 16:31:21 +0300 Subject: [PATCH 19/62] standalone_name changed to base_name for consistency --- spinta/manifests/xsd/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index fd0199990..3119c5330 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -103,7 +103,7 @@ def __init__(self, xsd: 'XSDReader', node: _Element = None): self.node: _Element = node self.type: str = "model" self.name: str | None = None - self.standalone_name: str | None = None + self.basename: str | None = None self.external: dict | None = None self.properties: dict | None = None self.uri: str | None = None @@ -133,7 +133,7 @@ def add_external_info(self, external_name: str): } def set_name(self, name: str): - self.standalone_name = name + self.basename = name self.name = f"{self.dataset_name}/{name}" def _get_property_type(self, node: _Element) -> str: @@ -525,7 +525,7 @@ def _properties_from_references( property_type = "ref" else: referenced_element_properties = { - to_property_name(model.standalone_name): + to_property_name(model.basename): { "type": "ref", "model": f"{model.name}" From e7a7c517ffe9ad4c1bbb94618dab6edfd563c457 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 13 Jun 2024 19:13:05 +0300 Subject: [PATCH 20/62] #622 nested properties on the root models only --- spinta/manifests/xsd/helpers.py | 122 +++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 35 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 3119c5330..e14d7f427 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -108,6 +108,9 @@ def __init__(self, xsd: 'XSDReader', node: _Element = None): self.properties: dict | None = None self.uri: str | None = None self.description: str | None = None + self.root_properties: dict | None = None + self.parent_model: XSDModel | None = None + self.is_root_model: bool | None = None def get_data(self): model_data: dict = { @@ -387,7 +390,6 @@ def _get_referenced_node(self, node): referenced_node = self.root.xpath(xpath_query)[0] return referenced_node - 
@staticmethod def node_is_ref(node: _Element) -> bool: if node.get("ref"): @@ -483,6 +485,7 @@ def _properties_from_references( self, node: _Element, model: XSDModel, + parent_model: XSDModel, source_path: str = "" ) -> tuple[ dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], @@ -521,7 +524,11 @@ def _properties_from_references( source_path += f'/{ref_element.get("name")}' if not (XSDReader.is_array(ref_element) or is_array): - referenced_model_names, new_root_properties = self._create_model(referenced_element, source_path) + referenced_model_names, new_root_properties = self._create_model( + referenced_element, + source_path=source_path, + parent_model=model + ) property_type = "ref" else: referenced_element_properties = { @@ -532,8 +539,12 @@ def _properties_from_references( } } property_type = "backref" - referenced_model_names, new_root_properties = self._create_model(referenced_element, source_path, - additional_properties=referenced_element_properties) + referenced_model_names, new_root_properties = self._create_model( + referenced_element, + source_path=source_path, + parent_model=model, + additional_properties=referenced_element_properties + ) root_properties.update(new_root_properties) @@ -551,6 +562,7 @@ def _split_choice( self, node: _Element, source_path: str, + parent_model: XSDModel, additional_properties: dict[str, dict[str, str | bool | dict[str, str]]] ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: """ @@ -588,18 +600,23 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - returned_model_names, new_root_properties = self._create_model( - node_copy, source_path, - additional_properties=additional_properties) - model_names.extend(returned_model_names) - root_properties.update(new_root_properties) + returned_model_names, new_root_properties = self._create_model( + node_copy, + source_path=source_path, + 
parent_model=parent_model, + additional_properties=additional_properties) + root_properties.update(new_root_properties) + model_names.extend(returned_model_names) + for node_in_choice in choice_copy: node_in_choice = choice_node_parent.xpath(f"./*[@name=\'{node_in_choice.get('name')}\']")[0] choice_node_parent.remove(node_in_choice) else: choice_node_parent.insert(0, choice) returned_model_names, new_root_properties = self._create_model( - node_copy, source_path, + node_copy, + source_path=source_path, + parent_model=parent_model, additional_properties=additional_properties) model_names.extend(returned_model_names) root_properties.update(new_root_properties) @@ -612,12 +629,14 @@ def _create_model( node: _Element, source_path: str = "", is_root_model: bool = False, + parent_model: XSDModel = None, additional_properties: dict[str, str | bool | dict[str, str | dict[str, Any]]] = None ) -> tuple[list[str], dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]: """ Parses an element and makes a model out of it. If it is a complete model, it will be added to the models list. 
""" model = XSDModel(self) + model.parent_model = parent_model if additional_properties is None: additional_properties = {} @@ -651,7 +670,11 @@ def _create_model( choices = complex_type_node.xpath(f'./*[local-name() = "sequence"]/*[local-name() = "choice"]') if choices: if choices[0].get("maxOccurs") != "unbounded": - return self._split_choice(node, source_path, additional_properties=additional_properties) + return self._split_choice( + node, + source_path=source_path, + parent_model=model, + additional_properties=additional_properties) # if complextype node's property mixed is true, it allows text inside if complex_type_node.get("mixed") == "true": @@ -729,7 +752,10 @@ def _create_model( if not element.get("name") in paths: # this can sometimes happen when choice node has been split or maybe in some other cases too - return self._create_model(element, source_path=new_source_path) + return self._create_model( + element, + source_path=new_source_path, + parent_model=model) else: for index, path in enumerate(paths): if path == element.get("name"): @@ -744,11 +770,17 @@ def _create_model( element = sequence_or_all_node.xpath(f'./*[local-name() = "element"]')[0] element = self._get_referenced_node(element) if not is_root_model: - return self._create_model(element, source_path=new_source_path, - additional_properties=additional_properties) + return self._create_model( + element, + source_path=new_source_path, + parent_model=model, + additional_properties=additional_properties) else: - _, new_root_properties = self._create_model(element, source_path=new_source_path, - additional_properties=additional_properties) + _, new_root_properties = self._create_model( + element, + source_path=new_source_path, + parent_model=model, + additional_properties=additional_properties) root_properties.update(new_root_properties) elif sequence_or_all_node_length > 1 or properties: @@ -759,7 +791,11 @@ def _create_model( properties_required=properties_required)) # references - 
properties_from_references, new_root_properties = self._properties_from_references(sequence_or_all_node, model, new_source_path) + properties_from_references, new_root_properties = self._properties_from_references( + sequence_or_all_node, + model=model, + parent_model=parent_model, + source_path=new_source_path) properties.update(properties_from_references) root_properties.update(new_root_properties) @@ -770,7 +806,11 @@ def _create_model( # check for recursion paths = new_source_path.split("/") if not child_node.get("name") in paths: - _, new_root_properties = self._create_model(child_node, source_path=new_source_path) + _, new_root_properties = self._create_model( + child_node, + source_path=new_source_path, + parent_model=model + ) root_properties.update(new_root_properties) else: for index, path in enumerate(paths): @@ -778,9 +818,11 @@ def _create_model( paths[index] = f"/{path}" new_source_path = "/".join(paths) - if properties or is_root_model: + model.properties = properties - # DEALING WITH NESTED ROOT PROPERTIES --------------------- + # DEALING WITH NESTED ROOT PROPERTIES --------------------- + + if properties: # new_root_properties are to pass up to the root model and to add to it new_root_properties = {} # add the model prefix to every property name and source @@ -788,6 +830,11 @@ def _create_model( for root_property_id, root_property in model_properties.items(): # we don't need to add refs and backrefs, only actual fields + # TODO: I don't know if we need this check. I would assume that we don't. 
+ # But if we delete this check, then it doesn't find the referenced model + # if there is a model in between + # Example: Objektai has faktai[].faktu_naudotojai[].naudotojo_id then + # Faktu_Naudotojai has referencce to Faktai but not Objektai if not root_property.get("type") == "ref" and not root_property.get("type") == "backref": # we need to find out the name of the property that corresponds the model, @@ -811,22 +858,20 @@ def _create_model( root_property_id = f"{prefix}{array_sign}.{root_property_id}" new_root_properties[root_property_id] = root_property - if is_root_model: - properties.update(new_root_properties) - else: - new_root_properties.update(deepcopy(properties)) + model.root_properties = new_root_properties + + new_root_properties.update(deepcopy(properties)) # adding node source to the source path here, before passing further, # because we can't retrieve it later otherwise - returned_root_properties = {} - for root_property_id, root_property in new_root_properties.items(): - new_root_property = deepcopy(root_property) - - if "external" in root_property: - new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} - returned_root_properties[root_property_id] = new_root_property - - model.properties = properties + # returned_root_properties = {} + # for root_property_id, root_property in new_root_properties.items(): + # new_root_property = deepcopy(root_property) + # + # if "external" in root_property: + # new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} + # returned_root_properties[root_property_id] = new_root_property + returned_root_properties = new_root_properties model.add_external_info(external_name=new_source_path) model.description = self.get_description(node) @@ -836,10 +881,10 @@ def _create_model( model_name = f"{model.name}[]" else: model_name = model.name - # -------------------END DEALING WITH NESTED ROOT PROPERTIES return [model.name, ], 
{model_name: returned_root_properties} - return [], {} + + return [], root_properties def _add_resource_model(self): resource_model = XSDModel(self) @@ -848,6 +893,7 @@ def _add_resource_model(self): resource_model.description = "Įvairūs duomenys" resource_model.uri = "http://www.w3.org/2000/01/rdf-schema#Resource" resource_model.properties = resource_model.properties_from_simple_elements(self.root, from_root=True) + resource_model.root_properties = {} if resource_model.properties: resource_model.set_name(self.deduplicate(f"Resource")) self.models.append(resource_model) @@ -960,4 +1006,10 @@ def read_schema( for parsed_model in xsd.models: + # we need to add root properties to properties if it's a root model + if parsed_model.parent_model is None or parsed_model.parent_model not in xsd.models: + parsed_model.properties.update(parsed_model.root_properties) + + parsed_model.properties = dict(sorted(parsed_model.properties.items())) + yield None, parsed_model.get_data() From db8f639f25e9a4a19045106801de879252872005 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 13 Jun 2024 20:10:40 +0300 Subject: [PATCH 21/62] cleanup --- spinta/manifests/xsd/helpers.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index e14d7f427..2f97cea99 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -862,15 +862,6 @@ def _create_model( new_root_properties.update(deepcopy(properties)) - # adding node source to the source path here, before passing further, - # because we can't retrieve it later otherwise - # returned_root_properties = {} - # for root_property_id, root_property in new_root_properties.items(): - # new_root_property = deepcopy(root_property) - # - # if "external" in root_property: - # new_root_property["external"] = {"name": f"{node.get('name')}/{root_property['external']['name']}"} - # returned_root_properties[root_property_id] = new_root_property 
returned_root_properties = new_root_properties model.add_external_info(external_name=new_source_path) From d514ed791d641363641d7a3f0b1ec077d012d1d3 Mon Sep 17 00:00:00 2001 From: karina Date: Tue, 18 Jun 2024 20:56:52 +0300 Subject: [PATCH 22/62] #622 no root model if it doesn't have its own properties --- spinta/manifests/xsd/helpers.py | 103 ++++++++++++++++++---------- tests/manifests/xsd/test_helpers.py | 10 +-- 2 files changed, 72 insertions(+), 41 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 2f97cea99..01be5758d 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -139,30 +139,6 @@ def set_name(self, name: str): self.basename = name self.name = f"{self.dataset_name}/{name}" - def _get_property_type(self, node: _Element) -> str: - if node.get("ref"): - return "ref" - property_type: str = node.get("type") - if not property_type: - # this is a self defined simple type, so we take it's base as type - restrictions: list = node.xpath(f'./*[local-name() = "simpleType"]/*[local-name() = "restriction"]') - if restrictions: - property_type = restrictions[0].get("base", "") - else: - property_type = "" - # getting rid of the prefix - if ":" in property_type: - property_type = property_type.split(":")[1] - - if property_type in self.xsd.custom_types: - property_type = self.xsd.custom_types.get(property_type).get("base", "") - if property_type in DATATYPES_MAPPING: - property_type = DATATYPES_MAPPING[property_type] - else: - property_type = "string" - - return property_type - def _get_enums(self, node: _Element) -> dict[str, dict[str, Any]]: enums = {} simple_type = node.xpath(f'./*[local-name() = "simpleType"]') @@ -186,7 +162,7 @@ def _node_to_partial_property(self, node: _Element) -> tuple[str, dict[str, str property_name = node.get("name") prop["external"] = {"name": property_name} property_id = to_property_name(property_name) - prop["type"] = self._get_property_type(node) +
prop["type"] = self.xsd.get_property_type(node) if ";" in prop["type"]: prop_type, target, value = prop["type"].split(";") prop["type"] = prop_type @@ -295,7 +271,7 @@ def properties_from_simple_elements( properties[property_id] = prop return properties - def get_text_property(self, property_type = None) -> dict[str, dict[str, str | dict[str, str]]]: + def get_text_property(self, property_type=None) -> dict[str, dict[str, str | dict[str, str]]]: if property_type is None: property_type = "string" return { @@ -306,17 +282,44 @@ def get_text_property(self, property_type = None) -> dict[str, dict[str, str | d } }} + def has_non_ref_properties(self) -> bool: + return any([prop["type"] not in ("ref", "backerf") for prop in self.properties.values()]) + class XSDReader: def __init__(self, path, dataset_name: str): self._path: str = path - self.models: list[XSDModel] = [] + self.models: dict[str, XSDModel] = {} self.custom_types: dict = {} self._dataset_given_name: str = dataset_name self._set_dataset_and_resource_info() self.deduplicate: Deduplicator = Deduplicator() + def get_property_type(self, node: _Element) -> str: + if node.get("ref"): + return "ref" + property_type: str = node.get("type") + if not property_type: + # this is a self defined simple type, so we take it's base as type + restrictions: list = node.xpath(f'./*[local-name() = "simpleType"]/*[local-name() = "restriction"]') + if restrictions: + property_type = restrictions[0].get("base", "") + else: + property_type = "" + # getting rid of the prefix + if ":" in property_type: + property_type = property_type.split(":")[1] + + if property_type in self.custom_types: + property_type = self.custom_types.get(property_type).get("base", "") + if property_type in DATATYPES_MAPPING: + property_type = DATATYPES_MAPPING[property_type] + else: + property_type = "string" + + return property_type + @staticmethod def get_enums_from_simple_type(node: _Element) -> dict[str, dict[str, Any]]: enums = {} @@ -514,7 +517,10 @@ def 
_properties_from_references( sequence = sequences[0] else: sequence = None - if sequence is not None and len(sequence) == 1 and self.node_is_ref(sequence[0]): + + # we check for the length of sequence, because it can has more than one element, but also length of + # complexType because it can have attributes too. + if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self.node_is_ref(sequence[0]): is_array = XSDReader.is_array(referenced_element) if not is_array: is_array = XSDReader.is_array(complex_type[0][0]) @@ -551,7 +557,7 @@ def _properties_from_references( for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) - prop["external"]["name"] = "" + prop["external"]["name"] = prop["external"]["name"].rstrip("/text()") prop["type"] = property_type prop["model"] = f"{referenced_model_name}" properties[property_id] = prop @@ -829,13 +835,14 @@ def _create_model( for model_name, model_properties in root_properties.items(): for root_property_id, root_property in model_properties.items(): - # we don't need to add refs and backrefs, only actual fields + # we don't need to add refs which don't have source (as they point to the root model then) # TODO: I don't know if we need this check. I would assume that we don't. 
# But if we delete this check, then it doesn't find the referenced model # if there is a model in between # Example: Objektai has faktai[].faktu_naudotojai[].naudotojo_id then # Faktu_Naudotojai has referencce to Faktai but not Objektai - if not root_property.get("type") == "ref" and not root_property.get("type") == "backref": + # if True: + if not (root_property.get("type") == "ref" and "external" not in root_property): # we need to find out the name of the property that corresponds the model, # because we need to use that if we used it in ref properties, otherwise use @@ -866,7 +873,7 @@ def _create_model( model.add_external_info(external_name=new_source_path) model.description = self.get_description(node) - self.models.append(model) + self.models[model.name] = model if additional_properties: model_name = f"{model.name}[]" @@ -887,7 +894,7 @@ def _add_resource_model(self): resource_model.root_properties = {} if resource_model.properties: resource_model.set_name(self.deduplicate(f"Resource")) - self.models.append(resource_model) + self.models[resource_model.name] = resource_model def _parse_root_node(self): for node in self.root: @@ -995,12 +1002,36 @@ def read_schema( yield None, xsd.dataset_and_resource_info - for parsed_model in xsd.models: + new_models = {} + for model_name, parsed_model in xsd.models.items(): + if parsed_model.has_non_ref_properties() and parsed_model.parent_model is not None: + new_models[model_name] = parsed_model + else: + for ref_model in xsd.models.values(): + new_properties = {} + for property_id, prop in ref_model.properties.items(): + if not (prop["type"] == "ref" and prop["model"] == model_name): + new_properties[property_id] = prop + else: + if property_id in ref_model.root_properties: + ref_model.root_properties.pop(property_id) + ref_model.properties = new_properties + + xsd.models = new_models + + for model_name, parsed_model in xsd.models.items(): # we need to add root properties to properties if it's a root model - if 
parsed_model.parent_model is None or parsed_model.parent_model not in xsd.models: + if parsed_model.parent_model is None or parsed_model.parent_model.name not in xsd.models: parsed_model.properties.update(parsed_model.root_properties) - + for prop_id, prop in parsed_model.root_properties.items(): + if prop["type"] == "backref": + backref_model = xsd.models[prop["model"]] + if backref_model != parsed_model: + ref_property = {to_property_name(parsed_model.basename): {"model": parsed_model.name, "type": "ref"}} + backref_model.properties.update(ref_property) parsed_model.properties = dict(sorted(parsed_model.properties.items())) + for model_name, parsed_model in xsd.models.items(): + yield None, parsed_model.get_data() diff --git a/tests/manifests/xsd/test_helpers.py b/tests/manifests/xsd/test_helpers.py index 7c73f47d4..95e169456 100644 --- a/tests/manifests/xsd/test_helpers.py +++ b/tests/manifests/xsd/test_helpers.py @@ -81,7 +81,7 @@ def test_get_property_type(): element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") model = XSDModel(xsd, schema) - result = model._get_property_type(element) + result = model.get_property_type(element) assert result == "string" @@ -104,7 +104,7 @@ def test_get_property_type_ref(): print("ELEMENT:", element) xsd = XSDReader("test.xsd", "dataset1") model = XSDModel(xsd, schema) - result = model._get_property_type(element) + result = model.get_property_type(element) assert result == "ref" @@ -127,7 +127,7 @@ def test_get_property_type_simple_type(): element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") model = XSDModel(xsd, schema) - result = model._get_property_type(element) + result = model.get_property_type(element) assert result == "string" @@ -145,7 +145,7 @@ def test_get_property_type_custom(): xsd = XSDReader("test.xsd", "dataset1") xsd.custom_types = {"some_type": {"base": "string"}} model = XSDModel(xsd, schema) - result = 
model._get_property_type(element) + result = model.get_property_type(element) assert result == "string" @@ -162,7 +162,7 @@ def test_get_property_type_unknown(): element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") model = XSDModel(xsd, schema) - result = model._get_property_type(element) + result = model.get_property_type(element) assert result == "string" From 1d4ab672e5f791563de9390c0b7ccf14ad2bdb14 Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 19 Jun 2024 11:55:18 +0300 Subject: [PATCH 23/62] #622 if root model has more than one ref, it has to be in --- spinta/manifests/xsd/helpers.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 01be5758d..36cc3810a 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -1002,23 +1002,6 @@ def read_schema( yield None, xsd.dataset_and_resource_info - new_models = {} - for model_name, parsed_model in xsd.models.items(): - if parsed_model.has_non_ref_properties() and parsed_model.parent_model is not None: - new_models[model_name] = parsed_model - else: - for ref_model in xsd.models.values(): - new_properties = {} - for property_id, prop in ref_model.properties.items(): - if not (prop["type"] == "ref" and prop["model"] == model_name): - new_properties[property_id] = prop - else: - if property_id in ref_model.root_properties: - ref_model.root_properties.pop(property_id) - ref_model.properties = new_properties - - xsd.models = new_models - for model_name, parsed_model in xsd.models.items(): # we need to add root properties to properties if it's a root model From 91b0cf3a03245e35717d135964fbe595e66eb2e3 Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 19 Jun 2024 13:51:09 +0300 Subject: [PATCH 24/62] correct source when there's a ref to the ref --- spinta/manifests/xsd/helpers.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git 
a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 36cc3810a..b57f46687 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -518,16 +518,18 @@ def _properties_from_references( else: sequence = None + new_referenced_element = None # we check for the length of sequence, because it can has more than one element, but also length of # complexType because it can have attributes too. if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self.node_is_ref(sequence[0]): + if ref_element.get("name") is not None: + source_path += f'/{ref_element.get("name")}' + source_path += f'/{referenced_element.get("name")}' is_array = XSDReader.is_array(referenced_element) if not is_array: is_array = XSDReader.is_array(complex_type[0][0]) new_referenced_element = self._get_referenced_node(complex_type[0][0]) referenced_element = new_referenced_element - if ref_element.get("name") is not None: - source_path += f'/{ref_element.get("name")}' if not (XSDReader.is_array(ref_element) or is_array): referenced_model_names, new_root_properties = self._create_model( @@ -556,8 +558,11 @@ def _properties_from_references( for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) - prop["external"]["name"] = prop["external"]["name"].rstrip("/text()") + if new_referenced_element is not None: + _, referenced_prop = model.simple_element_to_property(referenced_element) + prop["external"]["name"] += f'/{referenced_prop["external"]["name"].rstrip("/text()")}' + prop["type"] = property_type prop["model"] = f"{referenced_model_name}" properties[property_id] = prop From 379c268b3df7481ce54d6475fe1ed3676ef5728e Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 19 Jun 2024 20:33:36 +0300 Subject: [PATCH 25/62] #622 reference and then final element working properly --- spinta/manifests/xsd/helpers.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 
insertions(+), 10 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index b57f46687..f4e526a72 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -208,7 +208,8 @@ def attributes_to_properties( def simple_element_to_property( self, element: _Element, - is_array: bool = False + is_array: bool = False, + source_path: str = None ) -> tuple[str, dict[str, str | bool | dict[str, Any]]]: """ simple element is an element which is either @@ -238,6 +239,8 @@ def simple_element_to_property( prop["external"]["name"] = ref property_id = self.deduplicate(to_property_name(ref)) prop["external"]["name"] = f'{prop["external"]["name"]}/text()' + if source_path: + prop["external"]["name"] = f'{source_path}/{prop["external"]["name"]}' if prop.get("type") == "": prop["type"] = "string" if XSDReader.is_array(element) or is_array: @@ -519,7 +522,7 @@ def _properties_from_references( sequence = None new_referenced_element = None - # we check for the length of sequence, because it can has more than one element, but also length of + # we check for the length of sequence, because it can have more than one element, but also length of # complexType because it can have attributes too. 
if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self.node_is_ref(sequence[0]): if ref_element.get("name") is not None: @@ -528,9 +531,21 @@ def _properties_from_references( is_array = XSDReader.is_array(referenced_element) if not is_array: is_array = XSDReader.is_array(complex_type[0][0]) + + previous_referenced_element_name = referenced_element.get("name") new_referenced_element = self._get_referenced_node(complex_type[0][0]) referenced_element = new_referenced_element + if self.node_is_simple_type_or_inline(referenced_element): + property_id, prop = model.simple_element_to_property( + referenced_element, + is_array=is_array, + source_path=previous_referenced_element_name) + if not XSDReader.is_required(ref_element): + prop["required"] = False + properties[property_id] = prop + continue + if not (XSDReader.is_array(ref_element) or is_array): referenced_model_names, new_root_properties = self._create_model( referenced_element, @@ -841,21 +856,17 @@ def _create_model( for root_property_id, root_property in model_properties.items(): # we don't need to add refs which don't have source (as they point to the root model then) - # TODO: I don't know if we need this check. I would assume that we don't. 
- # But if we delete this check, then it doesn't find the referenced model - # if there is a model in between - # Example: Objektai has faktai[].faktu_naudotojai[].naudotojo_id then - # Faktu_Naudotojai has referencce to Faktai but not Objektai - # if True: if not (root_property.get("type") == "ref" and "external" not in root_property): # we need to find out the name of the property that corresponds the model, - # because we need to use that if we used it in ref properties, otherwise use + # because we need to use that if we used it in ref properties, + # otherwise use newly created form model name prefix = None stripped_model_name = model_name.rstrip("[]") for property_id, prop in properties.items(): if "model" in prop: - property_model_name = prop.get("model").split("/")[-1] + # property_model_name = prop.get("model").split("/")[-1] + property_model_name = prop.get("model") if property_model_name == stripped_model_name: prefix = property_id break From 3cdc2bf9eacfe4af290f1d9af5ba5b1a4d8a9149 Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 19 Jun 2024 22:03:32 +0300 Subject: [PATCH 26/62] #622 checking for root model for adding to xsd.models --- spinta/manifests/xsd/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index f4e526a72..990950150 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -848,7 +848,7 @@ def _create_model( # DEALING WITH NESTED ROOT PROPERTIES --------------------- - if properties: + if properties or is_root_model: # new_root_properties are to pass up to the root model and to add to it new_root_properties = {} # add the model prefix to every property name and source From fa37256adb417d9d9f8ae13f18f2471640434e6a Mon Sep 17 00:00:00 2001 From: karina Date: Fri, 28 Jun 2024 14:12:57 +0300 Subject: [PATCH 27/62] creating references when joined through type --- spinta/manifests/xsd/helpers.py | 25 
+++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 990950150..d90dbdc4c 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -589,7 +589,8 @@ def _split_choice( node: _Element, source_path: str, parent_model: XSDModel, - additional_properties: dict[str, dict[str, str | bool | dict[str, str]]] + additional_properties: dict[str, dict[str, str | bool | dict[str, str]]], + is_root_model: bool = False ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: """ If there are choices in the element, @@ -630,7 +631,9 @@ def _split_choice( node_copy, source_path=source_path, parent_model=parent_model, - additional_properties=additional_properties) + additional_properties=additional_properties, + is_root_model=is_root_model + ) root_properties.update(new_root_properties) model_names.extend(returned_model_names) @@ -643,7 +646,9 @@ def _split_choice( node_copy, source_path=source_path, parent_model=parent_model, - additional_properties=additional_properties) + additional_properties=additional_properties, + is_root_model=is_root_model + ) model_names.extend(returned_model_names) root_properties.update(new_root_properties) @@ -700,7 +705,9 @@ def _create_model( node, source_path=source_path, parent_model=model, - additional_properties=additional_properties) + additional_properties=additional_properties, + is_root_model=is_root_model + ) # if complextype node's property mixed is true, it allows text inside if complex_type_node.get("mixed") == "true": @@ -802,7 +809,7 @@ def _create_model( parent_model=model, additional_properties=additional_properties) else: - _, new_root_properties = self._create_model( + ref_model_name, new_root_properties = self._create_model( element, source_path=new_source_path, parent_model=model, @@ -832,11 +839,12 @@ def _create_model( # check for recursion paths = new_source_path.split("/") 
if not child_node.get("name") in paths: - _, new_root_properties = self._create_model( + ref_model_name, new_root_properties = self._create_model( child_node, source_path=new_source_path, parent_model=model ) + properties.update({to_property_name(ref_model_name[0].split("/")[-1]): {"type": "ref", "model": ref_model_name[0], "external": {"name": child_node.get("name")}}}) root_properties.update(new_root_properties) else: for index, path in enumerate(paths): @@ -1011,6 +1019,11 @@ def read_schema( Attribute can only be turned into a property A property can also be text() + + -----------Nested properties------------- + + Root model or models can have nested properties if they have any properties that point to other models. + """ xsd = XSDReader(path, dataset_name) From ab34a0aa4098cf3044e96b8d2fef32c95226cd56 Mon Sep 17 00:00:00 2001 From: karina Date: Fri, 28 Jun 2024 17:36:38 +0300 Subject: [PATCH 28/62] #622 creating references when joined through type --- spinta/manifests/xsd/helpers.py | 145 +++++++++++++++++++++++++++----- 1 file changed, 125 insertions(+), 20 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index d90dbdc4c..41ac699c7 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -487,11 +487,129 @@ def is_required(element: _Element) -> bool: return True return False + def _properties_from_type_references( + self, + node: _Element, + model: XSDModel, + source_path: str = "" + ) -> tuple[ + dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], + dict[str, dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]]: + + properties = {} + + # nested properties for the root model + root_properties = {} + + for typed_element in node.xpath("./*[@type]"): + + if typed_element.xpath(f'./*[local-name() = "complexType"]') \ + or self._node_has_separate_complex_type(typed_element): + + if typed_element.xpath(f'./*[local-name() = "complexType"]'): + complex_type = 
typed_element.xpath(f'./*[local-name() = "complexType"]')[0] + if self._node_has_separate_complex_type(typed_element): + complex_type = self._get_separate_complex_type_node(typed_element) + + is_array = False + + # TODO fix this because it probably doesn't cover all cases, only something like + # https://github.com/atviriduomenys/spinta/issues/613 + sequences = complex_type.xpath("./*[local-name() = 'sequence']") + if sequences: + sequence = sequences[0] + else: + sequence = None + + # proxy element + new_referenced_element = None + # we check for the length of sequence, because it can have more than one element, but also length of + # complexType because it can have attributes too. + + if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self._node_has_separate_complex_type(sequence[0]): + # if typed_element.get("name") is not None: + source_path += f'/{typed_element.get("name")}' + # source_path += f'/{complex_type.get("name")}' + is_array = XSDReader.is_array(complex_type) + if not is_array: + is_array = XSDReader.is_array(complex_type[0][0]) + if not is_array: + XSDReader.is_array(typed_element) + + previous_referenced_element_name = typed_element.get("name") + new_referenced_element = self._get_referenced_node(complex_type[0][0]) + + if self.node_is_simple_type_or_inline(new_referenced_element): + property_id, prop = model.simple_element_to_property( + new_referenced_element, + is_array=is_array, + source_path=previous_referenced_element_name) + if not XSDReader.is_required(typed_element): + prop["required"] = False + properties[property_id] = prop + continue + + if not is_array: + referenced_model_names, new_root_properties = self._create_model( + typed_element, + source_path=source_path, + parent_model=model + ) + property_type = "ref" + else: + referenced_element_properties = { + to_property_name(model.basename): + { + "type": "ref", + "model": f"{model.name}" + } + } + property_type = "backref" + referenced_model_names, 
new_root_properties = self._create_model( + typed_element, + source_path=source_path, + parent_model=model, + additional_properties=referenced_element_properties + ) + + root_properties.update(new_root_properties) + + for referenced_model_name in referenced_model_names: + property_id, prop = model.simple_element_to_property(typed_element, is_array=is_array) + prop["external"]["name"] = prop["external"]["name"].rstrip("/text()") + if new_referenced_element is not None: + _, referenced_prop = model.simple_element_to_property(complex_type) + prop["external"]["name"] += f'/{referenced_prop["external"]["name"].rstrip("/text()")}' + + prop["type"] = property_type + prop["model"] = f"{referenced_model_name}" + properties[property_id] = prop + + # for child_node in sequence_or_all_node: + # if child_node.xpath(f'./*[local-name() = "complexType"]') \ + # or self._node_has_separate_complex_type(child_node): + # # check for recursion + # paths = new_source_path.split("/") + # if not child_node.get("name") in paths: + # ref_model_name, new_root_properties = self._create_model( + # child_node, + # source_path=new_source_path, + # parent_model=model + # ) + # properties.update({to_property_name(ref_model_name[0].split("/")[-1]): {"type": "ref", "model": ref_model_name[0], "external": {"name": child_node.get("name")}}}) + # root_properties.update(new_root_properties) + # else: + # for index, path in enumerate(paths): + # if path == child_node.get("name"): + # paths[index] = f"/{path}" + # new_source_path = "/".join(paths) + + return properties, root_properties + def _properties_from_references( self, node: _Element, model: XSDModel, - parent_model: XSDModel, source_path: str = "" ) -> tuple[ dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], @@ -827,30 +945,17 @@ def _create_model( properties_from_references, new_root_properties = self._properties_from_references( sequence_or_all_node, model=model, - parent_model=parent_model, source_path=new_source_path) 
properties.update(properties_from_references) root_properties.update(new_root_properties) # complex type child nodes - to models - for child_node in sequence_or_all_node: - if child_node.xpath(f'./*[local-name() = "complexType"]') \ - or self._node_has_separate_complex_type(child_node): - # check for recursion - paths = new_source_path.split("/") - if not child_node.get("name") in paths: - ref_model_name, new_root_properties = self._create_model( - child_node, - source_path=new_source_path, - parent_model=model - ) - properties.update({to_property_name(ref_model_name[0].split("/")[-1]): {"type": "ref", "model": ref_model_name[0], "external": {"name": child_node.get("name")}}}) - root_properties.update(new_root_properties) - else: - for index, path in enumerate(paths): - if path == child_node.get("name"): - paths[index] = f"/{path}" - new_source_path = "/".join(paths) + properties_from_references, new_root_properties = self._properties_from_type_references( + sequence_or_all_node, + model=model, + source_path=new_source_path) + properties.update(properties_from_references) + root_properties.update(new_root_properties) model.properties = properties From 2242833014062f26b8bf9cfc6c2df7a0c9dfd8f8 Mon Sep 17 00:00:00 2001 From: karina Date: Fri, 28 Jun 2024 17:55:04 +0300 Subject: [PATCH 29/62] # 622 notes to self --- spinta/manifests/xsd/helpers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 41ac699c7..9e983d9a0 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -537,7 +537,7 @@ def _properties_from_type_references( XSDReader.is_array(typed_element) previous_referenced_element_name = typed_element.get("name") - new_referenced_element = self._get_referenced_node(complex_type[0][0]) + new_referenced_element = complex_type[0][0] if self.node_is_simple_type_or_inline(new_referenced_element): property_id, prop = 
model.simple_element_to_property( @@ -1152,3 +1152,9 @@ def read_schema( for model_name, parsed_model in xsd.models.items(): yield None, parsed_model.get_data() + + +# todo kas dar neveikia: +# actor_list[] ir dt_actor[] - abu array, turėtų būti tik vienas iš jų +# iš tikro, tai ActorList modelio neturėtų būti +# Kai kur nukerpa paskutinę raidę, ir naudoja tipo vardą vietoje elemento vardo DtNvi actor_list[] tipas ActorLis/ActorListTyp From 5493305dba376178647d1b8fa3ba1e61f78d77ef Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 4 Jul 2024 19:01:41 +0300 Subject: [PATCH 30/62] correct names for array elements --- spinta/manifests/xsd/helpers.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 9e983d9a0..5c2856cbc 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -521,11 +521,12 @@ def _properties_from_type_references( else: sequence = None - # proxy element + # proxy element (for array), we don't create a model for it, it's to indicate an array new_referenced_element = None + previous_referenced_element = None + previous_referenced_element_name = None # we check for the length of sequence, because it can have more than one element, but also length of # complexType because it can have attributes too. 
- if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self._node_has_separate_complex_type(sequence[0]): # if typed_element.get("name") is not None: source_path += f'/{typed_element.get("name")}' @@ -536,8 +537,10 @@ def _properties_from_type_references( if not is_array: XSDReader.is_array(typed_element) + previous_referenced_element = typed_element previous_referenced_element_name = typed_element.get("name") new_referenced_element = complex_type[0][0] + typed_element = new_referenced_element if self.node_is_simple_type_or_inline(new_referenced_element): property_id, prop = model.simple_element_to_property( @@ -576,11 +579,13 @@ def _properties_from_type_references( for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(typed_element, is_array=is_array) - prop["external"]["name"] = prop["external"]["name"].rstrip("/text()") - if new_referenced_element is not None: - _, referenced_prop = model.simple_element_to_property(complex_type) - prop["external"]["name"] += f'/{referenced_prop["external"]["name"].rstrip("/text()")}' - + prop["external"]["name"] = prop["external"]["name"].replace("/text()", '') + if new_referenced_element is not None and new_referenced_element.get("mixed") != "true": + _, referenced_prop = model.simple_element_to_property(new_referenced_element) + prop["external"]["name"] += f'/{referenced_prop["external"]["name"].replace("/text()", "")}' + property_id = to_property_name(previous_referenced_element_name) + if is_array: + property_id += "[]" prop["type"] = property_type prop["model"] = f"{referenced_model_name}" properties[property_id] = prop @@ -1158,3 +1163,4 @@ def read_schema( # actor_list[] ir dt_actor[] - abu array, turėtų būti tik vienas iš jų # iš tikro, tai ActorList modelio neturėtų būti # Kai kur nukerpa paskutinę raidę, ir naudoja tipo vardą vietoje elemento vardo DtNvi actor_list[] tipas ActorLis/ActorListTyp +# todo kai mixed true ir viduj vien atributai, 
jų neranda (88) From be16b11ab5fb8f1fa6d0f3ba334ccc966bc44ac1 Mon Sep 17 00:00:00 2001 From: karina Date: Fri, 5 Jul 2024 14:59:43 +0300 Subject: [PATCH 31/62] #622 array source correct --- spinta/manifests/xsd/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 5c2856cbc..a2e4d5476 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -581,8 +581,8 @@ def _properties_from_type_references( property_id, prop = model.simple_element_to_property(typed_element, is_array=is_array) prop["external"]["name"] = prop["external"]["name"].replace("/text()", '') if new_referenced_element is not None and new_referenced_element.get("mixed") != "true": - _, referenced_prop = model.simple_element_to_property(new_referenced_element) - prop["external"]["name"] += f'/{referenced_prop["external"]["name"].replace("/text()", "")}' + _, referenced_prop = model.simple_element_to_property(previous_referenced_element) + prop["external"]["name"] = f'{previous_referenced_element_name}/{prop["external"]["name"]}' property_id = to_property_name(previous_referenced_element_name) if is_array: property_id += "[]" From 322f3ece702f0882c69ae5936f6dc3f633e3afe7 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 11 Jul 2024 14:59:07 +0300 Subject: [PATCH 32/62] #622 sources for nested properties are correct --- spinta/manifests/xsd/helpers.py | 63 ++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index a2e4d5476..c4a6b30eb 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -503,6 +503,8 @@ def _properties_from_type_references( for typed_element in node.xpath("./*[@type]"): + new_source_path = source_path + if typed_element.xpath(f'./*[local-name() = "complexType"]') \ or self._node_has_separate_complex_type(typed_element): @@ 
-529,7 +531,7 @@ def _properties_from_type_references( # complexType because it can have attributes too. if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self._node_has_separate_complex_type(sequence[0]): # if typed_element.get("name") is not None: - source_path += f'/{typed_element.get("name")}' + new_source_path += f'/{typed_element.get("name")}' # source_path += f'/{complex_type.get("name")}' is_array = XSDReader.is_array(complex_type) if not is_array: @@ -555,7 +557,7 @@ def _properties_from_type_references( if not is_array: referenced_model_names, new_root_properties = self._create_model( typed_element, - source_path=source_path, + source_path=new_source_path, parent_model=model ) property_type = "ref" @@ -570,7 +572,7 @@ def _properties_from_type_references( property_type = "backref" referenced_model_names, new_root_properties = self._create_model( typed_element, - source_path=source_path, + source_path=new_source_path, parent_model=model, additional_properties=referenced_element_properties ) @@ -626,6 +628,7 @@ def _properties_from_references( root_properties = {} for ref_element in node.xpath("./*[@ref]"): + new_source_path = source_path referenced_element = self._get_referenced_node(ref_element) if self.node_is_simple_type_or_inline(referenced_element): @@ -649,8 +652,8 @@ def _properties_from_references( # complexType because it can have attributes too. 
if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self.node_is_ref(sequence[0]): if ref_element.get("name") is not None: - source_path += f'/{ref_element.get("name")}' - source_path += f'/{referenced_element.get("name")}' + new_source_path += f'/{ref_element.get("name")}' + new_source_path += f'/{referenced_element.get("name")}' is_array = XSDReader.is_array(referenced_element) if not is_array: is_array = XSDReader.is_array(complex_type[0][0]) @@ -672,7 +675,7 @@ def _properties_from_references( if not (XSDReader.is_array(ref_element) or is_array): referenced_model_names, new_root_properties = self._create_model( referenced_element, - source_path=source_path, + source_path=new_source_path, parent_model=model ) property_type = "ref" @@ -687,7 +690,7 @@ def _properties_from_references( property_type = "backref" referenced_model_names, new_root_properties = self._create_model( referenced_element, - source_path=source_path, + source_path=new_source_path, parent_model=model, additional_properties=referenced_element_properties ) @@ -997,11 +1000,26 @@ def _create_model( prefix = to_property_name(stripped_model_name.split("/")[-1]) root_property_id = f"{prefix}{array_sign}.{root_property_id}" - new_root_properties[root_property_id] = root_property + + + + new_root_properties[root_property_id] = root_property model.root_properties = new_root_properties + properties_copy = deepcopy(properties) - new_root_properties.update(deepcopy(properties)) + for prop in properties_copy.values(): + if 'external' in prop: + prop['external']['name'] = f"{new_source_path}/{prop['external']['name']}" + + new_root_properties.update(properties_copy) + + # for new_root_property in new_root_properties.values(): + # updating properties sources. If it's an array, we need to keep them relative + # to the array. If not, it needs to be relative to current model. 
+ # if not model_name.endswith('[]'): + # if 'external' in new_root_property: + # new_root_property['external']['name'] = f"{node.get('name')}/{new_root_property['external']['name']}" returned_root_properties = new_root_properties @@ -1016,6 +1034,10 @@ def _create_model( return [model.name, ], {model_name: returned_root_properties} + # for model_root_properties in root_properties.values(): + # for model_root_property in model_root_properties.values(): + # model_root_property['external']['name'] = f"{node.get('name')}/{model_root_property['external']['name']}" + return [], root_properties def _add_resource_model(self): @@ -1146,12 +1168,35 @@ def read_schema( # we need to add root properties to properties if it's a root model if parsed_model.parent_model is None or parsed_model.parent_model.name not in xsd.models: parsed_model.properties.update(parsed_model.root_properties) + model_source = parsed_model.external["name"] for prop_id, prop in parsed_model.root_properties.items(): if prop["type"] == "backref": backref_model = xsd.models[prop["model"]] if backref_model != parsed_model: ref_property = {to_property_name(parsed_model.basename): {"model": parsed_model.name, "type": "ref"}} backref_model.properties.update(ref_property) + + # root properties sources are now relative to the general root. + # We need to make them relative to what they have to be relative + # If they come after an array, they have to be relative to the source of that array. + # Otherwise, they have to be relative to the model they are in. 
+ for prop_compare_id, prop_compare in parsed_model.root_properties.items(): + if ( + 'external' in prop and + 'external' in prop_compare and + prop['external']['name'].startswith(prop_compare['external']['name']) and + prop_compare_id.endswith('[]') and + prop['external']['name'] != prop_compare['external']['name'] + ): + prop['external']['name'] = prop['external']['name'].replace(prop_compare['external']['name'], '') + + if 'external' in prop and prop['external']['name'].startswith(model_source): + prop['external']['name'] = prop['external']['name'].replace(model_source, '') + + if 'external' in prop: + prop['external']['name'] = prop['external']['name'].lstrip('/') + + parsed_model.properties = dict(sorted(parsed_model.properties.items())) for model_name, parsed_model in xsd.models.items(): From b28c8562901558b1a34e9809efb4546fc8a10992 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 11 Jul 2024 18:08:46 +0300 Subject: [PATCH 33/62] #622 choices with maxOccurs=unbound are turned into arrays correctly --- spinta/manifests/xsd/helpers.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index c4a6b30eb..3ce62e423 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -518,6 +518,11 @@ def _properties_from_type_references( # TODO fix this because it probably doesn't cover all cases, only something like # https://github.com/atviriduomenys/spinta/issues/613 sequences = complex_type.xpath("./*[local-name() = 'sequence']") + if not sequences: + choices = complex_type.xpath("./*[local-name() = 'choice']") + if choices[0].get('maxOccurs') == 'unbounded': + sequences = choices + is_array = True if sequences: sequence = sequences[0] else: @@ -533,7 +538,8 @@ def _properties_from_type_references( # if typed_element.get("name") is not None: new_source_path += f'/{typed_element.get("name")}' # source_path += f'/{complex_type.get("name")}' - 
is_array = XSDReader.is_array(complex_type) + if not is_array: + is_array = XSDReader.is_array(complex_type) if not is_array: is_array = XSDReader.is_array(complex_type[0][0]) if not is_array: @@ -642,6 +648,11 @@ def _properties_from_references( # https://github.com/atviriduomenys/spinta/issues/613 complex_type = referenced_element.xpath("./*[local-name() = 'complexType']")[0] sequences = complex_type.xpath("./*[local-name() = 'sequence']") + if not sequences: + choices = complex_type.xpath("./*[local-name() = 'choice']") + if choices and choices[0].get('maxOccurs') == 'unbounded': + sequences = choices + is_array = True if sequences: sequence = sequences[0] else: @@ -654,7 +665,8 @@ def _properties_from_references( if ref_element.get("name") is not None: new_source_path += f'/{ref_element.get("name")}' new_source_path += f'/{referenced_element.get("name")}' - is_array = XSDReader.is_array(referenced_element) + if not is_array: + is_array = XSDReader.is_array(referenced_element) if not is_array: is_array = XSDReader.is_array(complex_type[0][0]) From 4bf77bd2e874b7ad33b2739fe3f0f400ee21f0ab Mon Sep 17 00:00:00 2001 From: karina Date: Sun, 14 Jul 2024 18:20:58 +0300 Subject: [PATCH 34/62] #622 generating nested properties moved seperately, not together with all properties --- spinta/manifests/xsd/helpers.py | 245 +++++++++++++------------------- 1 file changed, 102 insertions(+), 143 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 3ce62e423..981dba1d4 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -288,6 +288,11 @@ def get_text_property(self, property_type=None) -> dict[str, dict[str, str | dic def has_non_ref_properties(self) -> bool: return any([prop["type"] not in ("ref", "backerf") for prop in self.properties.values()]) + def add_ref_property(self, ref_model): + property_id = to_property_name(ref_model.basename) + prop = {"type": "ref", "model": ref_model.name} + 
self.properties.update({property_id: prop}) + class XSDReader: @@ -492,15 +497,10 @@ def _properties_from_type_references( node: _Element, model: XSDModel, source_path: str = "" - ) -> tuple[ - dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], - dict[str, dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]]: + ) -> dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]: properties = {} - # nested properties for the root model - root_properties = {} - for typed_element in node.xpath("./*[@type]"): new_source_path = source_path @@ -561,7 +561,7 @@ def _properties_from_type_references( continue if not is_array: - referenced_model_names, new_root_properties = self._create_model( + referenced_model_names = self._create_model( typed_element, source_path=new_source_path, parent_model=model @@ -576,15 +576,13 @@ def _properties_from_type_references( } } property_type = "backref" - referenced_model_names, new_root_properties = self._create_model( + referenced_model_names = self._create_model( typed_element, source_path=new_source_path, parent_model=model, additional_properties=referenced_element_properties ) - root_properties.update(new_root_properties) - for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(typed_element, is_array=is_array) prop["external"]["name"] = prop["external"]["name"].replace("/text()", '') @@ -617,22 +615,17 @@ def _properties_from_type_references( # paths[index] = f"/{path}" # new_source_path = "/".join(paths) - return properties, root_properties + return properties def _properties_from_references( self, node: _Element, model: XSDModel, source_path: str = "" - ) -> tuple[ - dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]], - dict[str, dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]]: + ) -> dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]: properties = {} - # nested properties for the root model 
- root_properties = {} - for ref_element in node.xpath("./*[@ref]"): new_source_path = source_path referenced_element = self._get_referenced_node(ref_element) @@ -685,7 +678,7 @@ def _properties_from_references( continue if not (XSDReader.is_array(ref_element) or is_array): - referenced_model_names, new_root_properties = self._create_model( + referenced_model_names = self._create_model( referenced_element, source_path=new_source_path, parent_model=model @@ -700,15 +693,13 @@ def _properties_from_references( } } property_type = "backref" - referenced_model_names, new_root_properties = self._create_model( + referenced_model_names = self._create_model( referenced_element, source_path=new_source_path, parent_model=model, additional_properties=referenced_element_properties ) - root_properties.update(new_root_properties) - for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) prop["external"]["name"] = prop["external"]["name"].rstrip("/text()") @@ -720,7 +711,7 @@ def _properties_from_references( prop["model"] = f"{referenced_model_name}" properties[property_id] = prop - return properties, root_properties + return properties def _split_choice( self, @@ -729,15 +720,12 @@ def _split_choice( parent_model: XSDModel, additional_properties: dict[str, dict[str, str | bool | dict[str, str]]], is_root_model: bool = False - ) -> tuple[list[str], dict[str, str | bool | dict[str, str | dict[str, Any]]]]: + ) -> list[str]: """ If there are choices in the element, we need to split it and create a separate model per each choice """ - # nested properties for the root model - root_properties = {} - model_names = [] node_copy = deepcopy(node) if self._node_has_separate_complex_type(node_copy): @@ -772,7 +760,7 @@ def _split_choice( additional_properties=additional_properties, is_root_model=is_root_model ) - root_properties.update(new_root_properties) + model_names.extend(returned_model_names) for 
node_in_choice in choice_copy: @@ -780,7 +768,7 @@ def _split_choice( choice_node_parent.remove(node_in_choice) else: choice_node_parent.insert(0, choice) - returned_model_names, new_root_properties = self._create_model( + returned_model_names = self._create_model( node_copy, source_path=source_path, parent_model=parent_model, @@ -788,10 +776,9 @@ def _split_choice( is_root_model=is_root_model ) model_names.extend(returned_model_names) - root_properties.update(new_root_properties) choice_node_parent.remove(choice) - return model_names, root_properties + return model_names def _create_model( self, @@ -800,7 +787,7 @@ def _create_model( is_root_model: bool = False, parent_model: XSDModel = None, additional_properties: dict[str, str | bool | dict[str, str | dict[str, Any]]] = None - ) -> tuple[list[str], dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]]: + ) -> list[str]: """ Parses an element and makes a model out of it. If it is a complete model, it will be added to the models list. 
""" @@ -810,9 +797,6 @@ def _create_model( if additional_properties is None: additional_properties = {} - # properties to add to the root model - root_properties = {} - # properties of this model properties = {} properties.update(additional_properties) @@ -952,7 +936,6 @@ def _create_model( source_path=new_source_path, parent_model=model, additional_properties=additional_properties) - root_properties.update(new_root_properties) elif sequence_or_all_node_length > 1 or properties: # properties from simple type or inline elements without references @@ -962,95 +945,34 @@ def _create_model( properties_required=properties_required)) # references - properties_from_references, new_root_properties = self._properties_from_references( + properties_from_references = self._properties_from_references( sequence_or_all_node, model=model, source_path=new_source_path) properties.update(properties_from_references) - root_properties.update(new_root_properties) # complex type child nodes - to models - properties_from_references, new_root_properties = self._properties_from_type_references( + properties_from_references = self._properties_from_type_references( sequence_or_all_node, model=model, source_path=new_source_path) properties.update(properties_from_references) - root_properties.update(new_root_properties) model.properties = properties - # DEALING WITH NESTED ROOT PROPERTIES --------------------- - - if properties or is_root_model: - # new_root_properties are to pass up to the root model and to add to it - new_root_properties = {} - # add the model prefix to every property name and source - for model_name, model_properties in root_properties.items(): - for root_property_id, root_property in model_properties.items(): - - # we don't need to add refs which don't have source (as they point to the root model then) - if not (root_property.get("type") == "ref" and "external" not in root_property): - - # we need to find out the name of the property that corresponds the model, - # because 
we need to use that if we used it in ref properties, - # otherwise use newly created form model name - prefix = None - stripped_model_name = model_name.rstrip("[]") - for property_id, prop in properties.items(): - if "model" in prop: - # property_model_name = prop.get("model").split("/")[-1] - property_model_name = prop.get("model") - if property_model_name == stripped_model_name: - prefix = property_id - break - - array_sign = "" - if prefix is None: - if model_name.endswith("[]"): - array_sign = "[]" - - prefix = to_property_name(stripped_model_name.split("/")[-1]) - - root_property_id = f"{prefix}{array_sign}.{root_property_id}" - - - - new_root_properties[root_property_id] = root_property - - model.root_properties = new_root_properties - properties_copy = deepcopy(properties) - - for prop in properties_copy.values(): - if 'external' in prop: - prop['external']['name'] = f"{new_source_path}/{prop['external']['name']}" - - new_root_properties.update(properties_copy) - - # for new_root_property in new_root_properties.values(): - # updating properties sources. If it's an array, we need to keep them relative - # to the array. If not, it needs to be relative to current model. 
- # if not model_name.endswith('[]'): - # if 'external' in new_root_property: - # new_root_property['external']['name'] = f"{node.get('name')}/{new_root_property['external']['name']}" - - returned_root_properties = new_root_properties + if properties: model.add_external_info(external_name=new_source_path) model.description = self.get_description(node) self.models[model.name] = model - if additional_properties: - model_name = f"{model.name}[]" - else: - model_name = model.name - - return [model.name, ], {model_name: returned_root_properties} + return [model.name, ] # for model_root_properties in root_properties.values(): # for model_root_property in model_root_properties.values(): # model_root_property['external']['name'] = f"{node.get('name')}/{model_root_property['external']['name']}" - return [], root_properties + return [] def _add_resource_model(self): resource_model = XSDModel(self) @@ -1080,6 +1002,84 @@ def start(self): self._parse_root_node() + def _add_model_nested_properties(self, root_model: XSDModel, model: XSDModel, property_prefix: str = "", source_path: str = ""): + """recursively gather nested properties or root model""" + # go orward, add property prefix, which is constructed rom properties that came rom beore models, and construct pathh orward also + # probably will need to cut beginning for path sometimes + + source_path = source_path.lstrip("/") + + root_properties = {} + + properties = deepcopy(model.properties) + + for property_id, prop in properties.items(): + + if (model != root_model and + not ("model" in prop and prop["model"] == model.parent_model.name)): + + # update property source and name and add it to the root properties + if property_prefix: + property_id = f"{property_prefix}.{property_id}" + + if "external" in prop and source_path: + prop["external"]["name"] = f"{source_path}/{prop['external']['name']}" + + root_properties[property_id] = prop + + # if this property is ref or backref, gather the properties of the model to which it 
points + # (if it's not the root model or back pointing ref) + if "model" in prop: + ref_model = self.models[prop['model']] + + # there are two types of ref - direct, and for backref. We don't want to traverse the ones or backref, + # because it will create an infinite loop. + # If it's a ref, we need to build the path from the root of the model. If it's backref (array) - + # it's relative to array + if prop["type"] == "ref": + has_backref = False + for ref_model_property in ref_model.properties.values(): + if (ref_model_property.get("type") == "backref") and (ref_model_property.get('model') == model.name): + has_backref = True + break + if has_backref: + continue + if source_path: + new_source_path = f"{source_path}/{ref_model.external['name'].replace(model.external['name'], '').lstrip('/')}" + else: + new_source_path = ref_model.external['name'].replace(model.external['name'], '') + new_source_path = new_source_path.lstrip('/') + + # property type is backref + else: + new_source_path = "" + + self._add_model_nested_properties(root_model, ref_model, property_prefix=property_id, source_path=new_source_path) + + root_model.properties.update(root_properties) + + def compile_nested_properties(self): + for model_name, parsed_model in self.models.items(): + + # we need to add root properties to properties if it's a root model + if parsed_model.parent_model is None or parsed_model.parent_model.name not in self.models: + + self._add_model_nested_properties(parsed_model, parsed_model) + + # parsed_model.properties.update(nested_properties) + + # if some nested properties are backrefs and still don't have refs + # (in case o indirect links), we need to add them + + for prop in parsed_model.properties.values(): + if prop.get("type") == "backref": + ref_model_name = prop.get("model") + ref_model = self.models[ref_model_name] + if ref_model: + ref_model.add_ref_property(parsed_model) + + parsed_model.properties = dict(sorted(parsed_model.properties.items())) + def read_schema( 
context: Context, @@ -1175,49 +1175,8 @@ def read_schema( yield None, xsd.dataset_and_resource_info - for model_name, parsed_model in xsd.models.items(): - - # we need to add root properties to properties if it's a root model - if parsed_model.parent_model is None or parsed_model.parent_model.name not in xsd.models: - parsed_model.properties.update(parsed_model.root_properties) - model_source = parsed_model.external["name"] - for prop_id, prop in parsed_model.root_properties.items(): - if prop["type"] == "backref": - backref_model = xsd.models[prop["model"]] - if backref_model != parsed_model: - ref_property = {to_property_name(parsed_model.basename): {"model": parsed_model.name, "type": "ref"}} - backref_model.properties.update(ref_property) - - # root properties sources are now relative to the general root. - # We need to make them relative to what they have to be relative - # If they come after an array, they have to be relative to the source of that array. - # Otherwise, they have to be relative to the model they are in. 
- for prop_compare_id, prop_compare in parsed_model.root_properties.items(): - if ( - 'external' in prop and - 'external' in prop_compare and - prop['external']['name'].startswith(prop_compare['external']['name']) and - prop_compare_id.endswith('[]') and - prop['external']['name'] != prop_compare['external']['name'] - ): - prop['external']['name'] = prop['external']['name'].replace(prop_compare['external']['name'], '') - - if 'external' in prop and prop['external']['name'].startswith(model_source): - prop['external']['name'] = prop['external']['name'].replace(model_source, '') - - if 'external' in prop: - prop['external']['name'] = prop['external']['name'].lstrip('/') - - - parsed_model.properties = dict(sorted(parsed_model.properties.items())) + xsd.compile_nested_properties() for model_name, parsed_model in xsd.models.items(): yield None, parsed_model.get_data() - - -# todo kas dar neveikia: -# actor_list[] ir dt_actor[] - abu array, turėtų būti tik vienas iš jų -# iš tikro, tai ActorList modelio neturėtų būti -# Kai kur nukerpa paskutinę raidę, ir naudoja tipo vardą vietoje elemento vardo DtNvi actor_list[] tipas ActorLis/ActorListTyp -# todo kai mixed true ir viduj vien atributai, jų neranda (88) From ebd4b812582ba07092d42c405d7c7e56f7040913 Mon Sep 17 00:00:00 2001 From: karina Date: Sun, 14 Jul 2024 19:53:06 +0300 Subject: [PATCH 35/62] #622 void source or backres and avoiding recursion --- spinta/manifests/xsd/helpers.py | 35 +++---- tests/manifests/xsd/test_xsd.py | 166 +++++++++++++++----------------- 2 files changed, 91 insertions(+), 110 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 981dba1d4..9a51e034b 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -560,6 +560,10 @@ def _properties_from_type_references( properties[property_id] = prop continue + # avoiding recursion + if typed_element.get("name") in source_path.split("/"): + continue + if not is_array: 
referenced_model_names = self._create_model( typed_element, @@ -594,27 +598,11 @@ def _properties_from_type_references( property_id += "[]" prop["type"] = property_type prop["model"] = f"{referenced_model_name}" + # backrefs don't have to have source + if property_type == "backref": + prop['external'] = {} properties[property_id] = prop - # for child_node in sequence_or_all_node: - # if child_node.xpath(f'./*[local-name() = "complexType"]') \ - # or self._node_has_separate_complex_type(child_node): - # # check for recursion - # paths = new_source_path.split("/") - # if not child_node.get("name") in paths: - # ref_model_name, new_root_properties = self._create_model( - # child_node, - # source_path=new_source_path, - # parent_model=model - # ) - # properties.update({to_property_name(ref_model_name[0].split("/")[-1]): {"type": "ref", "model": ref_model_name[0], "external": {"name": child_node.get("name")}}}) - # root_properties.update(new_root_properties) - # else: - # for index, path in enumerate(paths): - # if path == child_node.get("name"): - # paths[index] = f"/{path}" - # new_source_path = "/".join(paths) - return properties def _properties_from_references( @@ -677,6 +665,10 @@ def _properties_from_references( properties[property_id] = prop continue + # avoiding recursion + if referenced_element.get("name") in source_path.split("/"): + continue + if not (XSDReader.is_array(ref_element) or is_array): referenced_model_names = self._create_model( referenced_element, @@ -709,6 +701,11 @@ def _properties_from_references( prop["type"] = property_type prop["model"] = f"{referenced_model_name}" + + # backrefs don't have to have source + if property_type == "backref": + prop['external'] = {} + properties[property_id] = prop return properties diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index 1a63243c8..a36478439 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -143,81 +143,81 @@ def test_xsd(rc: 
RawConfig, tmp_path: Path): """ table = """ -d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description -manifest | | | | | | | | | - | resource1 | xml | | | | | | | | - | | | | | | | | | - | | | Administracinis | | | /ADMINISTRACINIAI/ADMINISTRACINIS | | | | | | - | | | | adm_kodas | integer required | | ADM_KODAS/text() | | | | | | - | | | | adm_id | integer required | | ADM_ID/text() | | | | | | - | | | | tipas | string required | | TIPAS/text() | | | | | | - | | | | tipo_santrumpa | string required | | TIPO_SANTRUMPA/text() | | | | | | - | | | | vardas_k | string required | | VARDAS_K/text() | | | | | | - | | | | vardas_k_lot | string required | | VARDAS_K_LOT/text() | | | | | | - | | | | priklauso_kodas | integer required | | PRIKLAUSO_KODAS/text() | | | | | | - | | | | gyv_kodas | integer required | | GYV_KODAS/text() | | | | | | - | | | | nuo | date required | | NUO/text() | | | | | | - | | | | iki | date required | | IKI/text() | | | | | | - | | | | adm_nuo | date required | | ADM_NUO/text() | | | | | | - | | | | adm_iki | date required | | ADM_IKI/text() | | | | | | - | | | | | | | | | - | | | Gyvenviete | | | /GYVENVIETES/GYVENVIETE | | | | | | - | | | | gyv_kodas | integer required | | GYV_KODAS/text() | | | | | | - | | | | gyv_id | integer required | | GYV_ID/text() | | | | | | - | | | | tipas | string required | | TIPAS/text() | | | | | | - | | | | tipo_santrumpa | string required | | TIPO_SANTRUMPA/text() | | | | | | - | | | | vardas_v | string required | | VARDAS_V/text() | | | | | | - | | | | vardas_v_lot | string required | | VARDAS_V_LOT/text() | | | | | | - | | | | vardas_k | string required | | VARDAS_K/text() | | | | | | - | | | | vardas_k_lot | string required | | VARDAS_K_LOT/text() | | | | | | - | | | | adm_kodas | integer required | | ADM_KODAS/text() | | | | | | - | | | | nuo | date required | | NUO/text() | | | | | | - | | | | iki | date required | | IKI/text() | | | | | | - | | | | gyv_nuo | 
date required | | GYV_NUO/text() | | | | | | - | | | | gyv_iki | date required | | GYV_IKI/text() | | | | | | - | | | | | | | | | - | | | Gatve | | | /GATVES/GATVE | | | | | | - | | | | gat_kodas | integer required | | GAT_KODAS/text() | | | | | | - | | | | gat_id | integer required | | GAT_ID/text() | | | | | | - | | | | tipas | string required | | TIPAS/text() | | | | | | - | | | | tipo_santrumpa | string required | | TIPO_SANTRUMPA/text() | | | | | | - | | | | vardas_k | string required | | VARDAS_K/text() | | | | | | - | | | | vardas_k_lot | string required | | VARDAS_K_LOT/text() | | | | | | - | | | | gyv_kodas | integer required | | GYV_KODAS/text() | | | | | | - | | | | nuo | date required | | NUO/text() | | | | | | - | | | | iki | date required | | IKI/text() | | | | | | - | | | | gat_nuo | date required | | GAT_NUO/text() | | | | | | - | | | | gat_iki | date required | | GAT_IKI/text() | | | | | | - | | | | | | | | | - | | | Adresas | | | /ADRESAI/ADRESAS | | | | | | - | | | | aob_kodas | integer required | | AOB_KODAS/text() | | | | | | - | | | | aob_id | integer required | | AOB_ID/text() | | | | | | - | | | | gyv_kodas | integer required | | GYV_KODAS/text() | | | | | | - | | | | gat_kodas | integer required | | GAT_KODAS/text() | | | | | | - | | | | nr | string required | | NR/text() | | | | | | - | | | | korpuso_nr | string required | | KORPUSO_NR/text() | | | | | | - | | | | pasto_kodas | string required | | PASTO_KODAS/text() | | | | | | - | | | | nuo | date required | | NUO/text() | | | | | | - | | | | iki | date required | | IKI/text() | | | | | | - | | | | aob_nuo | date required | | AOB_NUO/text() | | | | | | - | | | | aob_iki | date required | | AOB_IKI/text() | | | | | | - | | | | | | | | | - | | | Patalpa | | | /PATALPOS/PATALPA | | | | | | - | | | | pat_kodas | integer required | | PAT_KODAS/text() | | | | | | - | | | | pat_id | integer required | | PAT_ID/text() | | | | | | - | | | | aob_kodas | integer required | | AOB_KODAS/text() | | | | 
| | - | | | | patalpos_nr | string required | | PATALPOS_NR/text() | | | | | | - | | | | nuo | date required | | NUO/text() | | | | | | - | | | | iki | date required | | IKI/text() | | | | | | - | | | | pat_nuo | date required | | PAT_NUO/text() | | | | | | - | | | | pat_iki | date required | | PAT_IKI/text() | | | | | | - | | | | | | | | | - | | | Kodas | | | /KODAI/KODAS | | | | | | - | | | | pasto_kodas | string required | | PASTO_KODAS/text() | | | | | | - | | | | pasto_viet_pav | string required | | PASTO_VIET_PAV/text() | | | | | | - | | | | nuo | date required | | NUO/text() | | | | | | - | | | | iki | date required | | IKI/text() | | | | | | - """ + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Administracinis | | | /ADMINISTRACINIAI/ADMINISTRACINIS | | | | | | + | | | | | adm_id | integer required | | ADM_ID/text() | | | | | | + | | | | | adm_iki | date required | | ADM_IKI/text() | | | | | | + | | | | | adm_kodas | integer required | | ADM_KODAS/text() | | | | | | + | | | | | adm_nuo | date required | | ADM_NUO/text() | | | | | | + | | | | | gyv_kodas | integer required | | GYV_KODAS/text() | | | | | | + | | | | | iki | date required | | IKI/text() | | | | | | + | | | | | nuo | date required | | NUO/text() | | | | | | + | | | | | priklauso_kodas | integer required | | PRIKLAUSO_KODAS/text() | | | | | | + | | | | | tipas | string required | | TIPAS/text() | | | | | | + | | | | | tipo_santrumpa | string required | | TIPO_SANTRUMPA/text() | | | | | | + | | | | | vardas_k | string required | | VARDAS_K/text() | | | | | | + | | | | | vardas_k_lot | string required | | VARDAS_K_LOT/text() | | | | | | + | | | | | | | | | | + | | | | Gyvenviete | | | /GYVENVIETES/GYVENVIETE | | | | | | + | | | | | adm_kodas | integer required | | ADM_KODAS/text() | | | | | | + | | | | | gyv_id | integer required | | 
GYV_ID/text() | | | | | | + | | | | | gyv_iki | date required | | GYV_IKI/text() | | | | | | + | | | | | gyv_kodas | integer required | | GYV_KODAS/text() | | | | | | + | | | | | gyv_nuo | date required | | GYV_NUO/text() | | | | | | + | | | | | iki | date required | | IKI/text() | | | | | | + | | | | | nuo | date required | | NUO/text() | | | | | | + | | | | | tipas | string required | | TIPAS/text() | | | | | | + | | | | | tipo_santrumpa | string required | | TIPO_SANTRUMPA/text() | | | | | | + | | | | | vardas_k | string required | | VARDAS_K/text() | | | | | | + | | | | | vardas_k_lot | string required | | VARDAS_K_LOT/text() | | | | | | + | | | | | vardas_v | string required | | VARDAS_V/text() | | | | | | + | | | | | vardas_v_lot | string required | | VARDAS_V_LOT/text() | | | | | | + | | | | | | | | | | + | | | | Gatve | | | /GATVES/GATVE | | | | | | + | | | | | gat_id | integer required | | GAT_ID/text() | | | | | | + | | | | | gat_iki | date required | | GAT_IKI/text() | | | | | | + | | | | | gat_kodas | integer required | | GAT_KODAS/text() | | | | | | + | | | | | gat_nuo | date required | | GAT_NUO/text() | | | | | | + | | | | | gyv_kodas | integer required | | GYV_KODAS/text() | | | | | | + | | | | | iki | date required | | IKI/text() | | | | | | + | | | | | nuo | date required | | NUO/text() | | | | | | + | | | | | tipas | string required | | TIPAS/text() | | | | | | + | | | | | tipo_santrumpa | string required | | TIPO_SANTRUMPA/text() | | | | | | + | | | | | vardas_k | string required | | VARDAS_K/text() | | | | | | + | | | | | vardas_k_lot | string required | | VARDAS_K_LOT/text() | | | | | | + | | | | | | | | | | + | | | | Adresas | | | /ADRESAI/ADRESAS | | | | | | + | | | | | aob_id | integer required | | AOB_ID/text() | | | | | | + | | | | | aob_iki | date required | | AOB_IKI/text() | | | | | | + | | | | | aob_kodas | integer required | | AOB_KODAS/text() | | | | | | + | | | | | aob_nuo | date required | | AOB_NUO/text() | | | | | | + | | | | | 
gat_kodas | integer required | | GAT_KODAS/text() | | | | | | + | | | | | gyv_kodas | integer required | | GYV_KODAS/text() | | | | | | + | | | | | iki | date required | | IKI/text() | | | | | | + | | | | | korpuso_nr | string required | | KORPUSO_NR/text() | | | | | | + | | | | | nr | string required | | NR/text() | | | | | | + | | | | | nuo | date required | | NUO/text() | | | | | | + | | | | | pasto_kodas | string required | | PASTO_KODAS/text() | | | | | | + | | | | | | | | | | + | | | | Patalpa | | | /PATALPOS/PATALPA | | | | | | + | | | | | aob_kodas | integer required | | AOB_KODAS/text() | | | | | | + | | | | | iki | date required | | IKI/text() | | | | | | + | | | | | nuo | date required | | NUO/text() | | | | | | + | | | | | pat_id | integer required | | PAT_ID/text() | | | | | | + | | | | | pat_iki | date required | | PAT_IKI/text() | | | | | | + | | | | | pat_kodas | integer required | | PAT_KODAS/text() | | | | | | + | | | | | pat_nuo | date required | | PAT_NUO/text() | | | | | | + | | | | | patalpos_nr | string required | | PATALPOS_NR/text() | | | | | | + | | | | | | | | | | + | | | | Kodas | | | /KODAI/KODAS | | | | | | + | | | | | iki | date required | | IKI/text() | | | | | | + | | | | | nuo | date required | | NUO/text() | | | | | | + | | | | | pasto_kodas | string required | | PASTO_KODAS/text() | | | | | | + | | | | | pasto_viet_pav | string required | | PASTO_VIET_PAV/text() | | | | | | +""" path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: xsd_file.write(xsd) @@ -273,22 +273,6 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Asmuo | | | /klientu_saraso_rezultatas/asmuo | | | | | | - | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | - | | | | | id | string required | | 
@id | | | | | | - | | | | | ak | string required | | @ak | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | | | | | | - | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | asmenys[] | backref | Asmuo | | | | | | | - | | | | | asmuo[].id | string required | | asmuo/@id | | | | | | - | | | | | asmuo[].ak | string required | | asmuo/@ak | | | | | | - | | | | | asmuo[].text | string | | asmuo/text() | | | | | | """ path = tmp_path / 'manifest.xsd' From 2c98fc7828f9286e46bde8501ea558b6a6690e2a Mon Sep 17 00:00:00 2001 From: karina Date: Tue, 16 Jul 2024 10:28:08 +0300 Subject: [PATCH 36/62] backref source removal --- spinta/manifests/xsd/helpers.py | 18 +++++----- tests/manifests/xsd/test_xsd.py | 59 +++++++++++++++++++++++---------- 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 9a51e034b..2cfc52382 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -599,8 +599,8 @@ def _properties_from_type_references( prop["type"] = property_type prop["model"] = f"{referenced_model_name}" # backrefs don't have to have source - if property_type == "backref": - prop['external'] = {} + if property_type == 'backref': + del prop["external"] properties[property_id] = prop return properties @@ -694,17 +694,17 @@ def _properties_from_references( for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) - prop["external"]["name"] = prop["external"]["name"].rstrip("/text()") + prop['external']['name'] = prop['external']['name'].rstrip('/text()') if new_referenced_element is not None: _, referenced_prop = model.simple_element_to_property(referenced_element) - prop["external"]["name"] += f'/{referenced_prop["external"]["name"].rstrip("/text()")}' + prop['external']['name'] += 
f'/{referenced_prop["external"]["name"].rstrip("/text()")}' - prop["type"] = property_type - prop["model"] = f"{referenced_model_name}" + prop['type'] = property_type + prop['model'] = f'{referenced_model_name}' # backrefs don't have to have source - if property_type == "backref": - prop['external'] = {} + if property_type == 'backref': + del prop['external'] properties[property_id] = prop @@ -928,7 +928,7 @@ def _create_model( parent_model=model, additional_properties=additional_properties) else: - ref_model_name, new_root_properties = self._create_model( + ref_model_name = self._create_model( element, source_path=new_source_path, parent_model=model, diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index a36478439..59032a10a 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -273,6 +273,32 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): """ table = """ + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Asmuo | | | /klientu_saraso_rezultatas/asmenys/asmuo | | | | | | + | | | | | ak | string required | | @ak | | | | | | + | | | | | asmenys | ref | Asmenys | | | | | | | + | | | | | id | string required | | @id | | | | | | + | | | | | text | string | | text() | | | | | | + | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | + | | | | | | | | | | + | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | + | | | | | asmuo[] | backref | Asmuo | | | | | | | + | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | + | | | | | asmenys | ref | Asmenys | asmenys | | | | | | + | | | | | asmenys.asmuo[] | backref | Asmuo | | | | 
| | | + | | | | | asmenys.asmuo[].ak | string required | | @ak | | | | | | + | | | | | asmenys.asmuo[].id | string required | | @id | | | | | | + | | | | | asmenys.asmuo[].text | string | | text() | | | | | | + | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris + | | | | | asmenys.text | string | | asmenys/text() | | | | | | + | | | | | text | string | | text() | | | | | | + """ path = tmp_path / 'manifest.xsd' @@ -315,23 +341,22 @@ def test_xsd_resource_model(rc: RawConfig, tmp_path: Path): """ table = """ -id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Resource | | | / | | | | http://www.w3.org/2000/01/rdf-schema#Resource | | Įvairūs duomenys - | | | | | klaida | string | | klaida/text() | | | | | | Klaidos atveju - klaidos pranešimas - | | | | | | | | | | - | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | - | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris - | | | | | text | string | | text() | | | | | | - | | | | | | | | | | - | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | asmenys | ref | Asmenys | | | | | | | - | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris - | | | | | asmenys.text | string | | asmenys/text() | | | | | | - + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Resource | | | / | | | | http://www.w3.org/2000/01/rdf-schema#Resource | | Įvairūs duomenys + | | | | | klaida | string | | klaida/text() | | | | | | Klaidos atveju - klaidos pranešimas + | | | | | | | | | | + | | 
| | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | + | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | + | | | | | asmenys | ref | Asmenys | asmenys | | | | | | + | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris + | | | | | asmenys.text | string | | asmenys/text() | | | | | | + | | | | | text | string | | text() | | | | | | """ path = tmp_path / 'manifest.xsd' From 3ce8ab6cf50f54a2dc35af8ed00e50710eedf70e Mon Sep 17 00:00:00 2001 From: karina Date: Tue, 16 Jul 2024 10:28:08 +0300 Subject: [PATCH 37/62] #622 backref source removal --- spinta/manifests/xsd/helpers.py | 18 +++++----- tests/manifests/xsd/test_xsd.py | 59 +++++++++++++++++++++++---------- 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 9a51e034b..2cfc52382 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -599,8 +599,8 @@ def _properties_from_type_references( prop["type"] = property_type prop["model"] = f"{referenced_model_name}" # backrefs don't have to have source - if property_type == "backref": - prop['external'] = {} + if property_type == 'backref': + del prop["external"] properties[property_id] = prop return properties @@ -694,17 +694,17 @@ def _properties_from_references( for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) - prop["external"]["name"] = prop["external"]["name"].rstrip("/text()") + prop['external']['name'] = prop['external']['name'].rstrip('/text()') if new_referenced_element is not None: _, referenced_prop = model.simple_element_to_property(referenced_element) - prop["external"]["name"] += 
f'/{referenced_prop["external"]["name"].rstrip("/text()")}' + prop['external']['name'] += f'/{referenced_prop["external"]["name"].rstrip("/text()")}' - prop["type"] = property_type - prop["model"] = f"{referenced_model_name}" + prop['type'] = property_type + prop['model'] = f'{referenced_model_name}' # backrefs don't have to have source - if property_type == "backref": - prop['external'] = {} + if property_type == 'backref': + del prop['external'] properties[property_id] = prop @@ -928,7 +928,7 @@ def _create_model( parent_model=model, additional_properties=additional_properties) else: - ref_model_name, new_root_properties = self._create_model( + ref_model_name = self._create_model( element, source_path=new_source_path, parent_model=model, diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index a36478439..59032a10a 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -273,6 +273,32 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): """ table = """ + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Asmuo | | | /klientu_saraso_rezultatas/asmenys/asmuo | | | | | | + | | | | | ak | string required | | @ak | | | | | | + | | | | | asmenys | ref | Asmenys | | | | | | | + | | | | | id | string required | | @id | | | | | | + | | | | | text | string | | text() | | | | | | + | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | + | | | | | | | | | | + | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | + | | | | | asmuo[] | backref | Asmuo | | | | | | | + | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | + | | | | | asmenys | 
ref | Asmenys | asmenys | | | | | | + | | | | | asmenys.asmuo[] | backref | Asmuo | | | | | | | + | | | | | asmenys.asmuo[].ak | string required | | @ak | | | | | | + | | | | | asmenys.asmuo[].id | string required | | @id | | | | | | + | | | | | asmenys.asmuo[].text | string | | text() | | | | | | + | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris + | | | | | asmenys.text | string | | asmenys/text() | | | | | | + | | | | | text | string | | text() | | | | | | + """ path = tmp_path / 'manifest.xsd' @@ -315,23 +341,22 @@ def test_xsd_resource_model(rc: RawConfig, tmp_path: Path): """ table = """ -id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Resource | | | / | | | | http://www.w3.org/2000/01/rdf-schema#Resource | | Įvairūs duomenys - | | | | | klaida | string | | klaida/text() | | | | | | Klaidos atveju - klaidos pranešimas - | | | | | | | | | | - | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | - | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris - | | | | | text | string | | text() | | | | | | - | | | | | | | | | | - | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | asmenys | ref | Asmenys | | | | | | | - | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris - | | | | | asmenys.text | string | | asmenys/text() | | | | | | - + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Resource | | | / | | | | http://www.w3.org/2000/01/rdf-schema#Resource | | Įvairūs duomenys + | | | | | klaida | string | | 
klaida/text() | | | | | | Klaidos atveju - klaidos pranešimas + | | | | | | | | | | + | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | + | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | + | | | | | asmenys | ref | Asmenys | asmenys | | | | | | + | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris + | | | | | asmenys.text | string | | asmenys/text() | | | | | | + | | | | | text | string | | text() | | | | | | """ path = tmp_path / 'manifest.xsd' From 738addb4d05d59f902747dbec78997fe208cf872 Mon Sep 17 00:00:00 2001 From: karina Date: Sun, 21 Jul 2024 17:35:06 +0300 Subject: [PATCH 38/62] connecting models with types working --- spinta/manifests/xsd/helpers.py | 79 ++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 25 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 2cfc52382..319c2d5b2 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -520,7 +520,7 @@ def _properties_from_type_references( sequences = complex_type.xpath("./*[local-name() = 'sequence']") if not sequences: choices = complex_type.xpath("./*[local-name() = 'choice']") - if choices[0].get('maxOccurs') == 'unbounded': + if choices and choices[0].get('maxOccurs') == 'unbounded': sequences = choices is_array = True if sequences: @@ -534,25 +534,35 @@ def _properties_from_type_references( previous_referenced_element_name = None # we check for the length of sequence, because it can have more than one element, but also length of # complexType because it can have attributes too. 
- if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self._node_has_separate_complex_type(sequence[0]): + if ( + sequence is not None and + len(sequence) == 1 and + # len(complex_type) == 1 and + (len(complex_type) == 1 or (len(complex_type) == 2 and len(complex_type.xpath("./*[local-name() = 'annotation']")) > 0)) and + self._node_has_separate_complex_type(sequence[0]) + ): # if typed_element.get("name") is not None: new_source_path += f'/{typed_element.get("name")}' # source_path += f'/{complex_type.get("name")}' - if not is_array: - is_array = XSDReader.is_array(complex_type) - if not is_array: - is_array = XSDReader.is_array(complex_type[0][0]) - if not is_array: - XSDReader.is_array(typed_element) + is_array = ( + is_array or + XSDReader.is_array(complex_type) or + XSDReader.is_array(complex_type[0][0]) or + XSDReader.is_array(typed_element) or + XSDReader.is_array(sequence[0]) + ) previous_referenced_element = typed_element previous_referenced_element_name = typed_element.get("name") - new_referenced_element = complex_type[0][0] - typed_element = new_referenced_element + complex_type = self._get_separate_complex_type_node(sequence[0]) + + # TODO: we probably don't need both + new_referenced_element = sequence[0] + typed_element = sequence[0] if self.node_is_simple_type_or_inline(new_referenced_element): property_id, prop = model.simple_element_to_property( - new_referenced_element, + typed_element, is_array=is_array, source_path=previous_referenced_element_name) if not XSDReader.is_required(typed_element): @@ -626,35 +636,51 @@ def _properties_from_references( else: is_array = False # TODO fix this because it probably doesn't cover all cases, only something like + # also it covers choice now. 
# https://github.com/atviriduomenys/spinta/issues/613 + complex_type = referenced_element.xpath("./*[local-name() = 'complexType']")[0] sequences = complex_type.xpath("./*[local-name() = 'sequence']") if not sequences: choices = complex_type.xpath("./*[local-name() = 'choice']") - if choices and choices[0].get('maxOccurs') == 'unbounded': + if choices and XSDReader.is_array(choices[0]): sequences = choices - is_array = True + # we only make this array if it's only one choice, which means it's a wrapper for an array + # also, if it's mixed, and has choices inside, it's not an array even if choices are unbound + if len(choices[0]) == 1 and not complex_type.get("mixed") == "true": + is_array = True + # is_array = True if sequences: sequence = sequences[0] else: sequence = None new_referenced_element = None + + # if we only have one ref element and if it's inside a choice/sequence (this node) which is maxOccurs = unbounded then it's array + if XSDReader.is_array(node) and len(node) == 1: + is_array = True + + # if it's a proxy model, we don't create it, but make reference to the next model # we check for the length of sequence, because it can have more than one element, but also length of # complexType because it can have attributes too. 
- if sequence is not None and len(sequence) == 1 and len(complex_type) == 1 and self.node_is_ref(sequence[0]): + if ( + sequence is not None and + len(sequence) == 1 and + len(complex_type) == 1 and + self.node_is_ref(sequence[0]) and + not complex_type.get("mixed") == "true" + ): if ref_element.get("name") is not None: new_source_path += f'/{ref_element.get("name")}' new_source_path += f'/{referenced_element.get("name")}' - if not is_array: - is_array = XSDReader.is_array(referenced_element) - if not is_array: - is_array = XSDReader.is_array(complex_type[0][0]) previous_referenced_element_name = referenced_element.get("name") new_referenced_element = self._get_referenced_node(complex_type[0][0]) referenced_element = new_referenced_element + is_array = is_array or XSDReader.is_array(referenced_element) or XSDReader.is_array(complex_type[0][0]) or XSDReader.is_array(sequence) + if self.node_is_simple_type_or_inline(referenced_element): property_id, prop = model.simple_element_to_property( referenced_element, @@ -927,12 +953,6 @@ def _create_model( source_path=new_source_path, parent_model=model, additional_properties=additional_properties) - else: - ref_model_name = self._create_model( - element, - source_path=new_source_path, - parent_model=model, - additional_properties=additional_properties) elif sequence_or_all_node_length > 1 or properties: # properties from simple type or inline elements without references @@ -1001,7 +1021,7 @@ def start(self): def _add_model_nested_properties(self, root_model: XSDModel, model: XSDModel, property_prefix: str = "", source_path: str = ""): """recursively gather nested properties or root model""" - # go orward, add property prefix, which is constructed rom properties that came rom beore models, and construct pathh orward also + # go forward, add property prefix, which is constructed rom properties that came rom beore models, and construct pathh orward also # probably will need to cut beginning for path sometimes source_path = 
source_path.lstrip("/") @@ -1165,6 +1185,15 @@ def read_schema( Root model or models can have nested properties if they have any properties that point to other models. + + TODO: there are 3 types of creating references: + through ref + through type + direct, when one element that corresponds to a model is inside another one. This still doesn't work. + It also seems that it even stopped adding models if they are connected this way. + + TODO: JADIS 455 sukuria Asmuo modelį, bet ne property jam modelyje Israsas + """ xsd = XSDReader(path, dataset_name) From 4050c44109e66b9b72e02c9fd0789ad6ace83111 Mon Sep 17 00:00:00 2001 From: karina Date: Sun, 21 Jul 2024 19:17:14 +0300 Subject: [PATCH 39/62] model connection trough type fixed --- spinta/manifests/xsd/helpers.py | 86 ++++++++------------------------- 1 file changed, 21 insertions(+), 65 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 319c2d5b2..a7b4fc3a8 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -501,7 +501,8 @@ def _properties_from_type_references( properties = {} - for typed_element in node.xpath("./*[@type]"): + for typed_element in node.xpath('./*[local-name() = "element"]'): + # for typed_element in node.xpath("./*[@type]"): new_source_path = source_path @@ -849,7 +850,7 @@ def _create_model( return self._split_choice( node, source_path=source_path, - parent_model=model, + parent_model=parent_model, additional_properties=additional_properties, is_root_model=is_root_model ) @@ -911,69 +912,24 @@ def _create_model( sequence_or_all_node = sequence_or_all_nodes[0] else: sequence_or_all_node = complex_type_node - sequence_or_all_node_length = len(sequence_or_all_node) - # There is only one element in the complex node sequence, and it doesn't have annotation. - # Then we just go deeper and add this model to the next model's path. 
- if sequence_or_all_node_length == 1 and not properties: - - if sequence_or_all_node.xpath(f'./*[local-name() = "element"]'): - if not sequence_or_all_node.xpath(f'./*[local-name() = "element"]')[0].get("ref"): - element = sequence_or_all_node.xpath(f'./*[local-name() = "element"]')[0] - if self.node_is_simple_type_or_inline(element) and not self.node_is_ref(element): - properties.update(model.properties_from_simple_elements(sequence_or_all_node, properties_required=properties_required)) - # check for recursion - # TODO: maybe move this to a separate function - # TODO: recursion not fully working - # https://github.com/atviriduomenys/spinta/issues/602 - else: - paths = new_source_path.split("/") - if not element.get("name") in paths: - - # this can sometimes happen when choice node has been split or maybe in some other cases too - return self._create_model( - element, - source_path=new_source_path, - parent_model=model) - else: - for index, path in enumerate(paths): - if path == element.get("name"): - paths[index] = f"/{path}" - new_source_path = "/".join(paths) - - else: - # TODO: if reference is to an inline or simpleType element, - # and maxOccurs of it is 1, - # then do not create reference, but add to the same - - element = sequence_or_all_node.xpath(f'./*[local-name() = "element"]')[0] - element = self._get_referenced_node(element) - if not is_root_model: - return self._create_model( - element, - source_path=new_source_path, - parent_model=model, - additional_properties=additional_properties) - - elif sequence_or_all_node_length > 1 or properties: - # properties from simple type or inline elements without references - # properties are required for choice where maxOccurs=unbound and maybe some other cases - properties.update(model.properties_from_simple_elements( - sequence_or_all_node, - properties_required=properties_required)) - - # references - properties_from_references = self._properties_from_references( - sequence_or_all_node, - model=model, - 
source_path=new_source_path) - properties.update(properties_from_references) - - # complex type child nodes - to models - properties_from_references = self._properties_from_type_references( - sequence_or_all_node, - model=model, - source_path=new_source_path) - properties.update(properties_from_references) + + properties.update(model.properties_from_simple_elements( + sequence_or_all_node, + properties_required=properties_required)) + + # references + properties_from_references = self._properties_from_references( + sequence_or_all_node, + model=model, + source_path=new_source_path) + properties.update(properties_from_references) + + # complex type child nodes - to models + properties_from_references = self._properties_from_type_references( + sequence_or_all_node, + model=model, + source_path=new_source_path) + properties.update(properties_from_references) model.properties = properties From 515e0344530a5737ce7c7272f430d4a662dc44a0 Mon Sep 17 00:00:00 2001 From: karina Date: Sun, 21 Jul 2024 21:41:47 +0300 Subject: [PATCH 40/62] #622 correctly detecting if element is referenced even if there is a namespace --- spinta/manifests/xsd/helpers.py | 36 +++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index a7b4fc3a8..2135e40de 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -388,7 +388,17 @@ def _node_is_referenced(self, node): # if this node is referenced by some other node node_name = node.get('name') xpath_search_string = f'//*[@ref="{node_name}"]' - references = self.root.xpath(xpath_search_string) + references = self.root.xpath(xpath_search_string, namespaces=self.namespaces) + + # also check with namespace prefixes. 
+ # Though, it is possible that this isn't correct XSD behaviour, but it seems common in RC + if not references: + for prefix in self.namespaces: + prefixed_node_name = f"{prefix}:{node_name}" + xpath_search_string = f'//*[@ref="{prefixed_node_name}"]' + references = self.root.xpath(xpath_search_string, namespaces=self.namespaces) + if references: + return True if references: return True return False @@ -941,10 +951,6 @@ def _create_model( return [model.name, ] - # for model_root_properties in root_properties.values(): - # for model_root_property in model_root_properties.values(): - # model_root_property['external']['name'] = f"{node.get('name')}/{model_root_property['external']['name']}" - return [] def _add_resource_model(self): @@ -968,13 +974,29 @@ def _parse_root_node(self): ): self._create_model(node, is_root_model=True) + # def _trim_fake_root_models(self): + # """ + # We need to remove those root models which are referenced from other elements. + # We need to check the source instead of name, because it most probably be referenced by another name anyway. 
+ # """ + # trimmed_models = {} + # for model_name, model in self.models: + + def _extract_namespaces(self): + self.namespaces = self.root.nsmap + def start(self): self._extract_root() + self._extract_namespaces() self._extract_custom_types(self.root) self._add_resource_model() self._parse_root_node() + # self._trim_fake_root_models() + + self._compile_nested_properties() + def _add_model_nested_properties(self, root_model: XSDModel, model: XSDModel, property_prefix: str = "", source_path: str = ""): """recursively gather nested properties or root model""" # go forward, add property prefix, which is constructed rom properties that came rom beore models, and construct pathh orward also @@ -1031,7 +1053,7 @@ def _add_model_nested_properties(self, root_model: XSDModel, model: XSDModel, pr root_model.properties.update(root_properties) - def compile_nested_properties(self): + def _compile_nested_properties(self): for model_name, parsed_model in self.models.items(): # we need to add root properties to properties if it's a root model @@ -1157,8 +1179,6 @@ def read_schema( yield None, xsd.dataset_and_resource_info - xsd.compile_nested_properties() - for model_name, parsed_model in xsd.models.items(): yield None, parsed_model.get_data() From 96da8453e35a59f9a27b3e0145833b597dff9478 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 25 Jul 2024 21:17:15 +0300 Subject: [PATCH 41/62] #622 correctly generating arrays --- spinta/manifests/xsd/helpers.py | 52 +++++++++++++++------------------ 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 2135e40de..41b400b6e 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -388,7 +388,7 @@ def _node_is_referenced(self, node): # if this node is referenced by some other node node_name = node.get('name') xpath_search_string = f'//*[@ref="{node_name}"]' - references = self.root.xpath(xpath_search_string, 
namespaces=self.namespaces) + references = self.root.xpath(xpath_search_string) # also check with namespace prefixes. # Though, it is possible that this isn't correct XSD behaviour, but it seems common in RC @@ -396,7 +396,7 @@ def _node_is_referenced(self, node): for prefix in self.namespaces: prefixed_node_name = f"{prefix}:{node_name}" xpath_search_string = f'//*[@ref="{prefixed_node_name}"]' - references = self.root.xpath(xpath_search_string, namespaces=self.namespaces) + references = self.root.xpath(xpath_search_string) if references: return True if references: @@ -512,7 +512,6 @@ def _properties_from_type_references( properties = {} for typed_element in node.xpath('./*[local-name() = "element"]'): - # for typed_element in node.xpath("./*[@type]"): new_source_path = source_path @@ -548,13 +547,10 @@ def _properties_from_type_references( if ( sequence is not None and len(sequence) == 1 and - # len(complex_type) == 1 and (len(complex_type) == 1 or (len(complex_type) == 2 and len(complex_type.xpath("./*[local-name() = 'annotation']")) > 0)) and - self._node_has_separate_complex_type(sequence[0]) + (self._node_has_separate_complex_type(sequence[0]) or sequence[0].xpath(f'./*[local-name() = "complexType"]')) ): - # if typed_element.get("name") is not None: new_source_path += f'/{typed_element.get("name")}' - # source_path += f'/{complex_type.get("name")}' is_array = ( is_array or XSDReader.is_array(complex_type) or @@ -585,6 +581,11 @@ def _properties_from_type_references( if typed_element.get("name") in source_path.split("/"): continue + is_array = is_array or XSDReader.is_array(typed_element) + if not is_array: + if previous_referenced_element: + is_array = XSDReader.is_array(previous_referenced_element) + if not is_array: referenced_model_names = self._create_model( typed_element, @@ -612,16 +613,15 @@ def _properties_from_type_references( property_id, prop = model.simple_element_to_property(typed_element, is_array=is_array) prop["external"]["name"] = 
prop["external"]["name"].replace("/text()", '') if new_referenced_element is not None and new_referenced_element.get("mixed") != "true": - _, referenced_prop = model.simple_element_to_property(previous_referenced_element) + _, referenced_prop = model.simple_element_to_property(previous_referenced_element, is_array=is_array) prop["external"]["name"] = f'{previous_referenced_element_name}/{prop["external"]["name"]}' property_id = to_property_name(previous_referenced_element_name) if is_array: - property_id += "[]" + if not property_id.endswith("[]"): + property_id += "[]" + property_type = "backref" prop["type"] = property_type prop["model"] = f"{referenced_model_name}" - # backrefs don't have to have source - if property_type == 'backref': - del prop["external"] properties[property_id] = prop return properties @@ -660,7 +660,6 @@ def _properties_from_references( # also, if it's mixed, and has choices inside, it's not an array even if choices are unbound if len(choices[0]) == 1 and not complex_type.get("mixed") == "true": is_array = True - # is_array = True if sequences: sequence = sequences[0] else: @@ -706,7 +705,10 @@ def _properties_from_references( if referenced_element.get("name") in source_path.split("/"): continue - if not (XSDReader.is_array(ref_element) or is_array): + if XSDReader.is_array(ref_element): + is_array = True + + if not is_array: referenced_model_names = self._create_model( referenced_element, source_path=new_source_path, @@ -733,16 +735,17 @@ def _properties_from_references( property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) prop['external']['name'] = prop['external']['name'].rstrip('/text()') if new_referenced_element is not None: - _, referenced_prop = model.simple_element_to_property(referenced_element) + _, referenced_prop = model.simple_element_to_property(referenced_element, is_array=is_array) prop['external']['name'] += f'/{referenced_prop["external"]["name"].rstrip("/text()")}' + if is_array: + if not 
property_id.endswith("[]"): + property_id += "[]" + property_type = "backref" + prop['type'] = property_type prop['model'] = f'{referenced_model_name}' - # backrefs don't have to have source - if property_type == 'backref': - del prop['external'] - properties[property_id] = prop return properties @@ -974,27 +977,18 @@ def _parse_root_node(self): ): self._create_model(node, is_root_model=True) - # def _trim_fake_root_models(self): - # """ - # We need to remove those root models which are referenced from other elements. - # We need to check the source instead of name, because it most probably be referenced by another name anyway. - # """ - # trimmed_models = {} - # for model_name, model in self.models: - def _extract_namespaces(self): self.namespaces = self.root.nsmap def start(self): self._extract_root() + self._extract_namespaces() self._extract_custom_types(self.root) self._add_resource_model() self._parse_root_node() - # self._trim_fake_root_models() - self._compile_nested_properties() def _add_model_nested_properties(self, root_model: XSDModel, model: XSDModel, property_prefix: str = "", source_path: str = ""): From 0abe997a9c441b2e4a4605c9668e72d3ac36a018 Mon Sep 17 00:00:00 2001 From: karina Date: Mon, 12 Aug 2024 11:57:16 +0300 Subject: [PATCH 42/62] tests: --- spinta/manifests/xsd/helpers.py | 187 +++++++++++++---------------- tests/manifests/xsd/test_xsd.py | 207 ++++++++++++++------------------ 2 files changed, 179 insertions(+), 215 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 41b400b6e..3819060b8 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -533,59 +533,11 @@ def _properties_from_type_references( if choices and choices[0].get('maxOccurs') == 'unbounded': sequences = choices is_array = True - if sequences: - sequence = sequences[0] - else: - sequence = None - - # proxy element (for array), we don't create a model for it, it's to indicate an array - 
new_referenced_element = None - previous_referenced_element = None - previous_referenced_element_name = None - # we check for the length of sequence, because it can have more than one element, but also length of - # complexType because it can have attributes too. - if ( - sequence is not None and - len(sequence) == 1 and - (len(complex_type) == 1 or (len(complex_type) == 2 and len(complex_type.xpath("./*[local-name() = 'annotation']")) > 0)) and - (self._node_has_separate_complex_type(sequence[0]) or sequence[0].xpath(f'./*[local-name() = "complexType"]')) - ): - new_source_path += f'/{typed_element.get("name")}' - is_array = ( - is_array or - XSDReader.is_array(complex_type) or - XSDReader.is_array(complex_type[0][0]) or - XSDReader.is_array(typed_element) or - XSDReader.is_array(sequence[0]) - ) - - previous_referenced_element = typed_element - previous_referenced_element_name = typed_element.get("name") - complex_type = self._get_separate_complex_type_node(sequence[0]) - - # TODO: we probably don't need both - new_referenced_element = sequence[0] - typed_element = sequence[0] - - if self.node_is_simple_type_or_inline(new_referenced_element): - property_id, prop = model.simple_element_to_property( - typed_element, - is_array=is_array, - source_path=previous_referenced_element_name) - if not XSDReader.is_required(typed_element): - prop["required"] = False - properties[property_id] = prop - continue # avoiding recursion if typed_element.get("name") in source_path.split("/"): continue - is_array = is_array or XSDReader.is_array(typed_element) - if not is_array: - if previous_referenced_element: - is_array = XSDReader.is_array(previous_referenced_element) - if not is_array: referenced_model_names = self._create_model( typed_element, @@ -612,14 +564,10 @@ def _properties_from_type_references( for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(typed_element, is_array=is_array) prop["external"]["name"] = 
prop["external"]["name"].replace("/text()", '') - if new_referenced_element is not None and new_referenced_element.get("mixed") != "true": - _, referenced_prop = model.simple_element_to_property(previous_referenced_element, is_array=is_array) - prop["external"]["name"] = f'{previous_referenced_element_name}/{prop["external"]["name"]}' - property_id = to_property_name(previous_referenced_element_name) - if is_array: - if not property_id.endswith("[]"): - property_id += "[]" - property_type = "backref" + if is_array: + if not property_id.endswith("[]"): + property_id += "[]" + property_type = "backref" prop["type"] = property_type prop["model"] = f"{referenced_model_name}" properties[property_id] = prop @@ -660,47 +608,11 @@ def _properties_from_references( # also, if it's mixed, and has choices inside, it's not an array even if choices are unbound if len(choices[0]) == 1 and not complex_type.get("mixed") == "true": is_array = True - if sequences: - sequence = sequences[0] - else: - sequence = None - - new_referenced_element = None # if we only have one ref element and if it's inside a choice/sequence (this node) which is maxOccurs = unbounded then it's array if XSDReader.is_array(node) and len(node) == 1: is_array = True - # if it's a proxy model, we don't create it, but make reference to the next model - # we check for the length of sequence, because it can have more than one element, but also length of - # complexType because it can have attributes too. 
- if ( - sequence is not None and - len(sequence) == 1 and - len(complex_type) == 1 and - self.node_is_ref(sequence[0]) and - not complex_type.get("mixed") == "true" - ): - if ref_element.get("name") is not None: - new_source_path += f'/{ref_element.get("name")}' - new_source_path += f'/{referenced_element.get("name")}' - - previous_referenced_element_name = referenced_element.get("name") - new_referenced_element = self._get_referenced_node(complex_type[0][0]) - referenced_element = new_referenced_element - - is_array = is_array or XSDReader.is_array(referenced_element) or XSDReader.is_array(complex_type[0][0]) or XSDReader.is_array(sequence) - - if self.node_is_simple_type_or_inline(referenced_element): - property_id, prop = model.simple_element_to_property( - referenced_element, - is_array=is_array, - source_path=previous_referenced_element_name) - if not XSDReader.is_required(ref_element): - prop["required"] = False - properties[property_id] = prop - continue - # avoiding recursion if referenced_element.get("name") in source_path.split("/"): continue @@ -734,9 +646,6 @@ def _properties_from_references( for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) prop['external']['name'] = prop['external']['name'].rstrip('/text()') - if new_referenced_element is not None: - _, referenced_prop = model.simple_element_to_property(referenced_element, is_array=is_array) - prop['external']['name'] += f'/{referenced_prop["external"]["name"].rstrip("/text()")}' if is_array: if not property_id.endswith("[]"): @@ -989,8 +898,89 @@ def start(self): self._parse_root_node() + self._remove_unneeded_models() + self._compile_nested_properties() + def remove_extra_root_models(self, model: XSDModel) -> XSDModel: + """ + removes root models that have only one property from the root + """ + stop_removing = False + + while not stop_removing: + # remove the model itself if it's a root proxy model + if 
(len(model.properties) == 1) and (list(model.properties.values())[0]["type"] in ("ref", "backref")): + model = self.models[list(model.properties.values())[0]["model"]] + model.parent_model = None + else: + stop_removing = True + + # todo: what if a root element is an array, like in klasifikatoriai. There can be only one + # root element. So it's rowset, and then inside rowset the re are many rows. + # Is this somehow important for us, for later reading the xml, or not? + return model + + def _remove_proxy_models(self, model: XSDModel): + """ Removes models which have only one property + Usually these are proxy models to indicate arrays, but there can be other situations + Removes the models that are in the middle of other models + or at the end and have one property, then this property is joined to the referring model. + """ + + self.new_models[model.name] = model + + for property_id, prop in model.properties.items(): + if prop["type"] in ("ref", "backref"): + referee = self.models[prop["model"]] + parse_referee = True + while len(referee.properties) == 1: + ref_property_id, ref_prop = list(referee.properties.items())[0] + if ref_prop["type"] not in ("ref", "backref"): + ref_prop["external"]["name"] = f'{prop["external"]["name"]}/{ref_prop["external"]["name"]}' + parse_referee = False + model.properties[property_id] = ref_prop + break + + if prop["type"] == "backref" and ref_prop["type"] == "backref": + break + else: + + referee = self.models[ref_prop["model"]] + if prop["type"] == "ref" and ref_prop["type"] == "backref": + prop["type"] = "backref" + property_id = f"{property_id}[]" + prop["external"]["name"] = f'{prop["external"]["name"]}/{ref_prop["external"]["name"]}' + prop["model"] = ref_prop["model"] + + if not self._has_backref(model, referee) and parse_referee: + self._remove_proxy_models(referee) + + def _remove_unneeded_models(self): + """ + Proxy models are those that have only one property which is a ref to another model. 
+ They can act as placeholders, or as array indicators. + If either one of them is an array, drop the proxy model and replace the reference to point to the new model + If both referencing properties are not arrays, the resulting model shouldn't be an array, and if any of them is an array, the resulting ref is an array (backref). + If both models, the referrer and the referee are arrays, do not drop them, because this means that it's an array of arrays. + """ + self.new_models = {} + for model_name, model in self.models.items(): + + # we need to start from root models + if model.parent_model is None: + model = self.remove_extra_root_models(model) + self._remove_proxy_models(model) + + self.models = self.new_models + + def _has_backref(self, model: XSDModel, ref_model: XSDModel) -> bool: + has_backref = False + for ref_model_property in ref_model.properties.values(): + if (ref_model_property.get("type") == "backref") and (ref_model_property.get('model') == model.name): + has_backref = True + return has_backref + def _add_model_nested_properties(self, root_model: XSDModel, model: XSDModel, property_prefix: str = "", source_path: str = ""): """recursively gather nested properties or root model""" # go forward, add property prefix, which is constructed rom properties that came rom beore models, and construct pathh orward also @@ -1026,12 +1016,7 @@ def _add_model_nested_properties(self, root_model: XSDModel, model: XSDModel, pr # If it's a ref, we need to build the path from the root of the model. 
If it's backref (array) - # it's relative to array if prop["type"] == "ref": - has_backref = False - for ref_model_property in ref_model.properties.values(): - if (ref_model_property.get("type") == "backref") and (ref_model_property.get('model') == model.name): - has_backref = True - break - if has_backref: + if self._has_backref(model, ref_model): continue if source_path: new_source_path = f"{source_path}/{ref_model.external['name'].replace(model.external['name'], '').lstrip('/')}" diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index 59032a10a..b8cda3479 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -277,28 +277,27 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): | manifest | | | | | | | | | | | resource1 | xml | | | | | | | | | | | | | | | | | | - | | | | Asmuo | | | /klientu_saraso_rezultatas/asmenys/asmuo | | | | | | - | | | | | ak | string required | | @ak | | | | | | - | | | | | asmenys | ref | Asmenys | | | | | | | - | | | | | id | string required | | @id | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | - | | | | | | | | | | - | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | - | | | | | asmuo[] | backref | Asmuo | | | | | | | - | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris - | | | | | text | string | | text() | | | | | | - | | | | | | | | | | | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | | | | | | asmenys | ref | Asmenys | asmenys | | | | | | - | | | | | asmenys.asmuo[] | backref | Asmuo | | | | | | | + | | | | | asmenys.asmuo[] | backref | Asmuo | asmenys/asmuo | | | | | | | | | | | asmenys.asmuo[].ak | string required | | @ak | | | | | | | | | | | asmenys.asmuo[].id | string required | | @id | | | | | | | | | | | asmenys.asmuo[].text | string | | text() | | | | | | | | | | | asmenys.puslapis | 
integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris | | | | | asmenys.text | string | | asmenys/text() | | | | | | | | | | | text | string | | text() | | | | | | - + | | | | | | | | | | + | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | + | | | | | asmuo[] | backref | Asmuo | asmuo | | | | | | + | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | Asmuo | | | /klientu_saraso_rezultatas/asmenys/asmuo | | | | | | + | | | | | ak | string required | | @ak | | | | | | + | | | | | asmenys | ref | Asmenys | | | | | | | + | | | | | id | string required | | @id | | | | | | + | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | + | | | | | text | string | | text() | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -348,15 +347,15 @@ def test_xsd_resource_model(rc: RawConfig, tmp_path: Path): | | | | Resource | | | / | | | | http://www.w3.org/2000/01/rdf-schema#Resource | | Įvairūs duomenys | | | | | klaida | string | | klaida/text() | | | | | | Klaidos atveju - klaidos pranešimas | | | | | | | | | | - | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | - | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris - | | | | | text | string | | text() | | | | | | - | | | | | | | | | | | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | | | | | | asmenys | ref | Asmenys | asmenys | | | | | | | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris | | | | | asmenys.text | string | | asmenys/text() | | | | | | | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | + | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris + | | | | | text | string | 
| text() | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -471,34 +470,35 @@ def test_xsd_choice(rc: RawConfig, tmp_path: Path): """ table = """ -id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Parcel1 | | | /parcels/parcel | | | | | | Žemės sklypo pasikeitimo informacija - | | | | | parcels | ref | Parcels | | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | parcel_unique_number | integer required | | parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris - | | | | | | | | | | - | | | | Parcel2 | | | /parcels/parcel | | | | | | Žemės sklypo pasikeitimo informacija - | | | | | parcels | ref | Parcels | | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | sign_of_change | integer required | | sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis - | | enum | | 1 | | | | | | - | | | | 2 | | | | | | - | | | | 3 | | | | | | - | | | | | | | | | | - | | | | Parcels | | | /parcels | | | | | | Pasikeitusių žemės sklypų sąrašas - | | | | | text | string | | text() | | | | | | - | | | | | parcel[] | backref | Parcel1 | | | | | | | - | | | | | parcel1[] | backref | Parcel2 | | | | | | | - | | | | | parcel1[].text | string | | parcel/text() | | | | | | - | | | | | parcel1[].parcel_unique_number | integer required | | parcel/parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris - | | | | | parcel2[].text | string | | parcel/text() | | | | | | - | | | | | parcel2[].sign_of_change | integer required | | parcel/sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis - | | enum | | 1 | | | | | | - | | | | 2 | | | | | | - | | | | 3 | | | | | | + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | 
| | | | | | + | | | | Parcels | | | /parcels | | | | | | Pasikeitusių žemės sklypų sąrašas + | | | | | parcel1[] | backref | Parcel2 | parcel | | | | | | + | | | | | parcel1[].sign_of_change | integer required | | sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis + | | enum | | 1 | | | | | | + | | | | 2 | | | | | | + | | | | 3 | | | | | | + | | | | | parcel1[].text | string | | text() | | | | | | + | | | | | parcel[] | backref | Parcel1 | parcel | | | | | | + | | | | | parcel[].parcel_unique_number | integer required | | parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris + | | | | | parcel[].text | string | | text() | | | | | | + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | Parcel1 | | | /parcels/parcel | | | | | | Žemės sklypo pasikeitimo informacija + | | | | | parcel_unique_number | integer required | | parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris + | | | | | parcels | ref | Parcels | | | | | | | + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | Parcel2 | | | /parcels/parcel | | | | | | Žemės sklypo pasikeitimo informacija + | | | | | parcels | ref | Parcels | | | | | | | + | | | | | sign_of_change | integer required | | sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis + | | enum | | 1 | | | | | | + | | | | 2 | | | | | | + | | | | 3 | | | | | | + | | | | | text | string | | text() | | | | | | + """ path = tmp_path / 'manifest.xsd' @@ -509,7 +509,7 @@ def test_xsd_choice(rc: RawConfig, tmp_path: Path): assert manifest == table -def test_xsd_choice_max_occurs_unbound(rc: RawConfig, tmp_path: Path): +def test_xsd_choice_max_occurs_unbounded(rc: RawConfig, tmp_path: Path): xsd = """ @@ -528,7 +528,7 @@ def test_xsd_choice_max_occurs_unbound(rc: RawConfig, tmp_path: Path): Žemės sklypo pasikeitimo informacija - + Žemės sklypo unikalus numeris @@ -558,34 +558,28 @@ def test_xsd_choice_max_occurs_unbound(rc: RawConfig, 
tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Parcel1 | | | /parcels/parcel | | | | | | Žemės sklypo pasikeitimo informacija - | | | | | parcels | ref | Parcels | | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | parcel_unique_number | integer required | | parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris - | | | | | | | | | | - | | | | Parcel2 | | | /parcels/parcel | | | | | | Žemės sklypo pasikeitimo informacija - | | | | | parcels | ref | Parcels | | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | sign_of_change | integer required | | sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis - | | enum | | 1 | | | | | | - | | | | 2 | | | | | | - | | | | 3 | | | | | | - | | | | | | | | | | - | | | | Parcels | | | /parcels | | | | | | Pasikeitusių žemės sklypų sąrašas - | | | | | text | string | | text() | | | | | | - | | | | | parcel[] | backref | Parcel1 | | | | | | | - | | | | | parcel1[] | backref | Parcel2 | | | | | | | - | | | | | parcel1[].text | string | | parcel/text() | | | | | | - | | | | | parcel1[].parcel_unique_number | integer required | | parcel/parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris - | | | | | parcel2[].text | string | | parcel/text() | | | | | | - | | | | | parcel2[].sign_of_change | integer required | | parcel/sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis - | | enum | | 1 | | | | | | - | | | | 2 | | | | | | - | | | | 3 | | | | | | + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Parcels | | | /parcels | | | | | | Pasikeitusių žemės sklypų sąrašas + | | | | | parcel[] | 
backref | Parcel | parcel | | | | | | + | | | | | parcel[].parcel_unique_number | integer | | parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris + | | | | | parcel[].sign_of_change | integer | | sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis + | | enum | | 1 | | | | | | + | | | | 2 | | | | | | + | | | | 3 | | | | | | + | | | | | parcel[].text | string | | text() | | | | | | + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | Parcel | | | /parcels/parcel | | | | | | Žemės sklypo pasikeitimo informacija + | | | | | parcel_unique_number | integer | | parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris + | | | | | parcels | ref | Parcels | | | | | | | + | | | | | sign_of_change | integer | | sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis + | | enum | | 1 | | | | | | + | | | | 2 | | | | | | + | | | | 3 | | | | | | + | | | | | text | string | | text() | | | | | | """ @@ -626,27 +620,29 @@ def test_xsd_attributes(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Salyga | | | /SALYGOS/SALYGA | | | | | | - | | | | | kodas | string | | @kodas | | | | | | - | | | | | nr | integer | | @nr | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | reiksme | string required | | REIKSME/text() | | | | | | - | | | | | pavadinimas | string | | PAVADINIMAS/text() | | | | | | - | | | | | aprasymas | string | | APRASYMAS/text() | | | | | | - | | | | | | | | | | - | | | | Salygos | | | /SALYGOS | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | salyga | ref required | Salyga | | | | | | | - | | | | | salyga.kodas | string | | SALYGA/@kodas | | | | | | - | | | | | salyga.nr | integer | | SALYGA/@nr | | | | | | - | | | | | salyga.text | string 
| | SALYGA/text() | | | | | | - | | | | | salyga.reiksme | string required | | SALYGA/REIKSME/text() | | | | | | - | | | | | salyga.pavadinimas | string | | SALYGA/PAVADINIMAS/text() | | | | | | - | | | | | salyga.aprasymas | string | | SALYGA/APRASYMAS/text() | | | | | | + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Salygos | | | /SALYGOS | | | | | | + | | | | | salyga[] | backref required | Salyga | SALYGA | | | | | | + | | | | | salyga[].aprasymas | string | | APRASYMAS/text() | | | | | | + | | | | | salyga[].kodas | string | | @kodas | | | | | | + | | | | | salyga[].nr | integer | | @nr | | | | | | + | | | | | salyga[].pavadinimas | string | | PAVADINIMAS/text() | | | | | | + | | | | | salyga[].reiksme | string required | | REIKSME/text() | | | | | | + | | | | | salyga[].text | string | | text() | | | | | | + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | Salyga | | | /SALYGOS/SALYGA | | | | | | + | | | | | aprasymas | string | | APRASYMAS/text() | | | | | | + | | | | | kodas | string | | @kodas | | | | | | + | | | | | nr | integer | | @nr | | | | | | + | | | | | pavadinimas | string | | PAVADINIMAS/text() | | | | | | + | | | | | reiksme | string required | | REIKSME/text() | | | | | | + | | | | | salygos | ref | Salygos | | | | | | | + | | | | | text | string | | text() | | | | | | + """ path = tmp_path / 'manifest.xsd' @@ -689,23 +685,6 @@ def test_xsd_model_one_property(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Extracttz | | | /getTzByTRAResponse/extracttz | | | | | | - | | | | | extract_preparation_time | datetime | | extractPreparationTime/text() | | | 
| | | - | | | | | phipoteka | integer | | phipoteka/text() | | | | | | - | | | | | | | | | | - | | | | Klaida | | | /getTzByTRAResponse/klaida | | | | | | - | | | | | aprasymas | string | | Aprasymas/text() | | | | | | - | | | | | | | | | | - | | | | GetTzByTRAResponse | | | /getTzByTRAResponse | | | | | | - | | | | | search_parameters | string | | searchParameters/text() | | | | | | - | | | | | extracttz.extract_preparation_time | datetime | | extracttz/extractPreparationTime/text() | | | | | | - | | | | | extracttz.phipoteka | integer | | extracttz/phipoteka/text() | | | | | | - | | | | | klaida.aprasymas | string | | klaida/Aprasymas/text() | | | | | | - """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: From 5a5afa248ea9f2ea31d33225873cb56eafe987ca Mon Sep 17 00:00:00 2001 From: karina Date: Mon, 12 Aug 2024 16:27:14 +0300 Subject: [PATCH 43/62] deleted extra variable --- spinta/manifests/xsd/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 41b400b6e..205a203a7 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -790,7 +790,7 @@ def _split_choice( choice_copy = deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - returned_model_names, new_root_properties = self._create_model( + returned_model_names = self._create_model( node_copy, source_path=source_path, parent_model=parent_model, From 108d34572900c9af2443883a1e4769d05de01a2a Mon Sep 17 00:00:00 2001 From: karina Date: Mon, 12 Aug 2024 17:58:52 +0300 Subject: [PATCH 44/62] deleted extra variable --- spinta/manifests/xsd/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 3819060b8..260ac1e19 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -699,7 +699,7 @@ def _split_choice( choice_copy 
= deepcopy(choice) for node_in_choice in choice: choice_node_parent.insert(0, node_in_choice) - returned_model_names, new_root_properties = self._create_model( + returned_model_names = self._create_model( node_copy, source_path=source_path, parent_model=parent_model, From 318317cb172403861f4bd9e30f6dfe602c2439f5 Mon Sep 17 00:00:00 2001 From: karina Date: Mon, 12 Aug 2024 21:02:02 +0300 Subject: [PATCH 45/62] additional check for arrays --- spinta/manifests/xsd/helpers.py | 9 +- tests/manifests/xsd/test_xsd.py | 215 +++++++++++++++++--------------- 2 files changed, 121 insertions(+), 103 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 260ac1e19..ea4382a4c 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -531,9 +531,11 @@ def _properties_from_type_references( if not sequences: choices = complex_type.xpath("./*[local-name() = 'choice']") if choices and choices[0].get('maxOccurs') == 'unbounded': - sequences = choices is_array = True + if XSDReader.is_array(typed_element): + is_array = True + # avoiding recursion if typed_element.get("name") in source_path.split("/"): continue @@ -950,8 +952,9 @@ def _remove_proxy_models(self, model: XSDModel): if prop["type"] == "ref" and ref_prop["type"] == "backref": prop["type"] = "backref" property_id = f"{property_id}[]" - prop["external"]["name"] = f'{prop["external"]["name"]}/{ref_prop["external"]["name"]}' - prop["model"] = ref_prop["model"] + if "external" in prop and "external" in ref_prop: + prop["external"]["name"] = f'{prop["external"]["name"]}/{ref_prop["external"]["name"]}' + prop["model"] = ref_prop["model"] if not self._has_backref(model, referee) and parse_referee: self._remove_proxy_models(referee) diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index b8cda3479..91b036573 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -685,6 +685,20 @@ def 
test_xsd_model_one_property(rc: RawConfig, tmp_path: Path): """ table = """ + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | GetTzByTRAResponse | | | /getTzByTRAResponse | | | | | | + | | | | | extracttz | ref | Extracttz | extracttz | | | | | | + | | | | | extracttz.extract_preparation_time | datetime | | extracttz/extractPreparationTime/text() | | | | | | + | | | | | extracttz.phipoteka | integer | | extracttz/phipoteka/text() | | | | | | + | | | | | klaida | string | | klaida/Aprasymas/text() | | | | | | + | | | | | search_parameters | string | | searchParameters/text() | | | | | | + | | | | | | | | | | + | | | | Extracttz | | | /getTzByTRAResponse/extracttz | | | | | | + | | | | | extract_preparation_time | datetime | | extractPreparationTime/text() | | | | | | + | | | | | phipoteka | integer | | phipoteka/text() | | | | | | """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -736,15 +750,14 @@ def test_xsd_separate_simple_type(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Tyrimas | | | /TYRIMAS | | | | | | - | | | | | ct_e200ats_duom_sukurti | string | | CT_E200ATS_DUOM_SUKURTI/text() | | | | | | E200-ats duomenų sukūrimo data ir laikas - | | | | | ct_paciento_spi | string | | CT_PACIENTO_SPI/text() | | | | | | Paciento prisirašymo įstaigos pavadinimas - | | | | | ct_ctd_eminys_gautas | string | | CT_CTD_EMINYS_GAUTAS/text() | | | | | | Ėminio gavimo data - + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Tyrimas | 
| | /TYRIMAS | | | | | | + | | | | | ct_ctd_eminys_gautas | string | | CT_CTD_EMINYS_GAUTAS/text() | | | | | | Ėminio gavimo data + | | | | | ct_e200ats_duom_sukurti | string | | CT_E200ATS_DUOM_SUKURTI/text() | | | | | | E200-ats duomenų sukūrimo data ir laikas + | | | | | ct_paciento_spi | string | | CT_PACIENTO_SPI/text() | | | | | | Paciento prisirašymo įstaigos pavadinimas """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -796,38 +809,43 @@ def test_xsd_sequence_choice_sequence(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Documents1 | | | /data/responseData/documents | | | | | | - | | | | | birth_date | string | | birthDate/text() | | | | | | - | | | | | last_name | string | | lastName/text() | | | | | | - | | | | | first_name | string | | firstName/text() | | | | | | - | | | | | code | string | | code/text() | | | | | | - | | | | | iltu_code | string | | iltu_code/text() | | | | | | - | | | | | | | | | | - | | | | Documents2 | | | /data/responseData/documents | | | | | | - | | | | | business_name | string | | businessName/text() | | | | | | - | | | | | code | string | | code/text() | | | | | | - | | | | | iltu_code | string | | iltu_code/text() | | | | | | - | | | | | | | | | | - | | | | ResponseData | | | /data/responseData | | | | | | - | | | | | statement_id | string required | | statementId/text() | | | | | | - | | | | | title | string required | | title/text() | | | | | | - | | | | | | | | | | - | | | | Data | | | /data | | | | | | - | | | | | response_message | string | | responseMessage/text() | | | | | | - | | | | | response_data.documents1.birth_date | string | | responseData/documents/birthDate/text() | | | | | | - | | | | | response_data.documents1.last_name | string | | responseData/documents/lastName/text() | | | 
| | | - | | | | | response_data.documents1.first_name | string | | responseData/documents/firstName/text() | | | | | | - | | | | | response_data.documents1.code | string | | responseData/documents/code/text() | | | | | | - | | | | | response_data.documents1.iltu_code | string | | responseData/documents/iltu_code/text() | | | | | | - | | | | | response_data.documents2.business_name | string | | responseData/documents/businessName/text() | | | | | | - | | | | | response_data.documents2.code | string | | responseData/documents/code/text() | | | | | | - | | | | | response_data.documents2.iltu_code | string | | responseData/documents/iltu_code/text() | | | | | | - | | | | | response_data.statement_id | string required | | responseData/statementId/text() | | | | | | - | | | | | response_data.title | string required | | responseData/title/text() | | | | | | + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Data | | | /data | | | | | | + | | | | | response_data | ref | ResponseData | responseData | | | | | | + | | | | | response_data.documents | ref required | Documents1 | responseData/documents | | | | | | + | | | | | response_data.documents.birth_date | string | | responseData/documents/birthDate/text() | | | | | | + | | | | | response_data.documents.code | string | | responseData/documents/code/text() | | | | | | + | | | | | response_data.documents.first_name | string | | responseData/documents/firstName/text() | | | | | | + | | | | | response_data.documents.iltu_code | string | | responseData/documents/iltu_code/text() | | | | | | + | | | | | response_data.documents.last_name | string | | responseData/documents/lastName/text() | | | | | | + | | | | | response_data.documents1 | ref required | Documents2 | responseData/documents | | | | | | + | | | | | response_data.documents1.business_name | string | | 
responseData/documents/businessName/text() | | | | | | + | | | | | response_data.documents1.code | string | | responseData/documents/code/text() | | | | | | + | | | | | response_data.documents1.iltu_code | string | | responseData/documents/iltu_code/text() | | | | | | + | | | | | response_data.statement_id | string required | | responseData/statementId/text() | | | | | | + | | | | | response_data.title | string required | | responseData/title/text() | | | | | | + | | | | | response_message | string | | responseMessage/text() | | | | | | + | | | | | | | | | | + | | | | ResponseData | | | /data/responseData | | | | | | + | | | | | documents | ref required | Documents1 | documents | | | | | | + | | | | | documents1 | ref required | Documents2 | documents | | | | | | + | | | | | statement_id | string required | | statementId/text() | | | | | | + | | | | | title | string required | | title/text() | | | | | | + | | | | | | | | | | + | | | | Documents1 | | | /data/responseData/documents | | | | | | + | | | | | birth_date | string | | birthDate/text() | | | | | | + | | | | | code | string | | code/text() | | | | | | + | | | | | first_name | string | | firstName/text() | | | | | | + | | | | | iltu_code | string | | iltu_code/text() | | | | | | + | | | | | last_name | string | | lastName/text() | | | | | | + | | | | | | | | | | + | | | | Documents2 | | | /data/responseData/documents | | | | | | + | | | | | business_name | string | | businessName/text() | | | | | | + | | | | | code | string | | code/text() | | | | | | + | | | | | iltu_code | string | | iltu_code/text() | | | | | | """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -870,19 +888,18 @@ def test_xsd_complex_ontent(rc: RawConfig, tmp_path: Path): | | resource1 | xml | | | | | | | | | | | | | | | | | | | | | | BeFull | | | /BE_FULL | | | | | | - | | | | | title1 | string | | title1/text() | | | | | | - | | | | | title2 | string | | title2/text() | | | | | | + | | | | | contract_list | string 
| | ContractList/text() | | | | | | + | | | | | contract_statusas | string | | ContractStatusas/text() | | | | | | + | | | | | doc_list | string | | DocList/text() | | | | | | + | | | | | preorder_list | string | | PreorderList/text() | | | | | | + | | | | | preorder_statusas | string | | PreorderStatusas/text() | | | | | | | | | | | printeddate | string | | printeddate/text() | | | | | | | | | | | searchparameter1 | string | | searchparameter1/text() | | | | | | | | | | | searchparameter2 | string | | searchparameter2/text() | | | | | | | | | | | searchparameter3 | string | | searchparameter3/text() | | | | | | | | | | | statusas | string | | statusas/text() | | | | | | - | | | | | doc_list | string | | DocList/text() | | | | | | - | | | | | preorder_statusas | string | | PreorderStatusas/text() | | | | | | - | | | | | preorder_list | string | | PreorderList/text() | | | | | | - | | | | | contract_statusas | string | | ContractStatusas/text() | | | | | | - | | | | | contract_list | string | | ContractList/text() | | | | | | - + | | | | | title1 | string | | title1/text() | | | | | | + | | | | | title2 | string | | title2/text() | | | | | | """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -937,17 +954,13 @@ def test_xsd_recursion(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Action | | | /data/responseData/actions/action | | | | | | - | | | | | code | string required | | code/text() | | | | | | Paslaugos kodas (RC kodas) - | | | | | | | | | | - | | | | Data | | | /data | | | | | | - | | | | | response_message | string | | responseMessage/text() | | | | | | - | | | | | action.code | string required | | action/code/text() | | | | | | Paslaugos kodas (RC kodas) - + id | d | r | b | m | property | type | ref | source | prepare | level | 
access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Data | | | /data | | | | | | + | | | | | response_data | string required | | responseData/actions/action/code/text() | | | | | | Paslaugos kodas (RC kodas) + | | | | | response_message | string | | responseMessage/text() | | | | | | """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -1020,46 +1033,48 @@ def test_xsd_enumeration(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | ResponseData | | | /data/responseData | | | | | | - | | | | | who_may_consitute | string required | | who_may_consitute/text() | | | | | | Įgaliojimą gali sudaryti. - | | enum | | fiz | | | | | | - | | | | fiz-notarial | | | | | | - | | | | jur | | | | | | - | | | | jur-notarial | | | | | | - | | | | fiz-jur | | | | | | - | | | | fiz-notarial-jur-notarial | | | | | | - | | | | fiz-notarial-jur | | | | | | - | | | | fiz-jur-notarial | | | | | | - | | | | | default_description_editable | string required | | default_description_editable/text() | | | | | | Ar numatytasis aprašymas gali būti redaguojamas? 0 - NE, 1 - TAIP - | | enum | | 0 | | | | | | - | | | | 1 | | | | | | - | | | | | digital_service | string required | | digital_service/text() | | | | | | El. paslauga. 
Reikšmės: digital - Tik elektroninė paslauga, analog - Tik neelektroninė paslauga, digital-or-analog - Elektroninė arba neelektroninė paslauga - | | enum | | digital | | | | | | - | | | | analog | | | | | | - | | | | digital-or-analog | | | | | | - | | | | | | | | | | - | | | | Data | | | /data | | | | | | - | | | | | response_message | string | | responseMessage/text() | | | | | | - | | | | | response_data.who_may_consitute | string required | | responseData/who_may_consitute/text() | | | | | | Įgaliojimą gali sudaryti. - | | enum | | fiz | | | | | | - | | | | fiz-notarial | | | | | | - | | | | jur | | | | | | - | | | | jur-notarial | | | | | | - | | | | fiz-jur | | | | | | - | | | | fiz-notarial-jur-notarial | | | | | | - | | | | fiz-notarial-jur | | | | | | - | | | | fiz-jur-notarial | | | | | | - | | | | | response_data.default_description_editable | string required | | responseData/default_description_editable/text() | | | | | | Ar numatytasis aprašymas gali būti redaguojamas? 0 - NE, 1 - TAIP - | | enum | | 0 | | | | | | - | | | | 1 | | | | | | - | | | | | response_data.digital_service | string required | | responseData/digital_service/text() | | | | | | El. paslauga. Reikšmės: digital - Tik elektroninė paslauga, analog - Tik neelektroninė paslauga, digital-or-analog - Elektroninė arba neelektroninė paslauga - | | enum | | digital | | | | | | - | | | | analog | | | | | | - | | | | digital-or-analog | | | | | | + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Data | | | /data | | | | | | + | | | | | response_data | ref | ResponseData | responseData | | | | | | + | | | | | response_data.default_description_editable | string required | | responseData/default_description_editable/text() | | | | | | Ar numatytasis aprašymas gali būti redaguojamas? 
0 - NE, 1 - TAIP + | | enum | | 0 | | | | | | + | | | | 1 | | | | | | + | | | | | response_data.digital_service | string required | | responseData/digital_service/text() | | | | | | El. paslauga. Reikšmės: digital - Tik elektroninė paslauga, analog - Tik neelektroninė paslauga, digital-or-analog - Elektroninė arba neelektroninė paslauga + | | enum | | digital | | | | | | + | | | | analog | | | | | | + | | | | digital-or-analog | | | | | | + | | | | | response_data.who_may_consitute | string required | | responseData/who_may_consitute/text() | | | | | | Įgaliojimą gali sudaryti. + | | enum | | fiz | | | | | | + | | | | fiz-notarial | | | | | | + | | | | jur | | | | | | + | | | | jur-notarial | | | | | | + | | | | fiz-jur | | | | | | + | | | | fiz-notarial-jur-notarial | | | | | | + | | | | fiz-notarial-jur | | | | | | + | | | | fiz-jur-notarial | | | | | | + | | | | | response_message | string | | responseMessage/text() | | | | | | + | | | | | | | | | | + | | | | ResponseData | | | /data/responseData | | | | | | + | | | | | default_description_editable | string required | | default_description_editable/text() | | | | | | Ar numatytasis aprašymas gali būti redaguojamas? 0 - NE, 1 - TAIP + | | enum | | 0 | | | | | | + | | | | 1 | | | | | | + | | | | | digital_service | string required | | digital_service/text() | | | | | | El. paslauga. Reikšmės: digital - Tik elektroninė paslauga, analog - Tik neelektroninė paslauga, digital-or-analog - Elektroninė arba neelektroninė paslauga + | | enum | | digital | | | | | | + | | | | analog | | | | | | + | | | | digital-or-analog | | | | | | + | | | | | who_may_consitute | string required | | who_may_consitute/text() | | | | | | Įgaliojimą gali sudaryti. 
+ | | enum | | fiz | | | | | | + | | | | fiz-notarial | | | | | | + | | | | jur | | | | | | + | | | | jur-notarial | | | | | | + | | | | fiz-jur | | | | | | + | | | | fiz-notarial-jur-notarial | | | | | | + | | | | fiz-notarial-jur | | | | | | + | | | | fiz-jur-notarial | | | | | | + """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: From 9442a67f9dad951ae628c6134055638a9ef39a46 Mon Sep 17 00:00:00 2001 From: karina Date: Mon, 12 Aug 2024 21:34:23 +0300 Subject: [PATCH 46/62] refs for backrefs moved after other steps --- spinta/manifests/xsd/helpers.py | 92 ++++++++++----------------------- 1 file changed, 27 insertions(+), 65 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index ea4382a4c..67215a031 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -289,7 +289,7 @@ def has_non_ref_properties(self) -> bool: return any([prop["type"] not in ("ref", "backerf") for prop in self.properties.values()]) def add_ref_property(self, ref_model): - property_id = to_property_name(ref_model.basename) + property_id = self.deduplicate(to_property_name(ref_model.basename)) prop = {"type": "ref", "model": ref_model.name} self.properties.update({property_id: prop}) @@ -540,28 +540,16 @@ def _properties_from_type_references( if typed_element.get("name") in source_path.split("/"): continue - if not is_array: - referenced_model_names = self._create_model( - typed_element, - source_path=new_source_path, - parent_model=model - ) - property_type = "ref" - else: - referenced_element_properties = { - to_property_name(model.basename): - { - "type": "ref", - "model": f"{model.name}" - } - } + if is_array: property_type = "backref" - referenced_model_names = self._create_model( - typed_element, - source_path=new_source_path, - parent_model=model, - additional_properties=referenced_element_properties - ) + + else: + property_type = "ref" + referenced_model_names = self._create_model( + 
typed_element, + source_path=new_source_path, + parent_model=model, + ) for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(typed_element, is_array=is_array) @@ -605,7 +593,6 @@ def _properties_from_references( if not sequences: choices = complex_type.xpath("./*[local-name() = 'choice']") if choices and XSDReader.is_array(choices[0]): - sequences = choices # we only make this array if it's only one choice, which means it's a wrapper for an array # also, if it's mixed, and has choices inside, it's not an array even if choices are unbound if len(choices[0]) == 1 and not complex_type.get("mixed") == "true": @@ -622,28 +609,15 @@ def _properties_from_references( if XSDReader.is_array(ref_element): is_array = True - if not is_array: - referenced_model_names = self._create_model( - referenced_element, - source_path=new_source_path, - parent_model=model - ) - property_type = "ref" - else: - referenced_element_properties = { - to_property_name(model.basename): - { - "type": "ref", - "model": f"{model.name}" - } - } + if is_array: property_type = "backref" - referenced_model_names = self._create_model( - referenced_element, - source_path=new_source_path, - parent_model=model, - additional_properties=referenced_element_properties - ) + else: + property_type = "ref" + referenced_model_names = self._create_model( + referenced_element, + source_path=new_source_path, + parent_model=model + ) for referenced_model_name in referenced_model_names: property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) @@ -666,7 +640,6 @@ def _split_choice( node: _Element, source_path: str, parent_model: XSDModel, - additional_properties: dict[str, dict[str, str | bool | dict[str, str]]], is_root_model: bool = False ) -> list[str]: """ @@ -705,7 +678,6 @@ def _split_choice( node_copy, source_path=source_path, parent_model=parent_model, - additional_properties=additional_properties, is_root_model=is_root_model ) @@ 
-720,7 +692,6 @@ def _split_choice( node_copy, source_path=source_path, parent_model=parent_model, - additional_properties=additional_properties, is_root_model=is_root_model ) model_names.extend(returned_model_names) @@ -734,7 +705,6 @@ def _create_model( source_path: str = "", is_root_model: bool = False, parent_model: XSDModel = None, - additional_properties: dict[str, str | bool | dict[str, str | dict[str, Any]]] = None ) -> list[str]: """ Parses an element and makes a model out of it. If it is a complete model, it will be added to the models list. @@ -742,12 +712,8 @@ def _create_model( model = XSDModel(self) model.parent_model = parent_model - if additional_properties is None: - additional_properties = {} - # properties of this model properties = {} - properties.update(additional_properties) properties.update(model.attributes_to_properties(node)) new_source_path = f"{source_path}/{node.get('name')}" @@ -775,7 +741,6 @@ def _create_model( node, source_path=source_path, parent_model=parent_model, - additional_properties=additional_properties, is_root_model=is_root_model ) @@ -891,6 +856,13 @@ def _parse_root_node(self): def _extract_namespaces(self): self.namespaces = self.root.nsmap + def _add_refs_for_backrefs(self): + for model in self.models.values(): + for property_id, prop in model.properties: + if prop["type"] == "backref": + referenced_model = self.models[prop["model"]] + referenced_model.add_ref_property(model) + def start(self): self._extract_root() @@ -904,6 +876,8 @@ def start(self): self._compile_nested_properties() + self._add_refs_for_backrefs() + def remove_extra_root_models(self, model: XSDModel) -> XSDModel: """ removes root models that have only one property from the root @@ -1043,18 +1017,6 @@ def _compile_nested_properties(self): self._add_model_nested_properties(parsed_model, parsed_model) - # parsed_model.properties.update(nested_properties) - - # if some nested properties are backrefs and still don't have refs - # (in case o indirect 
links), we need to add them - - for prop in parsed_model.properties.values(): - if prop.get("type") == "backref": - ref_model_name = prop.get("model") - ref_model = self.models[ref_model_name] - if ref_model: - ref_model.add_ref_property(parsed_model) - parsed_model.properties = dict(sorted(parsed_model.properties.items())) From 34c9996bb309e6774a7983a7975cea99228590ed Mon Sep 17 00:00:00 2001 From: karina Date: Mon, 12 Aug 2024 22:16:30 +0300 Subject: [PATCH 47/62] refs or backres at thhe end --- spinta/manifests/xsd/helpers.py | 13 +++++++++---- tests/manifests/xsd/test_xsd.py | 7 +++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 67215a031..7da2c7855 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -858,7 +858,7 @@ def _extract_namespaces(self): def _add_refs_for_backrefs(self): for model in self.models.values(): - for property_id, prop in model.properties: + for property_id, prop in model.properties.items(): if prop["type"] == "backref": referenced_model = self.models[prop["model"]] referenced_model.add_ref_property(model) @@ -906,22 +906,24 @@ def _remove_proxy_models(self, model: XSDModel): self.new_models[model.name] = model + new_properties = {} for property_id, prop in model.properties.items(): if prop["type"] in ("ref", "backref"): referee = self.models[prop["model"]] parse_referee = True while len(referee.properties) == 1: ref_property_id, ref_prop = list(referee.properties.items())[0] + + # if it's not a ref, this means that it's a final property, and we add it as a property itself if ref_prop["type"] not in ("ref", "backref"): - ref_prop["external"]["name"] = f'{prop["external"]["name"]}/{ref_prop["external"]["name"]}' + prop["external"]["name"] = f'{prop["external"]["name"]}/{ref_prop["external"]["name"]}' parse_referee = False - model.properties[property_id] = ref_prop break if prop["type"] == "backref" and ref_prop["type"] 
== "backref": + # basically, do nothing break else: - referee = self.models[ref_prop["model"]] if prop["type"] == "ref" and ref_prop["type"] == "backref": prop["type"] = "backref" @@ -933,6 +935,9 @@ def _remove_proxy_models(self, model: XSDModel): if not self._has_backref(model, referee) and parse_referee: self._remove_proxy_models(referee) + new_properties[property_id] = prop + model.properties = new_properties + def _remove_unneeded_models(self): """ Proxy models are those that have only one property which is a ref to another model. diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index 91b036573..ebe006a10 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -294,10 +294,10 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): | | | | | | | | | | | | | | Asmuo | | | /klientu_saraso_rezultatas/asmenys/asmuo | | | | | | | | | | | ak | string required | | @ak | | | | | | - | | | | | asmenys | ref | Asmenys | | | | | | | | | | | | id | string required | | @id | | | | | | - | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | | | | | | text | string | | text() | | | | | | + | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | + | | | | | asmenys | ref | Asmenys | | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -1081,3 +1081,6 @@ def test_xsd_enumeration(rc: RawConfig, tmp_path: Path): xsd_file.write(xsd) manifest = load_manifest(rc, path) assert manifest == table + + +# todo add test from gr 605 - refs and arrays From ad6331e2c82bd924018b97aca09925696a90ee69 Mon Sep 17 00:00:00 2001 From: karina Date: Tue, 13 Aug 2024 11:38:37 +0300 Subject: [PATCH 48/62] deleted outdated comments --- spinta/manifests/xsd/helpers.py | 20 +++++---------- tests/manifests/xsd/test_xsd.py | 45 ++++++++++++++++++++------------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 
7da2c7855..addb02490 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -863,6 +863,10 @@ def _add_refs_for_backrefs(self): referenced_model = self.models[prop["model"]] referenced_model.add_ref_property(model) + def _sort_properties_alpabetically(self): + for model in self.models.values(): + model.properties = dict(sorted(model.properties.items())) + def start(self): self._extract_root() @@ -878,6 +882,8 @@ def start(self): self._add_refs_for_backrefs() + self._sort_properties_alpabetically() + def remove_extra_root_models(self, model: XSDModel) -> XSDModel: """ removes root models that have only one property from the root @@ -892,9 +898,6 @@ def remove_extra_root_models(self, model: XSDModel) -> XSDModel: else: stop_removing = True - # todo: what if a root element is an array, like in klasifikatoriai. There can be only one - # root element. So it's rowset, and then inside rowset the re are many rows. - # Is this somehow important for us, for later reading the xml, or not? return model def _remove_proxy_models(self, model: XSDModel): @@ -1022,8 +1025,6 @@ def _compile_nested_properties(self): self._add_model_nested_properties(parsed_model, parsed_model) - parsed_model.properties = dict(sorted(parsed_model.properties.items())) - def read_schema( context: Context, @@ -1112,15 +1113,6 @@ def read_schema( Root model or models can have nested properties if they have any properties that point to other models. - - TODO: there are 3 types of creating references: - through ref - through type - direct, when one element that corresponds to a model is inside another one. This still doesn't work. - It also seems that it even stopped adding models if they are connected this way. 
- - TODO: JADIS 455 sukuria Asmuo modelį, bet ne property jam modelyje Israsas - """ xsd = XSDReader(path, dataset_name) diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index ebe006a10..22f4ab31e 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -294,10 +294,10 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): | | | | | | | | | | | | | | Asmuo | | | /klientu_saraso_rezultatas/asmenys/asmuo | | | | | | | | | | | ak | string required | | @ak | | | | | | + | | | | | asmenys | ref | Asmenys | | | | | | | | | | | | id | string required | | @id | | | | | | - | | | | | text | string | | text() | | | | | | | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | - | | | | | asmenys | ref | Asmenys | | | | | | | + | | | | | text | string | | text() | | | | | | """ path = tmp_path / 'manifest.xsd' @@ -340,22 +340,31 @@ def test_xsd_resource_model(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Resource | | | / | | | | http://www.w3.org/2000/01/rdf-schema#Resource | | Įvairūs duomenys - | | | | | klaida | string | | klaida/text() | | | | | | Klaidos atveju - klaidos pranešimas - | | | | | | | | | | - | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | - | | | | | asmenys | ref | Asmenys | asmenys | | | | | | - | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris - | | | | | asmenys.text | string | | asmenys/text() | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | | | | | | - | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | - | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris - | | | | | text | string | | text() | | | | 
| | + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | + | | | | | asmenys | ref | Asmenys | asmenys | | | | | | + | | | | | asmenys.asmuo[] | backref | Asmuo | asmenys/asmuo | | | | | | + | | | | | asmenys.asmuo[].ak | string required | | @ak | | | | | | + | | | | | asmenys.asmuo[].id | string required | | @id | | | | | | + | | | | | asmenys.asmuo[].text | string | | text() | | | | | | + | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris + | | | | | asmenys.text | string | | asmenys/text() | | | | | | + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | + | | | | | asmuo[] | backref | Asmuo | asmuo | | | | | | + | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | Asmuo | | | /klientu_saraso_rezultatas/asmenys/asmuo | | | | | | + | | | | | ak | string required | | @ak | | | | | | + | | | | | asmenys | ref | Asmenys | | | | | | | + | | | | | id | string required | | @id | | | | | | + | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | + | | | | | text | string | | text() | | | | | | """ path = tmp_path / 'manifest.xsd' From 655a42c3fa327762cf3275a4a2ef156947f7d5e0 Mon Sep 17 00:00:00 2001 From: karina Date: Tue, 13 Aug 2024 21:26:33 +0300 Subject: [PATCH 49/62] searching for root elements only --- spinta/manifests/xsd/helpers.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 205a203a7..4da660081 100644 --- a/spinta/manifests/xsd/helpers.py +++ 
b/spinta/manifests/xsd/helpers.py @@ -407,7 +407,7 @@ def _get_referenced_node(self, node): ref = node.get("ref") if ":" in ref: ref = ref.split(":")[1] - xpath_query = f"//*[@name='{ref}']" + xpath_query = f"/*/*[@name='{ref}']" referenced_node = self.root.xpath(xpath_query)[0] return referenced_node @@ -883,10 +883,13 @@ def _create_model( properties.update(model.properties_from_simple_elements(sequence_node)) # TODO: in this case, it might be something else, not sequence too - if complex_type_node.xpath(f'./*[local-name() = "sequence"]') \ - or complex_type_node.xpath(f'./*[local-name() = "all"]')\ - or complex_type_node.xpath(f'./*[local-name() = "simpleContent"]')\ - or len(complex_type_node) > 0: + if ( + complex_type_node.xpath(f'./*[local-name() = "sequence"]') or + complex_type_node.xpath(f'./*[local-name() = "all"]') or + complex_type_node.xpath(f'./*[local-name() = "simpleContent"]') or + choices or + len(complex_type_node) > 0 + ): """ source: https://stackoverflow.com/questions/36286056/the-difference-between-all-sequence-choice-and-group-in-xsd When to use xsd:all, xsd:sequence, xsd:choice, or xsd:group: From 49dddfac9ad975d006d545fb5ab0550b30140953 Mon Sep 17 00:00:00 2001 From: karina Date: Tue, 13 Aug 2024 21:31:28 +0300 Subject: [PATCH 50/62] searching for root elements only --- spinta/manifests/xsd/helpers.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index addb02490..fd4a3ba6c 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -407,7 +407,7 @@ def _get_referenced_node(self, node): ref = node.get("ref") if ":" in ref: ref = ref.split(":")[1] - xpath_query = f"//*[@name='{ref}']" + xpath_query = f"/*/*[@name='{ref}']" referenced_node = self.root.xpath(xpath_query)[0] return referenced_node @@ -759,10 +759,13 @@ def _create_model( properties.update(model.properties_from_simple_elements(sequence_node)) # TODO: in 
this case, it might be something else, not sequence too - if complex_type_node.xpath(f'./*[local-name() = "sequence"]') \ - or complex_type_node.xpath(f'./*[local-name() = "all"]')\ - or complex_type_node.xpath(f'./*[local-name() = "simpleContent"]')\ - or len(complex_type_node) > 0: + if ( + complex_type_node.xpath(f'./*[local-name() = "sequence"]') or + complex_type_node.xpath(f'./*[local-name() = "all"]') or + complex_type_node.xpath(f'./*[local-name() = "simpleContent"]') or + choices or + len(complex_type_node) > 0 + ): """ source: https://stackoverflow.com/questions/36286056/the-difference-between-all-sequence-choice-and-group-in-xsd When to use xsd:all, xsd:sequence, xsd:choice, or xsd:group: From 66cc4fca9e788002ce0a2d27a35f5e2e107e40d3 Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 14 Aug 2024 15:26:46 +0300 Subject: [PATCH 51/62] fixed removal of unneded models --- spinta/manifests/xsd/helpers.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index fd4a3ba6c..ed1cf2709 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -923,6 +923,11 @@ def _remove_proxy_models(self, model: XSDModel): # if it's not a ref, this means that it's a final property, and we add it as a property itself if ref_prop["type"] not in ("ref", "backref"): prop["external"]["name"] = f'{prop["external"]["name"]}/{ref_prop["external"]["name"]}' + prop["required"] = ref_prop["required"] + prop["type"] = ref_prop["type"] + del prop["model"] + property_id = ref_property_id + parse_referee = False break From ffacb8d95b1d0535d00602f76e7e13470a67a7bb Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 14 Aug 2024 17:34:59 +0300 Subject: [PATCH 52/62] when removing proxy models, transfer array to property --- spinta/manifests/xsd/helpers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 
ed1cf2709..f8658e7c7 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -923,11 +923,21 @@ def _remove_proxy_models(self, model: XSDModel): # if it's not a ref, this means that it's a final property, and we add it as a property itself if ref_prop["type"] not in ("ref", "backref"): prop["external"]["name"] = f'{prop["external"]["name"]}/{ref_prop["external"]["name"]}' + + # also transfer all attributes of the property prop["required"] = ref_prop["required"] + + is_array = False + if prop["type"] == "backref": + is_array = True + prop["type"] = ref_prop["type"] del prop["model"] property_id = ref_property_id + if is_array: + property_id = f"{property_id}[]" + parse_referee = False break From a706a9a133fad9a72889205776555bb5aa9e55f4 Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 14 Aug 2024 18:56:40 +0300 Subject: [PATCH 53/62] prop required is transfered when model is removed --- spinta/manifests/xsd/helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index f8658e7c7..45fef46cf 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -926,6 +926,7 @@ def _remove_proxy_models(self, model: XSDModel): # also transfer all attributes of the property prop["required"] = ref_prop["required"] + prop["description"] = ref_prop["description"] is_array = False if prop["type"] == "backref": @@ -946,6 +947,7 @@ def _remove_proxy_models(self, model: XSDModel): break else: referee = self.models[ref_prop["model"]] + referee.parent_model = model if prop["type"] == "ref" and ref_prop["type"] == "backref": prop["type"] = "backref" property_id = f"{property_id}[]" @@ -953,6 +955,7 @@ def _remove_proxy_models(self, model: XSDModel): prop["external"]["name"] = f'{prop["external"]["name"]}/{ref_prop["external"]["name"]}' prop["model"] = ref_prop["model"] + if not self._has_backref(model, referee) and parse_referee: self._remove_proxy_models(referee) 
From fef50bd843d6bcde40d10437c96ae45e4279cef2 Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 14 Aug 2024 19:57:27 +0300 Subject: [PATCH 54/62] improved detecting required properties --- spinta/manifests/xsd/helpers.py | 34 +++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 45fef46cf..09a5dcdf7 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -735,14 +735,13 @@ def _create_model( choices = complex_type_node.xpath(f'./*[local-name() = "choice"]/*[local-name() = "choice"]') else: choices = complex_type_node.xpath(f'./*[local-name() = "sequence"]/*[local-name() = "choice"]') - if choices: - if choices[0].get("maxOccurs") != "unbounded": - return self._split_choice( - node, - source_path=source_path, - parent_model=parent_model, - is_root_model=is_root_model - ) + if choices and choices[0].get("maxOccurs") != "unbounded": + return self._split_choice( + node, + source_path=source_path, + parent_model=parent_model, + is_root_model=is_root_model + ) # if complextype node's property mixed is true, it allows text inside if complex_type_node.get("mixed") == "true": @@ -814,6 +813,9 @@ def _create_model( sequence_or_all_node, model=model, source_path=new_source_path) + if properties_required is False: + for prop in properties_from_references.values(): + prop["required"] = False properties.update(properties_from_references) # complex type child nodes - to models @@ -821,6 +823,9 @@ def _create_model( sequence_or_all_node, model=model, source_path=new_source_path) + if properties_required is False: + for prop in properties_from_references.values(): + prop["required"] = False properties.update(properties_from_references) model.properties = properties @@ -1144,3 +1149,16 @@ def read_schema( for model_name, parsed_model in xsd.models.items(): yield None, parsed_model.get_data() + + +# todo šitoj situacijoj neturėtų būti 
required: +# < xs: choice +# minOccurs = "0" +# maxOccurs = "unbounded" > +# < xs: element +# ref = "PAVARDE" / > +# < xs: element +# ref = "VARDAS" / > +# < xs: element +# ref = "PAVADINIMAS" / > +# < / xs: choice > From 83b09686d24b03ab75f1788ac491d1fd572ab8b0 Mon Sep 17 00:00:00 2001 From: karina Date: Mon, 19 Aug 2024 15:27:54 +0300 Subject: [PATCH 55/62] #622 when proxy model referes to ab array property, the resulting property has to be array --- spinta/manifests/xsd/helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 09a5dcdf7..19e58b916 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -578,7 +578,8 @@ def _properties_from_references( referenced_element = self._get_referenced_node(ref_element) if self.node_is_simple_type_or_inline(referenced_element): - property_id, prop = model.simple_element_to_property(referenced_element) + is_array = XSDReader.is_array(ref_element) + property_id, prop = model.simple_element_to_property(referenced_element, is_array=is_array) if not XSDReader.is_required(ref_element): prop["required"] = False properties[property_id] = prop @@ -1162,3 +1163,4 @@ def read_schema( # < xs: element # ref = "PAVADINIMAS" / > # < / xs: choice > +# todo 47 - dokumentas turėtų būti array, bet ne ref, o paprasta savybė array From 94f472e4f799b346e2e95bb3bcaa954ae4dbf4ee Mon Sep 17 00:00:00 2001 From: karina Date: Wed, 21 Aug 2024 21:10:32 +0300 Subject: [PATCH 56/62] #622 all tests are passing --- tests/manifests/xsd/test_helpers.py | 120 +++++++--------------------- tests/manifests/xsd/test_xsd.py | 71 ++++++++-------- 2 files changed, 65 insertions(+), 126 deletions(-) diff --git a/tests/manifests/xsd/test_helpers.py b/tests/manifests/xsd/test_helpers.py index 95e169456..262e22b23 100644 --- a/tests/manifests/xsd/test_helpers.py +++ b/tests/manifests/xsd/test_helpers.py @@ -80,8 +80,7 @@ def 
test_get_property_type(): schema = etree.fromstring(element_string) element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") - model = XSDModel(xsd, schema) - result = model.get_property_type(element) + result = xsd.get_property_type(element) assert result == "string" @@ -103,8 +102,7 @@ def test_get_property_type_ref(): element = schema.xpath('//*[@ref="asmenys"]', )[0] print("ELEMENT:", element) xsd = XSDReader("test.xsd", "dataset1") - model = XSDModel(xsd, schema) - result = model.get_property_type(element) + result = xsd.get_property_type(element) assert result == "ref" @@ -126,8 +124,7 @@ def test_get_property_type_simple_type(): schema = etree.fromstring(element_string) element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") - model = XSDModel(xsd, schema) - result = model.get_property_type(element) + result = xsd.get_property_type(element) assert result == "string" @@ -144,8 +141,7 @@ def test_get_property_type_custom(): element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") xsd.custom_types = {"some_type": {"base": "string"}} - model = XSDModel(xsd, schema) - result = model.get_property_type(element) + result = xsd.get_property_type(element) assert result == "string" @@ -161,8 +157,7 @@ def test_get_property_type_unknown(): schema = etree.fromstring(element_string) element = schema.xpath('*[local-name() = "element"]')[0] xsd = XSDReader("test.xsd", "dataset1") - model = XSDModel(xsd, schema) - result = model.get_property_type(element) + result = xsd.get_property_type(element) assert result == "string" @@ -716,7 +711,7 @@ def test_properties_from_references(): model = XSDModel(xsd, schema) result = xsd._properties_from_references(sequence, model, source_path="tst") - assert result == ({ + assert result == { 'ct_e200_fc_id': { 'description': 'E200 duomenų kompozicijos unikalus identifikatorius', 'enums': {}, @@ -734,8 +729,8 @@ def 
test_properties_from_references(): }, 'required': True, 'type': 'integer', - }, - }, {}) + } + } def test_properties_from_references_complex_not_array(): @@ -780,13 +775,14 @@ def test_properties_from_references_complex_not_array(): xsd = XSDReader("test.xsd", "dataset1") xsd.root = schema model = XSDModel(xsd, schema) + xsd.namespaces = [] result = xsd._properties_from_references(sequence, model, source_path="tst") - assert result == ({ + assert result == { 'fiziniai_asmenys': { 'description': '', 'enums': {}, - 'external': {'name': ''}, + 'external': {'name': 'FIZINIAI_ASMENYS'}, 'model': 'test/FiziniaiAsmenys', 'required': True, 'type': 'ref', @@ -794,39 +790,15 @@ def test_properties_from_references_complex_not_array(): 'objektai': { 'description': '', 'enums': {}, - 'external': {'name': ''}, + 'external': {'name': 'OBJEKTAI'}, 'model': 'test/Objektai', 'required': True, 'type': 'ref', }, - }, - {'test/FiziniaiAsmenys': {'objektu_asmenys[]': {'description': '', - 'enums': {}, - 'external': { - 'name': 'FIZINIAI_ASMENYS/OBJEKTU_ASMENYS/text()'}, - 'required': False, - 'type': 'string'}, - 'tekstiniai_duomenys[]': {'description': '', - 'enums': {}, - 'external': { - 'name': 'FIZINIAI_ASMENYS/TEKSTINIAI_DUOMENYS/text()'}, - 'required': False, - 'type': 'string'}}, - 'test/Objektai': {'objektu_asmenys[]': {'description': '', - 'enums': {}, - 'external': {'name': 'OBJEKTAI/OBJEKTU_ASMENYS/text()'}, - 'required': False, - 'type': 'string'}, - 'tekstiniai_duomenys[]': {'description': '', - 'enums': {}, - 'external': { - 'name': 'OBJEKTAI/TEKSTINIAI_DUOMENYS/text()'}, - 'required': False, - 'type': 'string'}}} - ) - - - assert xsd.models[0].get_data() == { + } + + + assert list(xsd.models.values())[0].get_data() == { 'description': 'Pagrindiniai juridinio asmens duomenys.', 'external': { 'dataset': 'test', @@ -857,7 +829,7 @@ def test_properties_from_references_complex_not_array(): 'type': 'model', } - assert xsd.models[1].get_data() == { + assert 
list(xsd.models.values())[1].get_data() == { 'description': 'Pagrindiniai juridinio asmens duomenys.', 'external': { 'dataset': 'test', @@ -933,13 +905,14 @@ def test_properties_from_references_complex_array(): xsd.root = schema model = XSDModel(xsd, schema) model.set_name("test") + xsd.namespaces = [] result = xsd._properties_from_references(sequence, model, source_path="tst") - assert result == ({ + assert result == { 'fiziniai_asmenys[]': { 'description': '', 'enums': {}, - 'external': {'name': ''}, + 'external': {'name': 'FIZINIAI_ASMENYS'}, 'model': 'test/FiziniaiAsmenys', 'required': False, 'type': 'backref', @@ -947,43 +920,15 @@ def test_properties_from_references_complex_array(): 'objektai[]': { 'description': '', 'enums': {}, - 'external': {'name': ''}, + 'external': {'name': 'OBJEKTAI'}, 'model': 'test/Objektai', 'required': True, 'type': 'backref', - }, - }, - {'test/FiziniaiAsmenys[]': {'objektu_asmenys[]': {'description': '', - 'enums': {}, - 'external': { - 'name': 'FIZINIAI_ASMENYS/OBJEKTU_ASMENYS/text()'}, - 'required': False, - 'type': 'string'}, - 'tekstiniai_duomenys[]': {'description': '', - 'enums': {}, - 'external': { - 'name': 'FIZINIAI_ASMENYS/TEKSTINIAI_DUOMENYS/text()'}, - 'required': False, - 'type': 'string'}, - 'test': {'model': 'test/test', - 'type': 'ref'}}, - 'test/Objektai[]': {'objektu_asmenys[]': {'description': '', - 'enums': {}, - 'external': {'name': 'OBJEKTAI/OBJEKTU_ASMENYS/text()'}, - 'required': False, - 'type': 'string'}, - 'tekstiniai_duomenys[]': {'description': '', - 'enums': {}, - 'external': { - 'name': 'OBJEKTAI/TEKSTINIAI_DUOMENYS/text()'}, - 'required': False, - 'type': 'string'}, - 'test': {'model': 'test/test', - 'type': 'ref'}}}, - ) - - - assert xsd.models[0].get_data() == { + } + } + + + assert list(xsd.models.values())[0].get_data() == { 'description': 'Pagrindiniai juridinio asmens duomenys.', 'external': { 'dataset': 'test', @@ -1010,16 +955,12 @@ def test_properties_from_references_complex_array(): 
'required': False, 'type': 'string', }, - 'test': { - 'model': 'test/test', - 'type': 'ref', - }, }, 'type': 'model', } - assert xsd.models[1].get_data() == { + assert list(xsd.models.values())[1].get_data() == { 'description': 'Pagrindiniai juridinio asmens duomenys.', 'external': { 'dataset': 'test', @@ -1046,10 +987,6 @@ def test_properties_from_references_complex_array(): 'required': False, 'type': 'string', }, - 'test': { - 'model': 'test/test', - 'type': 'ref', - }, }, 'type': 'model', } @@ -1215,6 +1152,7 @@ def test_properties_from_simple_elements(): xsd = XSDReader("test.xsd", "dataset1") xsd.root = schema model = XSDModel(xsd, schema) + xsd.namespaces = [] result = model.properties_from_simple_elements(schema) assert result == { 'ct_e200_forma': { @@ -1249,6 +1187,7 @@ def test_properties_from_simple_elements_mix(): xsd = XSDReader("test.xsd", "dataset1") xsd.root = schema model = XSDModel(xsd, schema) + xsd.namespaces = [] result = model.properties_from_simple_elements(schema) assert result == { 'ct_e200_forma': { @@ -1297,6 +1236,7 @@ def test_properties_from_simple_elements_not_from_sequence(): xsd = XSDReader("test.xsd", "dataset1") xsd.root = schema model = XSDModel(xsd, schema) + xsd.namespaces = [] result = model.properties_from_simple_elements(schema) assert result == { 'ct_e200_forma': { diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index 22f4ab31e..87fbb665b 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -235,7 +235,7 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): - + @@ -244,7 +244,7 @@ def test_xsd_ref(rc: RawConfig, tmp_path: Path): - + @@ -340,31 +340,23 @@ def test_xsd_resource_model(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | KlientuSarasoRezultatas | | 
| /klientu_saraso_rezultatas | | | | | | - | | | | | asmenys | ref | Asmenys | asmenys | | | | | | - | | | | | asmenys.asmuo[] | backref | Asmuo | asmenys/asmuo | | | | | | - | | | | | asmenys.asmuo[].ak | string required | | @ak | | | | | | - | | | | | asmenys.asmuo[].id | string required | | @id | | | | | | - | | | | | asmenys.asmuo[].text | string | | text() | | | | | | - | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris - | | | | | asmenys.text | string | | asmenys/text() | | | | | | - | | | | | text | string | | text() | | | | | | - | | | | | | | | | | - | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | - | | | | | asmuo[] | backref | Asmuo | asmuo | | | | | | - | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris - | | | | | text | string | | text() | | | | | | - | | | | | | | | | | - | | | | Asmuo | | | /klientu_saraso_rezultatas/asmenys/asmuo | | | | | | - | | | | | ak | string required | | @ak | | | | | | - | | | | | asmenys | ref | Asmenys | | | | | | | - | | | | | id | string required | | @id | | | | | | - | | | | | klientu_saraso_rezultatas | ref | KlientuSarasoRezultatas | | | | | | | - | | | | | text | string | | text() | | | | | | + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Resource | | | / | | | | http://www.w3.org/2000/01/rdf-schema#Resource | | Įvairūs duomenys + | | | | | klaida | string | | klaida/text() | | | | | | Klaidos atveju - klaidos pranešimas + | | | | | | | | | | + | | | | KlientuSarasoRezultatas | | | /klientu_saraso_rezultatas | | | | | | + | | | | | asmenys | ref | Asmenys | asmenys | | | | | | + | | | | | asmenys.puslapis | integer required | | asmenys/@puslapis | | | | | | rezultatu puslapio numeris + | | | | | asmenys.text | string | | asmenys/text() | | 
| | | | + | | | | | text | string | | text() | | | | | | + | | | | | | | | | | + | | | | Asmenys | | | /klientu_saraso_rezultatas/asmenys | | | | | | + | | | | | puslapis | integer required | | @puslapis | | | | | | rezultatu puslapio numeris + | | | | | text | string | | text() | | | | | | + """ path = tmp_path / 'manifest.xsd' @@ -634,7 +626,7 @@ def test_xsd_attributes(rc: RawConfig, tmp_path: Path): | | resource1 | xml | | | | | | | | | | | | | | | | | | | | | | Salygos | | | /SALYGOS | | | | | | - | | | | | salyga[] | backref required | Salyga | SALYGA | | | | | | + | | | | | salyga[] | backref | Salyga | SALYGA | | | | | | | | | | | salyga[].aprasymas | string | | APRASYMAS/text() | | | | | | | | | | | salyga[].kodas | string | | @kodas | | | | | | | | | | | salyga[].nr | integer | | @nr | | | | | | @@ -699,15 +691,16 @@ def test_xsd_model_one_property(rc: RawConfig, tmp_path: Path): | | resource1 | xml | | | | | | | | | | | | | | | | | | | | | | GetTzByTRAResponse | | | /getTzByTRAResponse | | | | | | + | | | | | aprasymas | string | | klaida/Aprasymas/text() | | | | | | | | | | | extracttz | ref | Extracttz | extracttz | | | | | | | | | | | extracttz.extract_preparation_time | datetime | | extracttz/extractPreparationTime/text() | | | | | | | | | | | extracttz.phipoteka | integer | | extracttz/phipoteka/text() | | | | | | - | | | | | klaida | string | | klaida/Aprasymas/text() | | | | | | | | | | | search_parameters | string | | searchParameters/text() | | | | | | | | | | | | | | | | | | | | Extracttz | | | /getTzByTRAResponse/extracttz | | | | | | | | | | | extract_preparation_time | datetime | | extractPreparationTime/text() | | | | | | | | | | | phipoteka | integer | | phipoteka/text() | | | | | | + """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: @@ -963,13 +956,19 @@ def test_xsd_recursion(rc: RawConfig, tmp_path: Path): """ table = """ - id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title 
| description - | manifest | | | | | | | | | - | | resource1 | xml | | | | | | | | - | | | | | | | | | | - | | | | Data | | | /data | | | | | | - | | | | | response_data | string required | | responseData/actions/action/code/text() | | | | | | Paslaugos kodas (RC kodas) - | | | | | response_message | string | | responseMessage/text() | | | | | | + id | d | r | b | m | property | type | ref | source | prepare | level | access | uri | title | description + | manifest | | | | | | | | | + | | resource1 | xml | | | | | | | | + | | | | | | | | | | + | | | | Data | | | /data | | | | | | + | | | | | response_data[] | backref | Actions | responseData/actions | | | | | | + | | | | | response_data[].code[] | string required | | action/code/text() | | | | | | Paslaugos kodas (RC kodas) + | | | | | response_message | string | | responseMessage/text() | | | | | | + | | | | | | | | | | + | | | | Actions | | | /data/responseData/actions | | | | | | + | | | | | code[] | string required | | action/code/text() | | | | | | Paslaugos kodas (RC kodas) + | | | | | data | ref | Data | | | | | | | + """ path = tmp_path / 'manifest.xsd' with open(path, "w") as xsd_file: From f73c444417ab66bc64e8fd4e31b0c72835a88391 Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 22 Aug 2024 12:14:12 +0300 Subject: [PATCH 57/62] #622 removed comment --- tests/manifests/xsd/test_xsd.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index 87fbb665b..37e9f12f4 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -1089,6 +1089,3 @@ def test_xsd_enumeration(rc: RawConfig, tmp_path: Path): xsd_file.write(xsd) manifest = load_manifest(rc, path) assert manifest == table - - -# todo add test from gr 605 - refs and arrays From 8a103cbe9c4853c718fd181292439035ca4bf91d Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 22 Aug 2024 14:38:52 +0300 Subject: [PATCH 58/62] #622 refactored to remove duplicate code --- 
spinta/manifests/xsd/helpers.py | 131 ++++++++++---------------------- 1 file changed, 42 insertions(+), 89 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index d04806d0f..b94f53a01 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -525,45 +525,48 @@ def _properties_from_type_references( is_array = False - # TODO fix this because it probably doesn't cover all cases, only something like - # https://github.com/atviriduomenys/spinta/issues/613 - sequences = complex_type.xpath("./*[local-name() = 'sequence']") - if not sequences: - choices = complex_type.xpath("./*[local-name() = 'choice']") - if choices and choices[0].get('maxOccurs') == 'unbounded': - is_array = True - - if XSDReader.is_array(typed_element): - is_array = True + referenced_element = typed_element # avoiding recursion - if typed_element.get("name") in source_path.split("/"): + if referenced_element.get("name") in source_path.split("/"): continue - if is_array: - property_type = "backref" - - else: - property_type = "ref" - referenced_model_names = self._create_model( - typed_element, - source_path=new_source_path, - parent_model=model, - ) - - for referenced_model_name in referenced_model_names: - property_id, prop = model.simple_element_to_property(typed_element, is_array=is_array) - prop["external"]["name"] = prop["external"]["name"].replace("/text()", '') - if is_array: - if not property_id.endswith("[]"): - property_id += "[]" - property_type = "backref" - prop["type"] = property_type - prop["model"] = f"{referenced_model_name}" - properties[property_id] = prop + built_properties = self._build_properties(complex_type, is_array, model, new_source_path, node, + properties, referenced_element) + properties.update(built_properties) return properties + def _build_properties(self, complex_type: _Element, is_array: bool, model: XSDModel, new_source_path: str, node: _Element, properties: dict, referenced_element: _Element) -> 
dict: + sequences = complex_type.xpath("./*[local-name() = 'sequence']") + if not sequences: + choices = complex_type.xpath("./*[local-name() = 'choice']") + if choices and XSDReader.is_array(choices[0]): + if len(choices[0]) == 1 and not complex_type.get("mixed") == "true": + is_array = True + # if we only have one ref element and if it's inside a choice/sequence (this node) which is maxOccurs = unbounded then it's array + if XSDReader.is_array(node) and len(node) == 1: + is_array = True + if is_array: + property_type = "backref" + else: + property_type = "ref" + referenced_model_names = self._create_model( + referenced_element, + source_path=new_source_path, + parent_model=model, + ) + for referenced_model_name in referenced_model_names: + property_id, prop = model.simple_element_to_property(referenced_element, is_array=is_array) + prop["external"]["name"] = prop["external"]["name"].replace("/text()", '') + if is_array: + if not property_id.endswith("[]"): + property_id += "[]" + property_type = "backref" + prop["type"] = property_type + prop["model"] = f"{referenced_model_name}" + properties[property_id] = prop + def _properties_from_references( self, node: _Element, @@ -585,54 +588,18 @@ def _properties_from_references( properties[property_id] = prop else: is_array = False - # TODO fix this because it probably doesn't cover all cases, only something like - # also it covers choice now. 
- # https://github.com/atviriduomenys/spinta/issues/613 - complex_type = referenced_element.xpath("./*[local-name() = 'complexType']")[0] - sequences = complex_type.xpath("./*[local-name() = 'sequence']") - if not sequences: - choices = complex_type.xpath("./*[local-name() = 'choice']") - if choices and XSDReader.is_array(choices[0]): - # we only make this array if it's only one choice, which means it's a wrapper for an array - # also, if it's mixed, and has choices inside, it's not an array even if choices are unbound - if len(choices[0]) == 1 and not complex_type.get("mixed") == "true": - is_array = True - - # if we only have one ref element and if it's inside a choice/sequence (this node) which is maxOccurs = unbounded then it's array - if XSDReader.is_array(node) and len(node) == 1: + if XSDReader.is_array(ref_element): is_array = True + complex_type = referenced_element.xpath("./*[local-name() = 'complexType']")[0] + # avoiding recursion if referenced_element.get("name") in source_path.split("/"): continue - if XSDReader.is_array(ref_element): - is_array = True - - if is_array: - property_type = "backref" - else: - property_type = "ref" - referenced_model_names = self._create_model( - referenced_element, - source_path=new_source_path, - parent_model=model - ) - - for referenced_model_name in referenced_model_names: - property_id, prop = model.simple_element_to_property(ref_element, is_array=is_array) - prop['external']['name'] = prop['external']['name'].rstrip('/text()') - - if is_array: - if not property_id.endswith("[]"): - property_id += "[]" - property_type = "backref" - - prop['type'] = property_type - prop['model'] = f'{referenced_model_name}' - - properties[property_id] = prop + built_properties = self._build_properties(complex_type, is_array, model, new_source_path, node, properties, referenced_element) + properties.update(built_properties) return properties @@ -757,7 +724,6 @@ def _create_model( if complex_content_base_node.xpath(f'./*[local-name() = 
"sequence"]'): sequence_node = complex_content_base_node.xpath(f'./*[local-name() = "sequence"]')[0] properties.update(model.properties_from_simple_elements(sequence_node)) - # TODO: in this case, it might be something else, not sequence too if ( complex_type_node.xpath(f'./*[local-name() = "sequence"]') or @@ -1106,9 +1072,9 @@ def read_schema( B2. If maxOccurs = 1 B21. If both have annotation, then it's a model B22. If only one of them has annotation, it's a part of a path - todo finish defining behaviours of different options for sequences - c) element has a choice. todo define behaviour here + + c) element has a choice. 4. complex type described separately @@ -1150,16 +1116,3 @@ def read_schema( for model_name, parsed_model in xsd.models.items(): yield None, parsed_model.get_data() - - -# todo šitoj situacijoj neturėtų būti required: -# < xs: choice -# minOccurs = "0" -# maxOccurs = "unbounded" > -# < xs: element -# ref = "PAVARDE" / > -# < xs: element -# ref = "VARDAS" / > -# < xs: element -# ref = "PAVADINIMAS" / > -# < / xs: choice > From 75c03d42a34adca4270f1f2059bd9ae87f1c7f8b Mon Sep 17 00:00:00 2001 From: karina Date: Thu, 22 Aug 2024 17:43:55 +0300 Subject: [PATCH 59/62] added note about ref and backref when choice maxoccurs=unbound --- spinta/manifests/xsd/helpers.py | 83 +++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index b94f53a01..050361bb9 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -502,6 +502,50 @@ def is_required(element: _Element) -> bool: return True return False + def _build_ref_properties(self, complex_type: _Element, is_array: bool, model: XSDModel, new_source_path: str, node: _Element, referenced_element: _Element) -> dict[str, dict[str, str | bool | dict[str, str | dict[str, Any]]]]: + """ + Helper method for methods properties_from_references and properties_from_type_references + 
""" + + properties = {} + + sequences = complex_type.xpath("./*[local-name() = 'sequence']") + if not sequences: + choices = complex_type.xpath("./*[local-name() = 'choice']") + if choices and XSDReader.is_array(choices[0]): + if len(choices[0]) == 1 and not complex_type.get("mixed") == "true": + is_array = True + + # NOTE: it's not fully clear whether this is a backref whenever `choice` has `maxOccurs="unbounded"`, or only + # when there's only one element inside. + # if we only have one ref element and if it's inside a choice/sequence (this node) which is maxOccurs = unbounded then it's array + # if XSDReader.is_array(node) and len(node) == 1: + if XSDReader.is_array(node): + is_array = True + if XSDReader.is_array(referenced_element): + is_array = True + if is_array: + property_type = "backref" + else: + property_type = "ref" + referenced_model_names = self._create_model( + referenced_element, + source_path=new_source_path, + parent_model=model, + ) + for referenced_model_name in referenced_model_names: + property_id, prop = model.simple_element_to_property(referenced_element, is_array=is_array) + prop["external"]["name"] = prop["external"]["name"].replace("/text()", '') + if is_array: + if not property_id.endswith("[]"): + property_id += "[]" + property_type = "backref" + prop["type"] = property_type + prop["model"] = f"{referenced_model_name}" + properties[property_id] = prop + + return properties + def _properties_from_type_references( self, node: _Element, @@ -531,42 +575,12 @@ def _properties_from_type_references( if referenced_element.get("name") in source_path.split("/"): continue - built_properties = self._build_properties(complex_type, is_array, model, new_source_path, node, - properties, referenced_element) + built_properties = self._build_ref_properties(complex_type, is_array, model, new_source_path, node, + referenced_element) properties.update(built_properties) return properties - def _build_properties(self, complex_type: _Element, is_array: bool, 
model: XSDModel, new_source_path: str, node: _Element, properties: dict, referenced_element: _Element) -> dict: - sequences = complex_type.xpath("./*[local-name() = 'sequence']") - if not sequences: - choices = complex_type.xpath("./*[local-name() = 'choice']") - if choices and XSDReader.is_array(choices[0]): - if len(choices[0]) == 1 and not complex_type.get("mixed") == "true": - is_array = True - # if we only have one ref element and if it's inside a choice/sequence (this node) which is maxOccurs = unbounded then it's array - if XSDReader.is_array(node) and len(node) == 1: - is_array = True - if is_array: - property_type = "backref" - else: - property_type = "ref" - referenced_model_names = self._create_model( - referenced_element, - source_path=new_source_path, - parent_model=model, - ) - for referenced_model_name in referenced_model_names: - property_id, prop = model.simple_element_to_property(referenced_element, is_array=is_array) - prop["external"]["name"] = prop["external"]["name"].replace("/text()", '') - if is_array: - if not property_id.endswith("[]"): - property_id += "[]" - property_type = "backref" - prop["type"] = property_type - prop["model"] = f"{referenced_model_name}" - properties[property_id] = prop - def _properties_from_references( self, node: _Element, @@ -598,7 +612,7 @@ def _properties_from_references( if referenced_element.get("name") in source_path.split("/"): continue - built_properties = self._build_properties(complex_type, is_array, model, new_source_path, node, properties, referenced_element) + built_properties = self._build_ref_properties(complex_type, is_array, model, new_source_path, node, referenced_element) properties.update(built_properties) return properties @@ -898,7 +912,8 @@ def _remove_proxy_models(self, model: XSDModel): # also transfer all attributes of the property prop["required"] = ref_prop["required"] - prop["description"] = ref_prop["description"] + if ref_prop["description"]: + prop["description"] = 
ref_prop["description"] is_array = False if prop["type"] == "backref": From cdffcbf59aab9f8f51bb1a9ca3659b25bf5a0e0e Mon Sep 17 00:00:00 2001 From: karina Date: Fri, 23 Aug 2024 13:38:22 +0300 Subject: [PATCH 60/62] #622 description is from child model, unless it's empty, then it's from parent model when trimming proxy models --- spinta/manifests/xsd/helpers.py | 11 +++++++---- tests/manifests/xsd/test_helpers.py | 8 ++++---- tests/manifests/xsd/test_xsd.py | 6 +++--- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/spinta/manifests/xsd/helpers.py b/spinta/manifests/xsd/helpers.py index 050361bb9..9e3b9a402 100644 --- a/spinta/manifests/xsd/helpers.py +++ b/spinta/manifests/xsd/helpers.py @@ -594,6 +594,10 @@ def _properties_from_references( new_source_path = source_path referenced_element = self._get_referenced_node(ref_element) + # avoiding recursion + if referenced_element.get("name") in source_path.split("/"): + continue + if self.node_is_simple_type_or_inline(referenced_element): is_array = XSDReader.is_array(ref_element) property_id, prop = model.simple_element_to_property(referenced_element, is_array=is_array) @@ -608,11 +612,10 @@ def _properties_from_references( complex_type = referenced_element.xpath("./*[local-name() = 'complexType']")[0] - # avoiding recursion - if referenced_element.get("name") in source_path.split("/"): - continue - built_properties = self._build_ref_properties(complex_type, is_array, model, new_source_path, node, referenced_element) + for prop in built_properties.values(): + if not XSDReader.is_required(ref_element): + prop["required"] = False properties.update(built_properties) return properties diff --git a/tests/manifests/xsd/test_helpers.py b/tests/manifests/xsd/test_helpers.py index 262e22b23..a1187bd5b 100644 --- a/tests/manifests/xsd/test_helpers.py +++ b/tests/manifests/xsd/test_helpers.py @@ -780,7 +780,7 @@ def test_properties_from_references_complex_not_array(): assert result == { 'fiziniai_asmenys': { - 
'description': '', + 'description': 'Pagrindiniai juridinio asmens duomenys.', 'enums': {}, 'external': {'name': 'FIZINIAI_ASMENYS'}, 'model': 'test/FiziniaiAsmenys', @@ -788,7 +788,7 @@ def test_properties_from_references_complex_not_array(): 'type': 'ref', }, 'objektai': { - 'description': '', + 'description': 'Pagrindiniai juridinio asmens duomenys.', 'enums': {}, 'external': {'name': 'OBJEKTAI'}, 'model': 'test/Objektai', @@ -910,7 +910,7 @@ def test_properties_from_references_complex_array(): assert result == { 'fiziniai_asmenys[]': { - 'description': '', + 'description': 'Pagrindiniai juridinio asmens duomenys.', 'enums': {}, 'external': {'name': 'FIZINIAI_ASMENYS'}, 'model': 'test/FiziniaiAsmenys', @@ -918,7 +918,7 @@ def test_properties_from_references_complex_array(): 'type': 'backref', }, 'objektai[]': { - 'description': '', + 'description': 'Pagrindiniai juridinio asmens duomenys.', 'enums': {}, 'external': {'name': 'OBJEKTAI'}, 'model': 'test/Objektai', diff --git a/tests/manifests/xsd/test_xsd.py b/tests/manifests/xsd/test_xsd.py index 37e9f12f4..cce73a801 100644 --- a/tests/manifests/xsd/test_xsd.py +++ b/tests/manifests/xsd/test_xsd.py @@ -476,13 +476,13 @@ def test_xsd_choice(rc: RawConfig, tmp_path: Path): | | resource1 | xml | | | | | | | | | | | | | | | | | | | | | | Parcels | | | /parcels | | | | | | Pasikeitusių žemės sklypų sąrašas - | | | | | parcel1[] | backref | Parcel2 | parcel | | | | | | + | | | | | parcel1[] | backref | Parcel2 | parcel | | | | | | Žemės sklypo pasikeitimo informacija | | | | | parcel1[].sign_of_change | integer required | | sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis | | enum | | 1 | | | | | | | | | | 2 | | | | | | | | | | 3 | | | | | | | | | | | parcel1[].text | string | | text() | | | | | | - | | | | | parcel[] | backref | Parcel1 | parcel | | | | | | + | | | | | parcel[] | backref | Parcel1 | parcel | | | | | | Žemės sklypo pasikeitimo informacija | | | | | parcel[].parcel_unique_number | 
integer required | | parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris | | | | | parcel[].text | string | | text() | | | | | | | | | | | text | string | | text() | | | | | | @@ -564,7 +564,7 @@ def test_xsd_choice_max_occurs_unbounded(rc: RawConfig, tmp_path: Path): | | resource1 | xml | | | | | | | | | | | | | | | | | | | | | | Parcels | | | /parcels | | | | | | Pasikeitusių žemės sklypų sąrašas - | | | | | parcel[] | backref | Parcel | parcel | | | | | | + | | | | | parcel[] | backref | Parcel | parcel | | | | | | Žemės sklypo pasikeitimo informacija | | | | | parcel[].parcel_unique_number | integer | | parcel_unique_number/text() | | | | | | Žemės sklypo unikalus numeris | | | | | parcel[].sign_of_change | integer | | sign_of_change/text() | | | | | | Žemės sklypo pasikeitimo požymis | | enum | | 1 | | | | | | From ea63c688370db60b77cc5e03223b92f0e3edc9c1 Mon Sep 17 00:00:00 2001 From: karina Date: Fri, 23 Aug 2024 14:38:55 +0300 Subject: [PATCH 61/62] fixed typo in readme --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 5b13273f4..c8cf85b03 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ .. default-role:: literal Spinta is a command line tool and REST JSON API service for publishing and -mapping data between different phisical data models, JSON API and semantic data +mapping data between different physical data models, JSON API and semantic data models. It supports a great deal of data schemes and formats. .. image:: https://gitlab.com/atviriduomenys/spinta/badges/master/pipeline.svg @@ -12,7 +12,7 @@ models. It supports a great deal of data schemes and formats. 
:: - Phisical data Different Real time REST JSON + Physical data Different Real time REST JSON sources formats transformation API +-----+ From e8e5761ab913b3f16fc699bfdd617b3ef6826f1f Mon Sep 17 00:00:00 2001 From: karina Date: Fri, 23 Aug 2024 15:55:54 +0300 Subject: [PATCH 62/62] entry in changes.rst --- CHANGES.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index c14f50221..7aade8309 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -3,6 +3,13 @@ Changes ####### +0.1.68 (2024-06-27) +=================== + +- Nested properties for XSD. (`#622`_). + + .. _#622: https://github.com/atviriduomenys/spinta/issues/622 + 0.1.64 (unreleased) ===================