Merge pull request #397 from OpenDataServices/cove-1366-org-file-ordering-no-gha-change

sort_xml: Handle extensions in the schema xsd correctly when sorting
OpenDataServices · Jul 5, 2022 · b5619d6 · b5619d6
2 parents 84d83cb + 1adfb33
commit b5619d6
Show file tree

Hide file tree

Showing 8 changed files with 909 additions and 8 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+## [0.17.2] - 2022-06-15
+
+### Fixed
+
+- Handle extensions in the schema xsd correctly when sorting https://github.com/OpenDataServices/cove/issues/1366
+
 ## [0.17.1] - 2021-07-21
 
 ### Fixed

diff --git a/examples/iati/iati-organisations-schema.xsd b/examples/iati/iati-organisations-schema.xsd
diff --git a/flatten-tool b/flatten-tool
@@ -1,3 +1,4 @@
 #!/usr/bin/env python
 import flattentool.cli
+
 flattentool.cli.main()
diff --git a/flattentool/sort_xml.py b/flattentool/sort_xml.py
@@ -74,6 +74,23 @@ def get_schema_element(self, tag_name, name_attribute):
                 return schema_element
         return schema_element
 
+    def handle_complexType(self, complexType):
+        """
+        Return the xsd:element nodes found for the supplied complexType.
+
+        If the complexType is declared as an xsd:extension of a base type,
+        the base type is looked up by name and its elements are collected
+        recursively, so chains of extensions are followed.
+
+        complexType may be None (a schema lookup that found nothing), in
+        which case an empty list is returned.
+        """
+        if complexType is None:
+            return []
+
+        type_elements = []
+        extension = complexType.find(
+            "xsd:complexContent/xsd:extension", namespaces=namespaces
+        )
+        # Compare against None explicitly: the truth value of an XML element
+        # reflects whether it has children, not whether find() matched.
+        if extension is not None:
+            base = extension.attrib.get("base")
+            complexType = self.get_schema_element("complexType", base)
+            if complexType is None:
+                # The base type is not defined in the loaded schemas, so
+                # there is nothing to collect and nothing to call findall on.
+                return type_elements
+            type_elements = self.handle_complexType(complexType)
+        # NOTE(review): after following an extension, complexType is the base
+        # type, whose elements the recursive call has already returned, so
+        # they are appended a second time here; elements declared inside the
+        # xsd:extension itself are not collected. Behaviour kept as-is -
+        # confirm whether this is intended.
+        type_elements += complexType.findall(
+            "xsd:choice/xsd:element", namespaces=namespaces
+        ) + complexType.findall("xsd:sequence/xsd:element", namespaces=namespaces)
+        return type_elements
+
     def element_loop(self, element, path):
         """
         Return information about the children of the supplied element.
@@ -82,12 +99,7 @@ def element_loop(self, element, path):
         type_elements = []
         if "type" in a:
             complexType = self.get_schema_element("complexType", a["type"])
-            if complexType is not None:
-                type_elements = complexType.findall(
-                    "xsd:choice/xsd:element", namespaces=namespaces
-                ) + complexType.findall(
-                    "xsd:sequence/xsd:element", namespaces=namespaces
-                )
+            type_elements += self.handle_complexType(complexType)
 
         children = (
             element.findall(

diff --git a/flattentool/tests/fixtures/iati-org-with-documents.xml b/flattentool/tests/fixtures/iati-org-with-documents.xml
@@ -0,0 +1,67 @@
+<?xml version='1.0' encoding='utf-8'?>
+<iati-organisations version="@generated-datetime">
+  <!--XML generated by flatten-tool-->
+  <iati-organisation default-currency="USD" last-updated-datetime="2022-05-24T07:35:54.240000+00:00" xml:lang="en">
+    <organisation-identifier>XE-EXAMPLE-ORG</organisation-identifier>
+    <name>
+      <narrative>Example Org</narrative>
+    </name>
+    <reporting-org ref="XE-EXAMPLE-ORG" type="40">
+      <narrative>Example Org</narrative>
+    </reporting-org>
+    <total-budget status="2">
+      <period-start iso-date="2016-01-01"/>
+      <period-end iso-date="2016-12-31"/>
+      <value currency="USD" value-date="2016-01-01">100000</value>
+    </total-budget>
+    <total-budget status="2">
+      <period-start iso-date="2017-01-01"/>
+      <period-end iso-date="2017-12-31"/>
+      <value currency="USD" value-date="2017-01-01">200000</value>
+    </total-budget>
+    <total-budget status="1">
+      <period-start iso-date="2018-01-01"/>
+      <period-end iso-date="2018-12-31"/>
+      <value currency="USD" value-date="2018-01-01">300000</value>
+    </total-budget>
+    <total-budget status="1">
+      <period-start iso-date="2019-01-01"/>
+      <period-end iso-date="2019-12-31"/>
+      <value currency="USD" value-date="2019-01-01">400000</value>
+    </total-budget>
+    <total-budget status="1">
+      <period-start iso-date="2020-01-01"/>
+      <period-end iso-date="2020-12-31"/>
+      <value currency="USD" value-date="2020-01-01">300000</value>
+    </total-budget>
+    <total-budget status="1">
+      <period-start iso-date="2021-01-01"/>
+      <period-end iso-date="2021-12-31"/>
+      <value currency="USD" value-date="2021-01-01">200000</value>
+    </total-budget>
+    <total-expenditure>
+      <period-start iso-date="2015-01-01"/>
+      <period-end iso-date="2015-12-31"/>
+      <value currency="USD" value-date="2015-01-01">100000</value>
+    </total-expenditure>
+    <total-expenditure>
+      <period-start iso-date="2016-01-01"/>
+      <period-end iso-date="2016-12-31"/>
+      <value currency="USD" value-date="2016-01-01">100000</value>
+    </total-expenditure>
+    <total-expenditure>
+      <period-start iso-date="2017-01-01"/>
+      <period-end iso-date="2017-12-31"/>
+      <value currency="USD" value-date="2017-01-01">100000</value>
+    </total-expenditure>
+    <document-link format="application/pdf" url="http://example.org/document.pdf">
+      <title>
+        <narrative xml:lang="en">Example Org document</narrative>
+      </title>
+      <category code="B02"/>
+      <language code="en"/>
+      <document-date iso-date="2018-01-01"/>
+      <recipient-country code="SS"/>
+    </document-link>
+  </iati-organisation>
+</iati-organisations>
diff --git a/...ool/tests/fixtures/xlsx/IATI CoVE #organisation #broken-docs #template #public #demo.xlsx b/...ool/tests/fixtures/xlsx/IATI CoVE #organisation #broken-docs #template #public #demo.xlsx
diff --git a/flattentool/tests/test_unflatten.py b/flattentool/tests/test_unflatten.py
@@ -120,7 +120,14 @@ def test_unflatten_xml_comment(tmpdir, dirname):
 
 
 @pytest.mark.parametrize("input_format", ["xlsx", "ods"])
-def test_unflatten_org_xml_xlsx(tmpdir, input_format):
+def test_unflatten_org_xml_minimal(tmpdir, input_format):
+    schema_path = "examples/iati"
+    schemas = [
+        "iati-activities-schema.xsd",
+        "iati-organisations-schema.xsd",
+        "iati-common.xsd",
+    ]
+    schema_filepaths = ["{}/{}".format(schema_path, schema) for schema in schemas]
     unflatten(
         input_name="flattentool/tests/fixtures/{}/iati-org.{}".format(
             input_format, input_format
@@ -130,13 +137,40 @@ def test_unflatten_org_xml_xlsx(tmpdir, input_format):
         id_name="organisation-identifier",
         xml=True,
         metatab_name="Meta",
+        xml_schemas=schema_filepaths,
     )
     assert (
         open("flattentool/tests/fixtures/iati-org.xml").read()
         == tmpdir.join("output.xml").read()
     )
 
 
+@pytest.mark.parametrize("input_format", ["xlsx"])
+def test_unflatten_org_xml_with_documents(tmpdir, input_format):
+    """
+    Unflatten the organisation spreadsheet fixture that includes document
+    links, passing the IATI schema files via xml_schemas, and check that the
+    XML written matches the expected fixture exactly.
+    """
+    schema_path = "examples/iati"
+    schemas = [
+        "iati-activities-schema.xsd",
+        "iati-organisations-schema.xsd",
+        "iati-common.xsd",
+    ]
+    schema_filepaths = ["{}/{}".format(schema_path, schema) for schema in schemas]
+    unflatten(
+        input_name="flattentool/tests/fixtures/{}/IATI CoVE #organisation #broken-docs #template #public #demo.{}".format(
+            input_format, input_format
+        ),
+        output_name=tmpdir.join("output.xml").strpath,
+        input_format=input_format,
+        id_name="organisation-identifier",
+        xml=True,
+        metatab_name="Meta",
+        xml_schemas=schema_filepaths,
+    )
+    # Use a context manager so the fixture file handle is closed promptly
+    # rather than being left for the garbage collector.
+    with open(
+        "flattentool/tests/fixtures/iati-org-with-documents.xml"
+    ) as expected_file:
+        expected_xml = expected_file.read()
+    assert expected_xml == tmpdir.join("output.xml").read()
+
+
 @pytest.mark.parametrize("input_format", ["xlsx", "ods"])
 def test_unflatten_empty_column_header(tmpdir, input_format):
     unflatten(

diff --git a/setup.py b/setup.py
@@ -43,7 +43,7 @@ def run(self):
 
 setup(
     name="flattentool",
-    version="0.17.1",
+    version="0.17.2",
     author="Open Data Services",
     author_email="code@opendataservices.coop",
     packages=["flattentool"],