From 7c190e4121752f310dcdb4f76b479c23a87823b0 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Wed, 15 Jun 2022 17:02:35 +0100 Subject: [PATCH 1/2] sort_xml: Handle extensions in the schema xsd correctly when sorting https://github.com/OpenDataServices/cove/issues/1366 --- CHANGELOG.md | 4 + examples/iati/iati-organisations-schema.xsd | 781 ++++++++++++++++++ flatten-tool | 1 + flattentool/sort_xml.py | 24 +- .../fixtures/iati-org-with-documents.xml | 67 ++ ... #broken-docs #template #public #demo.xlsx | Bin 0 -> 15119 bytes flattentool/tests/test_unflatten.py | 36 +- 7 files changed, 906 insertions(+), 7 deletions(-) create mode 100644 examples/iati/iati-organisations-schema.xsd create mode 100644 flattentool/tests/fixtures/iati-org-with-documents.xml create mode 100644 flattentool/tests/fixtures/xlsx/IATI CoVE #organisation #broken-docs #template #public #demo.xlsx diff --git a/CHANGELOG.md b/CHANGELOG.md index 59f0e542..f44eec3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Fixed + +- Handle extensions in the schema xsd correctly when sorting https://github.com/OpenDataServices/cove/issues/1366 + ## [0.17.1] - 2021-07-21 ### Fixed diff --git a/examples/iati/iati-organisations-schema.xsd b/examples/iati/iati-organisations-schema.xsd new file mode 100644 index 00000000..90e216a8 --- /dev/null +++ b/examples/iati/iati-organisations-schema.xsd @@ -0,0 +1,781 @@ + + + + + + + International Aid Transparency Initiative: Organisation-Information Schema + + Release 2.03, 2018-02-19 + + NOTE: the xml.xsd and iati-common.xsd schemas must be in the + same directory as this one. + + This W3C XML Schema defines an XML document type for information + about an aid organisation, following the standard published at + http://iatistandard.org + + This document type may be extended with additional elements and + attributes, but they must belong to an explicit XML namespace. + + + + + + + + + + + Top-level list of one or more IATI organisation records. + + + + + + + + + + + A number indicating the IATI specification version in use. + This is mandatory and must be a valid version. + + + + + + + A date/time stamp for when this file was generated. This + is not necessarily the last-updated date for the + individual activity records in it. Use of this attribute + is highly recommended, to allow recipients to know when a + file has been updated. + + + + + + + + + + + Top-level element for a single IATI organisation report. + + + + + + + + Machine-readable identification string for the organisation issuing the report. Must be in the format {RegistrationAgency}-{RegistrationNumber} where {RegistrationAgency} is a valid code in the Organisation Registration Agency code list and {RegistrationNumber} is a valid identifier issued by the {RegistrationAgency}. + + + + + + + + + + + + + + + + + The last date/time that the data for this specific + organisation was updated. This date must change whenever + the value of any field changes. + + + + + + + A code specifying the default language of text in this organisation. It is recommended that wherever possible only codes from ISO 639-1 are used. If this is not declared then the xml:lang attribute MUST be specified for each narrative element. + + + + + + + Default ISO 4217 currency code for all financial values in + this activity report. If this is not declared then the + currency attribute MUST be specified for all monetary + values. + + + + + + + + + + + The human-readable name of the organisation. + + + + + + + + + + + + + + + The total-budget element allows for the reporting of the organisation's + own budget. The recommendation is that, where and when possible, the + organisation's total annual planned budget for each of the next three + years is reported. The status explains whether the budget being reported + is indicative or has been formally committed. The value should appear + within the BudgetStatus codelist. If the @status attribute is not present, + the budget is assumed to be indicative. + + + + + + + + The start of the budget period. + + + + + + + + + + + + + + The end of the period (which must not be greater than one year) + + + + + + + + + + + + + + The total value of the organisation's aid budget for + this period. + + + + + + + A breakdown of the total budget into sub-totals. The + breakdown is determined by the reporting organisation + and described in the narrative. The period covered is + the same as that covered by the parent total-budget + element. The sum of budget-line values does not have to + equal the value given in the parent element. + + + + + + + + The budget sub-total. The definition of the + sub-division is determined by + iati-organisation/total-budget/budget-line/narrative + + + + + + + + + + An internal reference for this budget line taken + from the reporting organisation's own system. + Optional. + + + + + + + + + + + + The status explains whether the budget being reported is indicative or has + been formally committed. The value should appear within the BudgetStatus + codelist. If the @status attribute is not present, the budget is assumed + to be indicative. + + + + + + + + + + + The recipient-org-budget element allows for the reporting of + forward looking budgets for each institution which receives + core funding from the reporting organisation. The + recommendation is that, where and when possible, annual + planned budgets for each recipient institution for each of the + next three financial years are reported. This is primarily + applicable to donors but any provider of core funding is + expected to use it. Earmarked budgets should be reported at + activity-level through the Activity Standard. The status + explains whether the budget being reported is indicative or + has been formally committed. The value should appear within + the BudgetStatus codelist. If the @status attribute is not + present, the budget is assumed to be indicative. + + + + + + + + The organisation that will receive the funds. + + + + + + + + The name of the organisation. This can be repeated in multiple languages. + + + + + + + + + Machine-readable identification string for the organisation issuing the report. Must be in the format {RegistrationAgency}-{RegistrationNumber} where {RegistrationAgency} is a valid code in the Organisation Registration Agency code list and {RegistrationNumber} is a valid identifier issued by the {RegistrationAgency}. If this is not present then the narrative MUST contain the name of the organisation. + + + + + + + + + The start of the budget period. + + + + + + + + + + + + + + The end of the period (which must not be greater than one year) + + + + + + + + + + + + + + The total value of the money budgeted to be disbursed to + the specified recipient organisation during this time + period. + + + + + + + A breakdown of the recipient organisation's budget into + sub-totals. The breakdown is determined by the + reporting organisation and described in the narrative. + The period covered is the same as that covered by the + parent recipient-org-budget element. The sum of + budget-line values does not have to equal the value + given in the parent element. + + + + + + + + The budget sub-total. The definition of the + sub-division is determined by + iati-organisation/recipient-org-budget/budget-line/narrative + + + + + + + + + + An internal reference for this budget line taken + from the reporting organisation's own system. + Optional. + + + + + + + + + + + + The status explains whether the budget being reported is indicative or has + been formally committed. The value should appear within the BudgetStatus + codelist. If the @status attribute is not present, the budget is assumed + to be indicative. + + + + + + + + + + + The recipient-region-budget element allows for the reporting of forward + looking budgets where the organisation maintains region-wide, rather than + or in addition to country-specific budgets. The recommendation is that, + where and when possible, the organisation’s total annual planned budget + for each of the next three financial years is reported for each recipient + region. This must NOT include an aggregation of budgets reported in the + recipient-country-budget element. It is strongly recommended that + publishers report to existing defined regions wherever possible. The + status explains whether the budget being reported is indicative or has + been formally committed. The value should appear within the BudgetStatus + codelist. If the @status attribute is not present, the budget is assumed + to be indicative. + + + + + + + + The supranational geographic region where funds have been allocated. + + + + + + + + + + + An IATI code for the vocabulary from which the region code is + drawn. If it is not present, code 1 ('OECD DAC') is assumed. + + + + + + + The URI where this vocabulary is defined. If the vocabulary is 99 (reporting organisation), the URI where this internal vocabulary is defined. While this is an optional field it is STRONGLY RECOMMENDED that all publishers use it to ensure that the meaning of their codes are fully understood by data users. + + + + + + + Either an OECD DAC, UN region code or (if code ‘99’ Reporting + organisation is selected for recipient-region/\@vocabulary) a + code from your internal vocabulary. The codelist is determined + by vocabulary attribute. The value in recipient-region/\@code + should appear within the Region codelist, if the vocabulary + code 1 ('OECD DAC') is used. + + + + + + + + + + The start of the budget period. + + + + + + + + + + + + + + The end of the period (which must not be greater than one year) + + + + + + + + + + + + + + The total value of the money budgeted to be disbursed to + the specified region during this time period. This + element is required. + + + + + + + A breakdown of the recipient region’s budget into sub-totals. The + breakdown is determined by the reporting organisation and described + in the narrative. The period covered is the same as that covered by + the parent recipient-region-budget element. The sum of budget-line + values does not have to equal the value given in the parent element. + + + + + + + + The budget sub-total. The definition of the + sub-division is determined by + iati-organisation/recipient-region-budget/budget-line/narrative + + + + + + + + + + An internal reference for this budget line taken + from the reporting organisation's own system. + Optional. + + + + + + + + + + + + The status explains whether the budget being reported is indicative or has + been formally committed. The value should appear within the BudgetStatus + codelist. If the @status attribute is not present, the budget is assumed + to be indicative. + + + + + + + + + + + The recipient-country-budget element allows for the reporting of + forward looking budgets for each country in which the organisation + operates. The recommendation is that, where and when possible, the + organisation's total annual planned budget for each of the next + three financial years is reported for each recipient country. + It is strongly recommended that the start and end of the reported + financial years match those of the recipient country's + budgetary/planning cycle. The status explains whether the budget + being reported is indicative or has been formally committed. The + value should appear within the BudgetStatus codelist. If the + @status attribute is not present, the budget is assumed to be + indicative. + + + + + + + + The recipient country. + + + + + + + + + + + ISO 3166-1 alpha-2 code for the country. + + + + + + + + + + The start of the budget period. + + + + + + + + + + + + + + The end of the period (which must not be greater than one year) + + + + + + + + + + + + + + The total value of the money budgeted to be disbursed to + the specified country during this time period. This + element is required. + + + + + + + A breakdown of the recipient country's budget into + sub-totals. The breakdown is determined by the + reporting organisation and described in the narrative. + The period covered is the same as that covered by the + parent recipient-country-budget element. The sum of + budget-line values does not have to equal the value + given in the parent element. + + + + + + + + The budget sub-total. The definition of the + sub-division is determined by + iati-organisation/recipient-country-budget/budget-line/narrative + + + + + + + + + + An internal reference for this budget line taken + from the reporting organisation's own system. + Optional. + + + + + + + + + + + + The status explains whether the budget being reported is indicative or has + been formally committed. The value should appear within the BudgetStatus + codelist. If the @status attribute is not present, the budget is assumed + to be indicative. + + + + + + + + + + + The total-expenditure element allows for the reporting of the + organisation’s international development expenditure. The + recommendation is that, where and when possible, the organisation’s + total expenditure for each of the past three years is reported. + The expense line allows publishers to record further breakdown. + + + + + + + + The start of the budget period. + + + + + + + + + + + + + + The end of the period (which must not be greater than one year) + + + + + + + + + + + + + + The total value of the organisation's aid expenditure for + this period. + + + + + + + A breakdown of the total expenditure into sub-totals. + The breakdown is determined by the reporting + organisation and described in the narrative. The period + covered is the same as that covered by the parent + total-expenditure element. The sum of expenditure-line + values does not have to equal the value given in the + parent element. + + + + + + + + The expenditure sub-total. The definition of the + sub-division is determined by + iati-organisation/total-expenditure/expenditure-line/narrative + + + + + + + + + + An internal reference for this expenditure line taken + from the reporting organisation’s own system. Optional. + + + + + + + + + + + + + + + + + + + A link to an online, publicly accessible web page or document. + + + + + + + The recipient country that is the focus of the document. + May be repeated for multiple countries. + + + + + + + + + + + ISO 3166-1 alpha-2 code for the country. + + + + + + + + + + + + + diff --git a/flatten-tool b/flatten-tool index 328a7b7d..df6fda33 100755 --- a/flatten-tool +++ b/flatten-tool @@ -1,3 +1,4 @@ #!/usr/bin/env python import flattentool.cli + flattentool.cli.main() diff --git a/flattentool/sort_xml.py b/flattentool/sort_xml.py index ee8d3257..cfe13165 100644 --- a/flattentool/sort_xml.py +++ b/flattentool/sort_xml.py @@ -74,6 +74,23 @@ def get_schema_element(self, tag_name, name_attribute): return schema_element return schema_element + def handle_complexType(self, complexType): + type_elements = [] + if complexType is not None: + extension = complexType.find( + "xsd:complexContent/xsd:extension", namespaces=namespaces + ) + if extension: + base = extension.attrib.get("base") + complexType = self.get_schema_element("complexType", base) + type_elements = self.handle_complexType(complexType) + else: + type_elements = [] + type_elements += complexType.findall( + "xsd:choice/xsd:element", namespaces=namespaces + ) + complexType.findall("xsd:sequence/xsd:element", namespaces=namespaces) + return type_elements + def element_loop(self, element, path): """ Return information about the children of the supplied element. @@ -82,12 +99,7 @@ def element_loop(self, element, path): type_elements = [] if "type" in a: complexType = self.get_schema_element("complexType", a["type"]) - if complexType is not None: - type_elements = complexType.findall( - "xsd:choice/xsd:element", namespaces=namespaces - ) + complexType.findall( - "xsd:sequence/xsd:element", namespaces=namespaces - ) + type_elements += self.handle_complexType(complexType) children = ( element.findall( diff --git a/flattentool/tests/fixtures/iati-org-with-documents.xml b/flattentool/tests/fixtures/iati-org-with-documents.xml new file mode 100644 index 00000000..43ff3513 --- /dev/null +++ b/flattentool/tests/fixtures/iati-org-with-documents.xml @@ -0,0 +1,67 @@ + + + + + XE-EXAMPLE-ORG + + Example Org + + + Example Org + + + + + 100000 + + + + + 200000 + + + + + 300000 + + + + + 400000 + + + + + 300000 + + + + + 200000 + + + + + 100000 + + + + + 100000 + + + + + 100000 + + + + <narrative xml:lang="en">Example Org document</narrative> + + + + + + + + diff --git a/flattentool/tests/fixtures/xlsx/IATI CoVE #organisation #broken-docs #template #public #demo.xlsx b/flattentool/tests/fixtures/xlsx/IATI CoVE #organisation #broken-docs #template #public #demo.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0ad84094988768cc3809640a5a0b07ef505c6a45 GIT binary patch literal 15119 zcmeHu1yo+kvL^2C?(P;OSb`HAg1fuBOK=bF9wfLE2<{FcxCeK48zA?+L-Nj?xpU^e zw`SJ7=3hViueJBLtE+ccSAEr6S_~Kj1pop90>Fo>?mfVZhVgjpWJa%NqwQ#HVQBkr z2aI%1=4PoQ>K1JbFyM#CyKwRg#wr-d{3seDk_>Q5q!!p8Bl_U7=R8K=u^PJq;+gmJ zUL6ct*wtJL=pefhZ&x6Jx}a7asT0E;SXRbTu=`AEedrcai|n+xslt3!Jp46}XwOw6 zuL99Mv|Z($0{81BCYa5!D%e$m+^E+DkVB#l3PXk4g2!(ll3Y9L(#d{ow*n^tubkXV zO#0Tmg1UH{igV3Q=!>Rv4SHd)R8#<7BEv0x3)to9!yF70z#2zYw?jEC!gt37$$pP@ zHs2%L)Hkl#Gmp{@h%41j^M&7~C!m~6qbXWn$Z_Ah)(9BayT7N7{c>f#oT`_#nu6M_ zTw|elppvLoQ(6oh0$j)V11KN>05~uJfb{>aDVhG;l>aJIGXJ+J|5c`B`EOJHt4#Uo zzfJjfP3dT9V`^)puWx5d|NQUgvNJzxf!z%VwnVgkCR-GjBfK{}sN2p=6VXqr3g7LHRVDB3oXx6<0D2 zt;LWcdVex{UNrv0-EJWd$ISjZ8~RD%_}~w_<9;2^tLt5Oa!bWz#o73f&V8fBmceto zW1Cu4-bBgT4SMv+lf5I8#=2?zFKXr`8x41}gCScEjmiV`aIqz&xz??&J1~!vz z(P35P^U&=gPRJuYYC`_InZO(zd{jQ3rfo0QEJ;~Q2^#?Rcv zZV*$X&<}>t0|3w9!R*}Wi0jiVH5xTMt1WFB@jHk63)lAJ+w1U>oNZq_H-2%pS?p4T zZ?7vp(OZ0oW}8$zfNqAK?z-#k#2Gtk-CXoLeyBz<7(?JOp&I6GMB^#Cy5bj`oEYFI zy3qVs8OpY}tTEqowg1D8 zDVgiNi`w}#;mAgE_Q~Yz{Py*1=W%EM{C#@+>ZXo0ho^h>je-5y*s;!q&4V&m$=33T zXYm3L3SD==#IT~`Z2Ry{*TQ5njyk>N{(V|A=k4A3z6eft`P}5~-s__#$=hnl+s4;h zO_DBZ-?0%Pr5lYXW%i-gUr>ak;zExI670 zI99J@BVDy44fD{-;Jp*SFub%%`Y3r~&3~3X7V6l}o952amv_YA-%xm4*Ds)+D>ani z{{2Wj=qs;-^ZD?B=k-{+RBn8L>Bre3oIL3r%XFK2vxl(&8_<4vn#nS?ZX4i!zeKd# zTgQ8u)?rUmJk(pai3lhYinnr*SYjh;K48c!iTS+(5@aS!GSNXv;@|bM$dF(C6d5y# zMvEqif76R1i)Km8=_QdMdlK~~iBITNkVU^rEb7&hATwoB{wWG3lVeHD>P3;rJ@mX! z5+Bw3hv29v!{ zHDywd-bxam)O$T_!15@9kuWe~;*Z8m8W_>@Cu3$w%Yn z56w$vPO}_xYU5bJ+z8D}VIJCqX7^&YL9jx!Lb5`(La{=%LeoNrfC2W_Ct!dDsqt=M zM^FL>I(ego4*>(>?MJ|X2*T>qB7&d<3B>$XiyQ(5)cX?w11d<3Pm2<%wSiZUR;Y463ZU%3)EhC~_K;iId;JuxA%0F_o~AS>nR=@HFD~EWhk_ zJJE5jdx!62k zJkdNl<6g>3<4`&&ymRGJex-f2*m<8qE?hWhV_liN2)A!np<(Wjd}ZUPx?D1B7@(b! zT0}?J#O-Ev-hFj;O}~iaaetfCg2w0U@Zl{t5ln~PuLR-V|bzVMZO zhD%u^MUr(37($7*rbr0@xC=Qvic&^n_m^%bt+0#V0&1kz9B)Jd&f)+2pXSrM9(!VYhb8SMbc0 z*naFB)S0WG3>AG_y>LiF^VW^PXvivIm6asgnGh0H{3)Z1-%L6=%?^U5Ctf~i@piR3kS6`G0hLco>ojzUh z8v#7{qD00B6GLKkKhxMAWr!xA9#k59hWuoM;`|+n%Eum3J#vNrOP=tB4W4t~pFE*r ze^fRr%ALj={eB3@OB-ipU;YQ5<`>9^FG zmg9)JWY!5CDeiCbAw->}A}6>kVhiaO0qO6a`PflNZxM5+XG4CZ~T zCz*vaT5EgNK`WM~6Q9a-F0s08Mn`=m9-_Z<^u06xl~~jLaa8*LYO=*(KWy+9Y4i`L zZ1eY?lJ&XU*FAlFWoRe5nK`z78`E~U#;48eXcLb;&k*JNkYdj~?E7H?b=DeM2Z~T$PG+Blsg%*YtAF zCA?sq>{sCp{(C4dYY1W#MBcDfmu!wc~XW_9+i4-KMV=bW{C^ z@Py5&=&&$}{DIP=3I$9lFABkj=W>uDI&a=vpCUOmxfFb&UHPQV&fTTc-%_fJJ&5htW zfcbtrNnXB?v8v-CfFjF2!dX}Y5D7>RHWGpW+pEJ}4`Cq zh*>lGLL%w{;4F!tbg;dl9&}H>sq)q4x$*2p!%iO<68__)&sc(UNHN6f2AOS1D zL_i{8-~0fDC_Nv7BOQb~7p5%1+Os}!6o6Nj5#?;VTnW61Hm{oxLD!XnXrEIpv{VrG zVBIGp|7v(Pa<=U=l8bafNxbGeyAtV2_X#OB!K1fHS4NeO*@F=2A|qA}sgI&hiPmT$ zR3Nm4+!za*;1Y9f&9d~z-LxXUlZ5d!tbV^@OotV&DmqPA9jxS_NvdRT4B&5upq|x??+x()1Paxa6!k>f&C8LCOEs-pA ziwq_knud@}1VhIToX`i$sH>%zI67+R8dDd`ogU{i^*Ko}%`D`D`I~Ac+J&moSK z+bCn}Be&vrD=>_d(v2*w1FW!08%B!X@x@LqN(bfiuyT60)fKX^=mzOU1~$fPJ3*JF zY`n@CjowJat!EmC+x8P(0V-f7W)c@|#d5dFPmDxdwi5}%RH_?(e%D{V*Ydd{i}l8r zBQ47N{Dvq1vQG-ND47SOoypY5-G(B3HBLdjy6$riBazYKfX(}Ik{N>9rB?y!nIcs0 z*W|F|G4Hu>(~VX?);Q7kWnFE%93`B5<63qZVoGxgn_$knNKY?3CTjw+RI!9LFzb}P zp+0<|hnzwKco!4DMUy+(XOs3`B4nAfExgBMRPer|P%yf&R3@m*n|C38-Jmi&I$lCg zr4aP>D}9;v^eKY8V2l1qYMN1f<8Q=+V3wWTna1Gn&5t_v2h)Kx0uL9A;4eA(()R-J zIeSS=*yd$2E(WA>-i|v?EdnKU!R=SdO^;M~hN#0#UsX0#980O; z3W`7)o}~Ah(%z^%FhvAG!190R#Hv@8s4@{k+OgSMTS7ZkU)m? zfg7|dvP)45#yH0HlQ?onlC(EtOt#sOg9tKwyA=maEpyO?LyUdXIa2q2=}=6gT4jGm z!!Cz~QDKz8N8ggtcd}&G*L!8N`0|y@g1!|{1A9i;LEhb%P7#<+D==L4-rikr;_BXL zYHkO?q-cXJfPiJ2lYsFUF7X;Hg}``qwJ4(Jiy{gB0KMio6a?F60QLm>Fv^Pb!HXFJ zj~Sv16J#6c%r--WNRb!ZyQc-)06|=fVMa&?!3Jzu7i^jRgS2{v2b^9IxDKowmLxm) z@=tt3Lj-OEgfQm7@1Xuzz=A)(Ro-DqT)HAi{Jhu}%=%%tV+T*Pp7a8_do8+}RkTX9 zkL)5PEO$B>xd5*0V3@u}Auy~%=$c|wc*~q5=9|2#G07wnUZIx;^k#LHOjg9VS&^bo z1vTIDOZ6e)JvdX;Hyt&AhoP^_B0P8B{>SLP9NxaX5JH6PZ{pKkMq3&tyqv8!(Ja{6yS5C zt&AmixXTbmjc|$yA(=t*`{b>25q0Y!@0NhX0wIyEG3MU}X1F9VSnc%WenpsBAx0G=xem_)8CqZ)iF)VpOrX;S&M82lv|%W@D5kT3pD{*bN-R@;?MOB!*J}7 zR9zqU)UGc~Lw9`MQqFLJun!C*ayUTruD~izfXXODx#+uwo2upyiONg_#NOS|x+ct` zt(4Je37S+@@FG;=Px3}lXCCYZBGmf68B{p`cru0r5QmTGKR`(SK7`Yik8$-!6dnMl z3<$Jn8|tAt8yH4h^*j~L77A>|Vm!w?QID?t^`vQsF#hITm-+PDU-_{`;{QWIbzEq znvAwv=o>26ObaD{2ix&Yah9Lh^i$al?W}O)$_!RhteN`u`A`Kf{3|5{%SiC?s#H)( zk*&omY&YQ^SKA}CQxSDDvRa?b#B@yZOLP-eryBVTZZ1W&ggaHVTMCmVWbpd?cO_Uj z9J1WxsL_k>cB2hd>xyQHzxJ-k&j(=%S_Q07hnI4A=Ou^90SQ7jQS3XK^L0Wt7Y(K= z+r=YOO6G9f4;<}Z1fi1O{}{lX2CHh1HjUzgn}j4)40&h5tn0aanHBxzU6vy7HKnw; z3ZgvO=lC-&z<41A!Za1%MZe1s9s_0{$T=P|`GmTVqw9pdgz&{~J>7&ix?Uos!b>{| zO(f2haJ?a68BojzT71ME1~9t5BB%zVofw2Qy05(v?O>3@!C~!2-bB?m-i`E3^W7%e ztk?fgH0od>c>9%su)t?vR=Iw#yTc4MccUx8#bHu{UN4{De=z_i45ZS#Aq+4P^aDrE zvTx+$$8r9YPw2JYse4b4I08K1IiWWjOr#Jp-{WV&eheCOC(^p1oB!%CI&tRR(srML zw5WXNSnM7^v^}-C?TE7w8j40|a++ZLr^}G+VZZM{*J~Dc@K63iIV*m44F~{$0P1i1 zi{}aRr~X3V{}F#NF4yzvZ~Kd%<>tpcv5@uAUp(cB4U7lzx;GP{jQ)aDC3ADnX(DhS z(y{LG*<%T$McYCq(T{eon3RVq_bsuv<|qBzlAEsftqG!P8Tk{(e&sd=6!LDZ9G}s0IzS&*1&qQmJaY^` zJ*J646n)Hsit?(xGAcb`ZBav`1bK2;y6GL0k7*)eCOdy z<$~hJ3PFwt)R&05qmi6O6cHPCF~rv%w3>B#y$$TS%6&WnnyCZkw3^1{X0J5rS+|o& z(>2bY7AT?Ev zZOGzbLlAS%kaa7@=;cpIqYw3z(W7i7w;RN;Tu*1wMi1YDp4~j;?QJAhh0er_Exbd$y0t9@k_FFj&g>#ec1mtkz94(j;1}IM_qp-J>wGeczDaYMet8)(f=Vdpn^MtO-9tT^6LQx@`SuyG@&j{Bw_^+bH%a zcP8xS){E)}4Lp;7bc^TA66DShs(M`lOaqbt^gQK)Sv*W-x`P$EfShP$zzFyRoL(RR z)4(JkB6{xn-D&#WU=C(l-~>Ex8~`8zD!@e`B75%p!kPQSfp^(k;NG}-?E)YH=)p%K z5PUp|#dVCsMW3?v#(jAMq6=!y1sYDgvK2s1fFr;PU=e^w?l*j10(fQFr$gfL5lCY+ zv7F3{ax&U5Qsc5G*^Oe}vR8{Z&)%phkrIIdORk^U%lmwM>bNrjNON)=P9 zp0E+6$fnS@f6Hu0Ff&yo66FgtbsKzqTwCnqu1=Z$oIgT6O+ihHY z8z!+w)N(|fzXgBtmibQowJVR&)zIHh9-rOCv$rfBZIuKjd?eM#4{>|u{LGQV81-EX z$he&t&j8678pNQ00l)9fhIA<;pDwkoFtoFIG8LZTe09oJQ`3Wmp7thS9es`W>gz6E z>A{x!3#yfTq+Z`AA9J z;W=^Yj)ljTz}L)jX2t$rtDzAZ__La)tT9qDC0I(m>ok-ybIJ4%oED`Ah{ut|nEh9` z`pXx6#2wh-cm!vexCUaGvcc^+c8_`6XF?D5pm=3IaW$nTu~n3(8-JhXzJ6X^zLQU( zFFxj(lYdFTq5LEvZH=^T^!4QIY@RkgKJDR&eGI~88DQ>w?mjGWH`v@r!w5#Xn10KA zQ;+%u#(kWo3z14JO~7$*QOe<4(Z^L5_aNOg{*PQ)l&~e5RHA9EN}1yf9K8KVK%Ym{ z@@*1m&}+BieUoa0HYF6q3p-~<8z0|mq}6HUl|2j`oDFEMS>xY* zvn6h-Ru%=}Ph^jnfHZM8-gmtk$AOB5k5R&3Kiq`2%gmLszgtg&g+fRk*PMs|56D>JUhw^omK?%M65 z?o-?^WFYEIIPhqsgWcRuh)r-+qoA0xrE98|A6Yhfqr|%x)6f$;YI%_s}PS3B1 zfF1tmqC=m;xYYV0GmLD)bIK{}a(Kt)@7!2aQ@c~hZ034pD601qwtE}&U2feK0o9`ir<5>A48}$$iE7q9;x`JjlH&Z z&Sv`0PxrX8`XhsaatC^YpYT<+%Hut5^@_;!bbtf?_R#LoDe0RLapX4Nsqy=3i8r9a zBhoU8ZeFWo8!2{u$Pa;9yW{>&se?yBBgDilIYAT3SB%f*L_6 zPyj__TYeBJf^bPCCV>e;5a4G(C)!OmSRF$28^wi|A)w`1L4i_UWoCxr@X^S5Eu*tA zxq}h~%4NGsv{LIi}F2q`|dFJkIrXnT~}9_tYw zh`39MTPf-wt{`=X5-O+1z??u{$sJZ~KsVbZO*kue+pjGKi&`P!GBLHkUiQf!YI?U< z&Q)ja@sNs}qXuh&WH?%tpH9^+dUT0lzxU1<86LK*Ko)mW2-7SuQn8x+?%Xkn&Yjpe z+ooV{4rOy)Mrih$QKGg2pgppvi%A?P7%3sA&p=f4$Y2ZU7{+oZq(e+ouGI*DO^Dyz z*QWBmApghEXv)fxd!HH2kBYfDPzU)Y>jZ>QpcVRnWfWT7O00ozlTowzd={AYVNBcm zh0?Wo>)-YhnZtSY8>3p}vv^kA_MK=v5=Y}wVl>dcznu$Tlwbjqo%wQ6FkxW4@h%h@ zIB$T@Y$5!+0#&uWv`G_PCD1B;ZVl4TCwJKUT)Jj|`jE2E;bWCyN4GpBB+F@4?s)L5 zd!z-Aq#F7lW4Dk(&U$@6bF|Ovyz7Q-I|Y{qY9#665V3v}O%70$qi%9)MKxE!`Vwr! znM!#&U|hWA?oZfyJ_=kdVbi&8h}zCEB3HZSh`IvZH>; zd)E&PlZNAo$S0{M$L8ixnLJ9ctsz*1fc}*&0!swtdpLe#;+BbNKI9XpoJX6_^)oYV zxHrj`xQ%|b}&C28>C8;_5mas&w0LwX$0}<`xrRlL>Ae_X2tYf7GOX4rkoQ80FS2#6@hCYh|*+ z)&0hpdlwqtCxyFG!Z6~U1;^3={)#tr4`2-L&k%$cCpr&i~ z0e!h@#LYxS#sVxgW4}#!6VyX!1Yu@+@)%^^E$iY;VPB7^5@+<-Xc@(ryNp3P72sPQ zc!Ngee*Ex?LaPlRVB6rq_e0Jl+2#T$8oxL~M$laNNd`+elioyT5L@wCc8=0@IA?)m z1;57nRosm;*n7jo-6@E}?py!`DY%Dd^Vu53ORe#pK!3W>CGT9qYh_yZ0FtGKAsx8G z{+0g1@X2qwC*nMGjwK1TatY(F7!M}WeS~s&dmjA>Rs@lsGJPneT*6>mf;?2EI!2|W z_C8vdnFqt|Za_ZIGl%A1-=B5x>189z<;4+7vVoDTy6=*VN8*x+2&N;@>O zjFHR?7B8cQpYTndaberuo@^L&eDu|cM{ZHPvkv^JHUagsl~A`@u>dE}zj6aW%c#(j zS5Rnm0@znm(zEv_Sn%=IwIJzy@Zu42T;jNF%ZAq4+djlkuD6hOBC<~(?ZQXZ3?dP;WPnm4yC>mY2<)QCYIR?53hRs$$ zWae_0;Q$651AO2%E^>+Po?pNtH~*L)ERgg5bam}w;9>fBD!X@P(=5n4rVMi(@xauokJ2VYwDI2`i>%XfAd{%qJ}|&_bQli9VUKHdrb z+Fjg%90QT?H+QOZDPcys^b;IYPN#vSGT8SrghY|5lz`Lo3;IGxX%O`ZV`msAaIUs}H%gQ)T9ixID_S*J5I0XXd;rnqx2X zY)=|_EqP{+(4VGmb-deqna9z{2Y;HnfIuh!f4nXA^5vhWx21k<|Fd_e{&^I!$NT;X z%5%~0pGS#%3<&=O<(XUm=TVj)7oa~ud1l=Hc@!GZKSg=YM1MzlxozZE>pFrxqx`;2 z;`tSU-%(y}ydeH%fGi)Ay~iHCK>5=RC%*%}tlIvnmM!Y8KgV~lYg{Q z`hV}uzlD~+U;J{7eu`uMXx5C+7ys=4e~4;+$9cH`KCS3~v{I&D*7x5peCem3*2h0u d67!#~meOLNk8T720QK>2@X;;Nyn6ceKLEZV%@zOv literal 0 HcmV?d00001 diff --git a/flattentool/tests/test_unflatten.py b/flattentool/tests/test_unflatten.py index 0b42518d..15cd395b 100644 --- a/flattentool/tests/test_unflatten.py +++ b/flattentool/tests/test_unflatten.py @@ -120,7 +120,14 @@ def test_unflatten_xml_comment(tmpdir, dirname): @pytest.mark.parametrize("input_format", ["xlsx", "ods"]) -def test_unflatten_org_xml_xlsx(tmpdir, input_format): +def test_unflatten_org_xml_minimal(tmpdir, input_format): + schema_path = "examples/iati" + schemas = [ + "iati-activities-schema.xsd", + "iati-organisations-schema.xsd", + "iati-common.xsd", + ] + schema_filepaths = ["{}/{}".format(schema_path, schema) for schema in schemas] unflatten( input_name="flattentool/tests/fixtures/{}/iati-org.{}".format( input_format, input_format @@ -130,6 +137,7 @@ def test_unflatten_org_xml_xlsx(tmpdir, input_format): id_name="organisation-identifier", xml=True, metatab_name="Meta", + xml_schemas=schema_filepaths, ) assert ( open("flattentool/tests/fixtures/iati-org.xml").read() @@ -137,6 +145,32 @@ def test_unflatten_org_xml_xlsx(tmpdir, input_format): ) +@pytest.mark.parametrize("input_format", ["xlsx"]) +def test_unflatten_org_xml_with_documents(tmpdir, input_format): + schema_path = "examples/iati" + schemas = [ + "iati-activities-schema.xsd", + "iati-organisations-schema.xsd", + "iati-common.xsd", + ] + schema_filepaths = ["{}/{}".format(schema_path, schema) for schema in schemas] + unflatten( + input_name="flattentool/tests/fixtures/{}/IATI CoVE #organisation #broken-docs #template #public #demo.{}".format( + input_format, input_format + ), + output_name=tmpdir.join("output.xml").strpath, + input_format=input_format, + id_name="organisation-identifier", + xml=True, + metatab_name="Meta", + xml_schemas=schema_filepaths, + ) + assert ( + open("flattentool/tests/fixtures/iati-org-with-documents.xml").read() + == tmpdir.join("output.xml").read() + ) + + @pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_unflatten_empty_column_header(tmpdir, input_format): unflatten( From 1adfb33ed4f4937178011106bd76befc08059d03 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Wed, 15 Jun 2022 17:06:37 +0100 Subject: [PATCH 2/2] release: Make 0.17.2 release --- CHANGELOG.md | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f44eec3b..50012bb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [0.17.2] - 2022-06-15 + ### Fixed - Handle extensions in the schema xsd correctly when sorting https://github.com/OpenDataServices/cove/issues/1366 diff --git a/setup.py b/setup.py index dd027ae2..70cd70d2 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ def run(self): setup( name="flattentool", - version="0.17.1", + version="0.17.2", author="Open Data Services", author_email="code@opendataservices.coop", packages=["flattentool"],