Last active
February 22, 2022 19:34
-
-
Save bwbroersma/152c88f5919a72aaad85efaf84498719 to your computer and use it in GitHub Desktop.
How to convert EMLNL candidate lists to CSV and JSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Convert the EML_NL candidate lists matched by */K*.eml.xml into
#   1. one semicolon-separated CSV file (UTF-8 with BOM, CRLF line endings), and
#   2. one JSON file in which the CSV rows are nested as
#      municipality -> lists (Lijsten) -> candidates (Kandidaten).
#
# Tools used:
#   - xmlstarlet
#   - c2j, which can be found at https://github.com/bwbroersma/csv2jsonl/ or be
#     replaced with the slower csvjson from csvkit, use --stream!
#     (https://csvkit.readthedocs.io/en/latest/scripts/csvjson.html)
#   - jq
#
# Add -C to the xmlstarlet call to print the generated XSLT (included below)
# instead of running it.
#
# NOTE: only the list name is CSV-quoted, and only when it contains a double
# quote; every other field is written verbatim.

# UTC timestamp for the file names: the "+00:00" offset is stripped and the
# first ":" is replaced by "." (e.g. 2022-02-22T19.34).
TS="$(date -u -Iminute | sed 's/+00:00//g;s/:/./')"
CSV="GR2022_alle-kandidaten_$TS.csv"
JSON="GR2022_alle-kandidaten_$TS.json"

# Layout of the xmlstarlet template below (comments cannot be placed between
# the continuation lines):
#   regionCode / regionName  Id and text of kr:ElectionDomain.
#   per eml:Affiliation:
#     listId       numeric AffiliationIdentifier/@Id.
#     listName     the RegisteredName, wrapped in double quotes with embedded
#                  double quotes doubled when it contains one; when there is no
#                  RegisteredName: "Blanco (<prefix + last name>, <initials>)"
#                  of the candidate whose CandidateIdentifier/@Id is 1.
#     publishGender  "ja" when kr:ListData/@PublishGender is "true", else "nee".
#     prefix       the six leading columns shared by all candidates of the list.
#   per eml:Candidate: one output line. The country column is the first two
#     characters of the CountryNameCode followed by "NL", so it falls back to
#     "NL" when no code is present. The gender column is the first letter of
#     eml:Gender with "f" mapped to "v" and "u" mapped to "?".
#
# The country lookup uses the xAL prefix exactly as bound with -N: XPath
# prefixes are case-sensitive, so "xal" would only resolve if the input file
# itself happened to declare it.
(echo -e "\xEF\xBB\xBFGemeenteCode;GemeenteNaam;LijstNummer;LijstNaam;DocumentTaal;PublicatieGeslacht;KandidaatNummer;Initialen;Roepnaam;Tussenvoegsel;Achternaam;Woonplaats;Land;Geslacht"
 xmlstarlet sel \
    -N eml="urn:oasis:names:tc:evs:schema:eml" \
    -N kr="http://www.kiesraad.nl/extensions" \
    -N xNL="urn:oasis:names:tc:ciq:xsdschema:xNL:2.0" \
    -N xAL="urn:oasis:names:tc:ciq:xsdschema:xAL:2.0" \
    -T -t \
    --var regionCode='string(/eml:EML/eml:CandidateList/eml:Election/eml:ElectionIdentifier/kr:ElectionDomain/@Id)' \
    --var regionName='/eml:EML/eml:CandidateList/eml:Election/eml:ElectionIdentifier/kr:ElectionDomain/text()' \
    -m '/eml:EML/eml:CandidateList/eml:Election/eml:Contest/eml:Affiliation' \
      --var listId='number(eml:AffiliationIdentifier/@Id)' \
      --var listName \
        --if 'eml:AffiliationIdentifier/eml:RegisteredName/text()' \
          --if "contains(eml:AffiliationIdentifier/eml:RegisteredName/text(),'\"')" \
            -c "concat('\"',str:replace(eml:AffiliationIdentifier/eml:RegisteredName/text(),'\"','\"\"'),'\"')" \
          --else \
            -c 'eml:AffiliationIdentifier/eml:RegisteredName/text()' \
          -b \
        --else \
          --var candidatePersonName='./eml:Candidate[eml:CandidateIdentifier/@Id=1]/eml:CandidateFullName/xNL:PersonName' \
          --var candidateLastName \
            --if '$candidatePersonName/xNL:NamePrefix' \
              -c 'concat($candidatePersonName/xNL:NamePrefix/text()," ",$candidatePersonName/xNL:LastName/text())' \
            --else \
              -c '$candidatePersonName/xNL:LastName/text()' \
            -b \
          -b \
          -c 'concat("Blanco (",$candidateLastName,", ",$candidatePersonName/xNL:NameLine[@NameType="Initials"]/text(),")")' \
        -b \
      -b \
      --var publicationLanguage='kr:ListData/@PublicationLanguage' \
      --var publishGender \
        --if 'kr:ListData[@PublishGender="true"]' \
          -o 'ja' \
        --else \
          -o 'nee' \
        -b \
      -b \
      --var prefix='concat($regionCode,";",$regionName,";",$listId,";",$listName,";",$publicationLanguage,";",$publishGender,";")' \
      -m 'eml:Candidate' \
        --var candidatePersonName='eml:CandidateFullName/xNL:PersonName' \
        -c 'concat($prefix,string(eml:CandidateIdentifier/@Id),";",$candidatePersonName/xNL:NameLine[@NameType="Initials"]/text(),";",$candidatePersonName/xNL:FirstName/text(),";",$candidatePersonName/xNL:NamePrefix/text(),";",$candidatePersonName/xNL:LastName/text(),";",eml:QualifyingAddress//xAL:Locality/xAL:LocalityName/text(),";",substring(concat(.//xAL:CountryNameCode,"NL"),1,2),";",translate(substring(eml:Gender,1,1),"fu","v?"))' \
        -n \
      -b \
    -b \
    */K*.eml.xml) | sed -z 's/\n/\r\n/g' > "$CSV"

# Turn the CSV rows into JSON objects (c2j), then let jq slurp them and group
# them per municipality and, inside each municipality, per list. The grouping
# keys are removed from the nested objects, and PublicatieGeslacht becomes a
# boolean ("ja" -> true).
c2j -d ';' < "$CSV" | jq -sc '
  group_by(.GemeenteCode, .GemeenteNaam)
  | map({
      GemeenteCode: (.[0].GemeenteCode | tostring),
      GemeenteNaam: .[0].GemeenteNaam,
      Lijsten: (
        map(del(.GemeenteCode, .GemeenteNaam))
        | group_by(.LijstNummer, .LijstNaam, .DocumentTaal, .PublicatieGeslacht)
        | map({
            LijstNummer: .[0].LijstNummer,
            LijstNaam: .[0].LijstNaam,
            DocumentTaal: .[0].DocumentTaal,
            PublicatieGeslacht: (.[0].PublicatieGeslacht == "ja"),
            Kandidaten: map(del(.LijstNummer, .LijstNaam, .DocumentTaal, .PublicatieGeslacht))
          })
      )
    })
' > "$JSON"
exit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0"?>
<!--
  Writes one semicolon-separated text line per candidate of an EML_NL
  candidate list. Columns: region code; region name; list number; list name;
  publication language; publish gender (ja/nee); candidate number; initials;
  first name; name prefix; last name; locality; country; gender.

  Double quotes inside attribute values are written as &quot; and the line
  terminator as &#10; so that the stylesheet is well-formed XML.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:eml="urn:oasis:names:tc:evs:schema:eml" xmlns:kr="http://www.kiesraad.nl/extensions" xmlns:xNL="urn:oasis:names:tc:ciq:xsdschema:xNL:2.0" xmlns:xAL="urn:oasis:names:tc:ciq:xsdschema:xAL:2.0" xmlns:str="http://exslt.org/strings" xmlns:xalanredirect="org.apache.xalan.xslt.extensions.Redirect" version="1.0" extension-element-prefixes="str xalanredirect">
  <xsl:output omit-xml-declaration="yes" indent="no" method="text"/>
  <xsl:template match="/">
    <!-- Id and name of the election domain; repeated on every output line. -->
    <xsl:variable name="regionCode" select="string(/eml:EML/eml:CandidateList/eml:Election/eml:ElectionIdentifier/kr:ElectionDomain/@Id)"/>
    <xsl:variable name="regionName" select="/eml:EML/eml:CandidateList/eml:Election/eml:ElectionIdentifier/kr:ElectionDomain/text()"/>
    <xsl:for-each select="/eml:EML/eml:CandidateList/eml:Election/eml:Contest/eml:Affiliation">
      <xsl:variable name="listId" select="number(eml:AffiliationIdentifier/@Id)"/>
      <xsl:variable name="listName">
        <xsl:choose>
          <xsl:when test="eml:AffiliationIdentifier/eml:RegisteredName/text()">
            <xsl:choose>
              <!-- A name containing a double quote is wrapped in double quotes, with the embedded quotes doubled. -->
              <xsl:when test="contains(eml:AffiliationIdentifier/eml:RegisteredName/text(),'&quot;')">
                <xsl:copy-of select="concat('&quot;',str:replace(eml:AffiliationIdentifier/eml:RegisteredName/text(),'&quot;','&quot;&quot;'),'&quot;')"/>
              </xsl:when>
              <xsl:otherwise>
                <xsl:copy-of select="eml:AffiliationIdentifier/eml:RegisteredName/text()"/>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:when>
          <xsl:otherwise>
            <!-- No registered name: build "Blanco (<last name>, <initials>)" from the candidate whose identifier is 1. -->
            <xsl:variable name="candidatePersonName" select="./eml:Candidate[eml:CandidateIdentifier/@Id=1]/eml:CandidateFullName/xNL:PersonName"/>
            <xsl:variable name="candidateLastName">
              <xsl:choose>
                <xsl:when test="$candidatePersonName/xNL:NamePrefix">
                  <xsl:copy-of select="concat($candidatePersonName/xNL:NamePrefix/text(),' ',$candidatePersonName/xNL:LastName/text())"/>
                </xsl:when>
                <xsl:otherwise>
                  <xsl:copy-of select="$candidatePersonName/xNL:LastName/text()"/>
                </xsl:otherwise>
              </xsl:choose>
            </xsl:variable>
            <xsl:copy-of select="concat('Blanco (',$candidateLastName,', ',$candidatePersonName/xNL:NameLine[@NameType='Initials']/text(),')')"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:variable>
      <xsl:variable name="publicationLanguage" select="kr:ListData/@PublicationLanguage"/>
      <xsl:variable name="publishGender">
        <xsl:choose>
          <xsl:when test="kr:ListData[@PublishGender='true']">
            <xsl:text>ja</xsl:text>
          </xsl:when>
          <xsl:otherwise>
            <xsl:text>nee</xsl:text>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:variable>
      <!-- The six leading columns, identical for every candidate on this list. -->
      <xsl:variable name="prefix" select="concat($regionCode,';',$regionName,';',$listId,';',$listName,';',$publicationLanguage,';',$publishGender,';')"/>
      <xsl:for-each select="eml:Candidate">
        <xsl:variable name="candidatePersonName" select="eml:CandidateFullName/xNL:PersonName"/>
        <!--
          Country: first two characters of the country code followed by "NL",
          so "NL" is used when no code is present. Gender: first letter of
          eml:Gender with f mapped to v and u mapped to ?. The country lookup
          uses the xAL prefix declared on xsl:stylesheet (prefixes are
          case-sensitive).
        -->
        <xsl:copy-of select="concat($prefix,string(eml:CandidateIdentifier/@Id),';',$candidatePersonName/xNL:NameLine[@NameType='Initials']/text(),';',$candidatePersonName/xNL:FirstName/text(),';',$candidatePersonName/xNL:NamePrefix/text(),';',$candidatePersonName/xNL:LastName/text(),';',eml:QualifyingAddress//xAL:Locality/xAL:LocalityName/text(),';',substring(concat(.//xAL:CountryNameCode,'NL'),1,2),';',translate(substring(eml:Gender,1,1),'fu','v?'))"/>
        <!-- End of record: a line feed, not a space. -->
        <xsl:text>&#10;</xsl:text>
      </xsl:for-each>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment