Created
August 4, 2023 17:12
-
-
Save vincentml/87eb0f254d2b4af1fcd94d6277f3dbe3 to your computer and use it in GitHub Desktop.
XML Formatter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | |
xmlns:xs="http://www.w3.org/2001/XMLSchema" | |
xmlns:xd="http://www.oxygenxml.com/ns/doc/xsl" | |
xmlns:this="formatter" | |
exclude-result-prefixes="xs xd this" | |
expand-text="true" | |
version="3.0"> | |
<xd:doc scope="stylesheet"> | |
<xd:desc> | |
<xd:p><xd:b>Created on:</xd:b> Aug 3, 2023</xd:p> | |
<xd:p><xd:b>Author:</xd:b> Vincent Lizzi</xd:p> | |
<xd:p><xd:b>Input:</xd:b> XML</xd:p> | |
<xd:p><xd:b>Output:</xd:b> Indented XML as text or html for display</xd:p> | |
</xd:desc> | |
</xd:doc> | |
<xd:doc>
  <xd:desc>Output format: 'text' or 'html'. The parameter is static because it is
    read by the use-when attributes that select the xsl:output declaration and
    the HTML wrapper template.</xd:desc>
</xd:doc>
<xsl:param name="output" as="xs:string" select="'html'" static="true"/>
<xd:doc>
  <xd:desc>The string used for one unit of inserted indentation; by default a single
    space character. Using a different character such as _ can be useful for testing,
    to see which spaces were inserted and which came from the source document.</xd:desc>
</xd:doc>
<xsl:param name="space" as="xs:string" select="' '"/>
<xd:doc>
  <xd:desc>The character to use for line endings. The default is written as a
    character reference because a literal line feed inside an attribute value is
    normalized to a space by the XML parser.</xd:desc>
</xd:doc>
<xsl:param name="break" as="xs:string" select="'&#10;'"/>
<xd:doc>
  <xd:desc>The largest number of attributes an element may have for its attributes
    to be output on the same line as the start tag rather than one per line.</xd:desc>
</xd:doc>
<xsl:param name="attinlinenum" as="xs:integer" select="2"/>
<xd:doc>
  <xd:desc>The longest attribute value (in characters) allowed for attributes to be
    output on a single line: when every attribute value of an element is no longer
    than this, the attributes stay on the start tag's line.</xd:desc>
</xd:doc>
<xsl:param name="attinlinelen" as="xs:integer" select="6"/>
<!-- Default mode: nodes without a matching template are shallow-copied.
     Exactly one xsl:output declaration survives, selected by the static
     $output parameter. -->
<xsl:mode use-accumulators="#all" on-no-match="shallow-copy"/>
<xsl:output method="text" use-when="$output eq 'text'"/>
<xsl:output method="html" use-when="$output eq 'html'"/>
<xd:doc>
  <xd:desc>Produce the HTML wrapper if HTML output is requested. The formatted
    document is placed inside pre/code and highlighted in the browser by
    highlight.js. The heading shows the last path segment of the input's base URI.
    expand-text is switched off for this template so that the braces in the CSS
    are not read as text value templates.</xd:desc>
</xd:doc>
<xsl:template match="/" expand-text="no" use-when="$output eq 'html'">
  <html>
    <head>
      <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.8.0/styles/default.min.css"/>
      <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.8.0/highlight.min.js"></script>
      <script>hljs.highlightAll();</script>
      <style type="text/css">
        /* Browser specific (not valid) styles to make preformatted text wrap */
        pre, code.language-xml {
        white-space: pre-wrap; /* css-3 */
        white-space: -moz-pre-wrap; /* Mozilla, since 1999 */
        white-space: -pre-wrap; /* Opera 4-6 */
        white-space: -o-pre-wrap; /* Opera 7 */
        word-wrap: break-word; /* Internet Explorer 5.5+ */
        }
      </style>
    </head>
    <body>
      <p>Viewing: <xsl:value-of select="replace(base-uri(/), '.*?/?([^/]+)$', '$1')"/></p>
      <!-- whitespace-only text inside pre/code is stripped from the stylesheet,
           so the indentation here does not reach the output -->
      <pre>
        <code class="language-xml">
          <xsl:apply-templates select="node()"/>
        </code>
      </pre>
    </body>
  </html>
</xsl:template>
<xd:doc>
  <xd:desc>Index of every element that has at least one non-blank text node child,
    keyed by element name.</xd:desc>
</xd:doc>
<xsl:key name="text-elements" match="*[text()[normalize-space()]]" use="name()"/>
<xd:doc>
  <xd:desc>Updated at every text node: true when the text node is non-blank, or when
    any element with the same name as the text node's parent contains non-blank text
    somewhere in the document. The value persists until the next text node.</xd:desc>
</xd:doc>
<xsl:accumulator name="mixed-content" initial-value="false()" as="xs:boolean">
  <xsl:accumulator-rule match="text()" select="
      string-length(normalize-space()) gt 0
      or exists(key('text-elements', name(..)))"/>
</xsl:accumulator>
<xd:doc>
  <xd:desc>Updated at every element: true when the element's parent has at least one
    non-blank text node child, i.e. the element sits inside mixed content.</xd:desc>
</xd:doc>
<xsl:accumulator name="mixed-parent" initial-value="false()" as="xs:boolean">
  <xsl:accumulator-rule match="*" select="exists(../text()[normalize-space()])"/>
</xsl:accumulator>
<xd:doc>
  <xd:desc>depth indicates how far to indent an element. It is the number of ancestor
    elements for which the mixed-content accumulator was false on entry, so that
    ancestors inside mixed content do not increase the indentation.</xd:desc>
</xd:doc>
<xsl:accumulator name="depth" initial-value="0" as="xs:integer">
  <xsl:accumulator-rule match="*"
      select="count(ancestor::*[accumulator-before('mixed-content') = false()])"/>
</xsl:accumulator>
<xd:doc>
  <xd:desc>Element nodes are output using context aware formatting. The start tag,
    attributes, namespace declarations and end tag are built as strings. An element
    outside mixed content starts on a new line, indented by two units of $space per
    depth level; an element inside mixed content is written inline. Attributes are
    sorted by name and stay on the start tag's line when the element is in mixed
    content, has at most $attinlinenum attributes, or has only attribute values no
    longer than $attinlinelen; otherwise each attribute goes on its own line.
    Ampersand, less-than and double-quote characters in attribute values and
    namespace URIs are escaped.</xd:desc>
</xd:doc>
<xsl:template match="*">
  <xsl:variable name="depth" as="xs:integer" select="accumulator-before('depth')"/>
  <xsl:variable name="mixed-content" as="xs:boolean" select="accumulator-before('mixed-content')"/>
  <xsl:variable name="mixed-parent" as="xs:boolean" select="accumulator-before('mixed-parent')"/>
  <!-- line break plus indentation, or nothing when the element is written inline -->
  <xsl:variable name="indent" as="xs:string" select="
      if ($mixed-parent or $mixed-content) then ''
      else $break || string-join(for $i in 1 to $depth * 2 return $space, '')"/>
  <xsl:variable name="attcount" as="xs:integer" select="count(@*)"/>
  <xsl:variable name="open" as="xs:string" select="$indent || '&lt;' || name()"/>
  <!-- end of the start tag: self-closing when the element has no child nodes -->
  <xsl:variable name="tail" as="xs:string" select="
      if (exists(node()))
      then if ($attcount gt 1 and $mixed-content)
           then $indent || ' >'
           else '>'
      else '/>'"/>
  <!-- end tag: kept on the same line as the content when the element holds non-blank text -->
  <xsl:variable name="close" as="xs:string" select="
      if ($tail eq '/>') then ''
      else (if (exists(text()[normalize-space()])) then () else $indent)
           || '&lt;/' || name() || '>'"/>
  <xsl:variable name="attlist" as="xs:string*">
    <xsl:variable name="attlen" as="xs:boolean"
        select="every $a in @* satisfies string-length($a) le $attinlinelen"/>
    <xsl:for-each select="@*">
      <xsl:sort select="name()"/>
      <!-- a double quote inside the value must be escaped because the value is
           written between double quotes -->
      <xsl:sequence select="
          (if ($mixed-parent or $attcount le $attinlinenum or $attlen)
           then $space
           else $indent || $space)
          || name() || '=&quot;'
          || replace(this:escape(string()), '&quot;', '&amp;quot;')
          || '&quot;'"/>
    </xsl:for-each>
    <xsl:variable name="here" select="."/>
    <!-- namespace declarations: only prefixes that are not already in scope on an ancestor -->
    <xsl:for-each select="in-scope-prefixes(.)">
      <xsl:if test="not(. eq 'xml') and
                    not(. = $here/ancestor::*/in-scope-prefixes(.))">
        <xsl:variable name="xmlns" select="if (. eq '') then 'xmlns' else 'xmlns:'"/>
        <xsl:sequence select="
            $indent || $space || $xmlns || . || '=&quot;'
            || replace(this:escape(string(namespace-uri-for-prefix(., $here))), '&quot;', '&amp;quot;')
            || '&quot;'"/>
      </xsl:if>
    </xsl:for-each>
  </xsl:variable>
  <!-- $attlist may hold several strings; the || operator accepts at most one item
       per operand, so the list is joined first -->
  <xsl:value-of select="$open || string-join($attlist, '') || $tail"/>
  <xsl:apply-templates select="node()"/>
  <xsl:value-of select="$close"/>
</xsl:template>
<xd:doc>
  <xd:desc>A text node is output, with ampersand and less-than escaped, if it contains
    non-whitespace text or if the mixed-content accumulator is true after it. It is
    discarded if it is insignificant space between elements that are not in mixed
    content.</xd:desc>
</xd:doc>
<xsl:template match="text()">
  <xsl:if test="accumulator-after('mixed-content') or string-length(normalize-space()) ge 1">
    <xsl:value-of select="this:escape(.)"/>
  </xsl:if>
</xsl:template>
<xd:doc>
  <xd:desc>Comment nodes are written out as comment markup text, with ampersands in
    the comment content passed through this:escape-amp. The delimiters are escaped
    here so they are literal text in the stylesheet, not a comment.</xd:desc>
</xd:doc>
<xsl:template match="comment()">
  <xsl:text>&lt;!--{this:escape-amp(.)}--&gt;</xsl:text>
</xsl:template>
<xd:doc>
  <xd:desc>Processing instruction nodes are written out as text: the target name, a
    space, then the instruction's content. The delimiters are escaped here so they
    are literal text in the stylesheet, not a processing instruction.</xd:desc>
</xd:doc>
<xsl:template match="processing-instruction()">
  <xsl:text>&lt;?{name()} {.}?&gt;</xsl:text>
</xsl:template>
<xd:doc>
  <xd:desc>Escape ampersand and less-than characters as the entity references
    &amp;amp; and &amp;lt;. Ampersands are replaced first so that the ampersand
    introduced by the less-than replacement is not escaped again.</xd:desc>
  <xd:param name="text">any string</xd:param>
  <xd:return>the string with every ampersand and less-than replaced</xd:return>
</xd:doc>
<xsl:function name="this:escape" as="xs:string">
  <xsl:param name="text" as="xs:string"/>
  <xsl:sequence select="$text => replace('&amp;', '&amp;amp;') => replace('&lt;', '&amp;lt;')"/>
</xsl:function>
<xd:doc>
  <xd:desc>Escape ampersand characters as the entity reference &amp;amp;. Other
    characters, including less-than, are left unchanged.</xd:desc>
  <xd:param name="text">any string</xd:param>
  <xd:return>the string with every ampersand replaced</xd:return>
</xd:doc>
<xsl:function name="this:escape-amp" as="xs:string">
  <xsl:param name="text" as="xs:string"/>
  <xsl:sequence select="$text => replace('&amp;', '&amp;amp;')"/>
</xsl:function>
</xsl:stylesheet> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment