Created
April 30, 2020 16:37
-
-
Save MikeyBurkman/76ab65246d6244ab123cf2d94d228fbd to your computer and use it in GitHub Desktop.
XPath for TypeScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { SelectedValue, useNamespaces } from 'xpath';
import { DOMParser } from 'xmldom';
import * as _ from 'lodash';
import { VError } from 'verror';
/**
 * Helpers for parsing XML and running typed XPath selections against it.
 *
 * Every `select*` function accepts either an XML string (which is parsed first)
 * or an already-parsed Node as the context to select from.
 */
interface Parser {
  /**
   * Parses an XML string into a Document.
   */
  parseXml: (xml: string) => Document;
  /**
   * Type guard that returns true iff the selected value is a Node.
   */
  isNode(selectedValue: SelectedValue | undefined): selectedValue is Node;
  /**
   * Type guard that returns true iff the selected value is a Node, or is null/undefined.
   */
  isNodeMaybe(
    selectedValue: SelectedValue | undefined
  ): selectedValue is Node | undefined;
  /**
   * Type guard that returns true iff the selected value is an Attr.
   */
  isAttrNode(selectedValue: SelectedValue | undefined): selectedValue is Attr;
  /**
   * Type guard that returns true iff the selected value is an Attr, or is null/undefined.
   */
  isAttrNodeMaybe(
    selectedValue: SelectedValue | undefined
  ): selectedValue is Attr | undefined;
  /**
   * Type guard that returns true iff the selected value is a Text.
   */
  isTextNode(selectedValue: SelectedValue | undefined): selectedValue is Text;
  /**
   * Type guard that returns true iff the selected value is a Text, or is null/undefined.
   */
  isTextNodeMaybe(
    selectedValue: SelectedValue | undefined
  ): selectedValue is Text | undefined;
  /**
   * Selects the first Node to match the given selector. Will throw an error if it doesn't exist or
   * the selector matches a non-Node.
   */
  selectFirstNode(selector: string, node: string | Node): Node;
  /**
   * Selects the first Node to match the given selector, or undefined if nothing matches.
   * Will throw an error if the selector matches a non-Node.
   */
  selectFirstNodeMaybe(selector: string, node: string | Node): Node | undefined;
  /**
   * Selects the first Text node to match the given selector. Will throw an error if it doesn't exist or
   * the selector matches a non-Text node.
   */
  selectFirstTextNode(selector: string, node: string | Node): Text;
  /**
   * Selects the first Text node to match the given selector, or undefined if nothing matches.
   * Will throw an error if the selector matches a non-Text node.
   */
  selectFirstTextNodeMaybe(
    selector: string,
    node: string | Node
  ): Text | undefined;
  /**
   * Selects the first Attr node to match the given selector. Will throw an error if it doesn't exist or
   * the selector matches a non-Attr node.
   */
  selectFirstAttrNode(selector: string, node: string | Node): Attr;
  /**
   * Selects the first Attr node to match the given selector, or undefined if nothing matches.
   * Will throw an error if the selector matches a non-Attr node.
   */
  selectFirstAttrNodeMaybe(
    selector: string,
    node: string | Node
  ): Attr | undefined;
  /**
   * Selects all Nodes that match the given selector. Will throw an error if the selector matches any non-Nodes.
   */
  selectNodes(selector: string, node: string | Node): Node[];
  /**
   * Selects all Text nodes that match the given selector. Will throw an error if the selector matches any non-Text nodes.
   */
  selectTextNodes(selector: string, node: string | Node): Text[];
  /**
   * Selects all Attr nodes that match the given selector. Will throw an error if the selector matches any non-Attr nodes.
   */
  selectAttrNodes(selector: string, node: string | Node): Attr[];
}
/**
 * Builds an XPath parser using the provided namespaces.
 *
 * The namespaces map is handed to xpath's `useNamespaces`; its keys are the
 * prefixes used in selectors (without any `xmlns:` part) and its values are
 * the namespace URIs.
 * For instance, if you are parsing SOAP messages, you will need to provide at least
 * `{ soap: 'http://schemas.xmlsoap.org/soap/envelope/' }`
 *
 * @param namespaces Prefix-to-URI map; defaults to no namespaces.
 * @returns A Parser whose select functions throw a VError (with `selector`,
 *   `resultFound` and `isRightType` in its info) when a result is missing or
 *   has the wrong type.
 */
export const buildParser = (
  namespaces: Record<string, string> = {}
): Parser => {
  // DOM nodeType values for the node kinds this parser distinguishes.
  const ATTRIBUTE_NODE = 2;
  const TEXT_NODE = 3;

  const select = useNamespaces(namespaces);

  /**
   * Parses an XML string into a Document. The MIME type is passed explicitly
   * so the input is always treated as XML.
   */
  const parseXml = (xml: string): Document =>
    new DOMParser().parseFromString(xml, 'text/xml');

  /**
   * Accepts either raw XML or an already-parsed node and returns a node to
   * select from, without reassigning the caller-visible parameter.
   */
  const toNode = (node: string | Node): Node =>
    typeof node === 'string' ? parseXml(node) : node;

  /**
   * True iff the value exposes a `nodeType` property (own or inherited).
   */
  const isNode = (
    selectedValue: SelectedValue | undefined
  ): selectedValue is Node => _.hasIn(selectedValue, 'nodeType');

  /**
   * True iff the value is null/undefined or passes `isNode`.
   */
  const isNodeMaybe = (
    selectedValue: SelectedValue | undefined
  ): selectedValue is Node | undefined =>
    _.isNil(selectedValue) || isNode(selectedValue);

  /**
   * HOF for verifying that a value is a Node with the correct nodeType
   */
  function isNodeType<T extends Node>(nodeType: number) {
    return (selectedValue: SelectedValue | undefined): selectedValue is T =>
      isNode(selectedValue) && selectedValue.nodeType === nodeType;
  }

  /**
   * HOF for verifying that a value is either null/undefined, OR a node with
   * the correct nodeType
   */
  function isNodeTypeMaybe<T extends Node>(nodeType: number) {
    return (
      selectedValue: SelectedValue | undefined
    ): selectedValue is T | undefined =>
      _.isNil(selectedValue) ||
      (isNode(selectedValue) && selectedValue.nodeType === nodeType);
  }

  /**
   * HOF for doing a selectFirst and then asserting that the returned value is a specific type.
   */
  function selectFirstX<T extends SelectedValue | undefined>(
    isX: (sv: SelectedValue | undefined) => sv is T
  ) {
    return (selector: string, node: string | Node): T => {
      const selected = select(selector, toNode(node), true);
      if (isX(selected)) {
        return selected;
      }
      throw new VError(
        {
          info: {
            selector,
            // Nil check rather than truthiness: '', 0 and false are real results.
            resultFound: !_.isNil(selected),
            // The guard has already rejected the value on this path.
            isRightType: false
          }
        },
        'Unable to find first value for selectFirst; might be wrong type'
      );
    };
  }

  /**
   * HOF for doing a select and then asserting that every returned value is a specific type.
   */
  function selectX<T extends SelectedValue | undefined>(
    isX: (sv: SelectedValue | undefined) => sv is T
  ) {
    return (selector: string, node: string | Node): T[] => {
      const raw: SelectedValue | SelectedValue[] = select(
        selector,
        toNode(node)
      );
      // An XPath expression can evaluate to a single string/number/boolean
      // instead of a node list; wrap it so it is reported through the VError
      // below instead of failing with a TypeError on `.map`.
      const selectedValues = Array.isArray(raw) ? raw : [raw];
      return selectedValues.map((selected, idx) => {
        if (isX(selected)) {
          return selected;
        }
        throw new VError(
          {
            info: {
              selector,
              idx,
              // Nil check rather than truthiness: '', 0 and false are real results.
              resultFound: !_.isNil(selected),
              // The guard has already rejected the value on this path.
              isRightType: false
            }
          },
          'Unable to find a value for select; might be wrong type'
        );
      });
    };
  }

  const isAttrNode = isNodeType<Attr>(ATTRIBUTE_NODE);
  const isAttrNodeMaybe = isNodeTypeMaybe<Attr>(ATTRIBUTE_NODE);
  const isTextNode = isNodeType<Text>(TEXT_NODE);
  const isTextNodeMaybe = isNodeTypeMaybe<Text>(TEXT_NODE);

  const parser: Parser = {
    parseXml,
    isNode,
    isNodeMaybe,
    isAttrNode,
    isAttrNodeMaybe,
    isTextNode,
    isTextNodeMaybe,
    selectFirstNode: selectFirstX(isNode),
    selectFirstNodeMaybe: selectFirstX(isNodeMaybe),
    selectFirstAttrNode: selectFirstX(isAttrNode),
    selectFirstAttrNodeMaybe: selectFirstX(isAttrNodeMaybe),
    selectFirstTextNode: selectFirstX(isTextNode),
    selectFirstTextNodeMaybe: selectFirstX(isTextNodeMaybe),
    selectNodes: selectX(isNode),
    selectAttrNodes: selectX(isAttrNode),
    selectTextNodes: selectX(isTextNode)
  };
  return parser;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { assert } from 'chai';
import { buildParser } from './index';

// Usage walkthrough for the parser returned by buildParser; doubles as a smoke test.
describe(__filename, () => {
  it('Should do basic parsing with namespaces', () => {
    const xml = `
<book xmlns:bookml="http://example.com/book">
<title id="55">Harry Potter</title>
<characters>
<character>Harry</character>
<character>Ron</character>
</characters>
</book>`;
    const parser = buildParser({ bookml: 'http://example.com/book' });

    // The "selectFirst" functions that don't end in "maybe" will always match something.
    // If they do not match, they will throw a VError with some extra details.
    assert.equal(
      parser.selectFirstTextNode('//book/title/text()', xml).data,
      'Harry Potter'
    );

    // You can pre-parse the xml string first, and pass that document object around.
    // This is recommended if you are going to be selecting multiple things from the xml.
    const doc = parser.parseXml(xml);
    assert.equal(
      parser.selectFirstTextNode('//book/title/text()', doc).data,
      'Harry Potter'
    );
    assert.equal(
      parser.selectFirstAttrNode('//book/title/@id', doc).value,
      '55'
    );

    // A non-"maybe" selector that matches nothing throws instead of returning undefined.
    assert.throws(() => parser.selectFirstTextNode('//book/idontexist', doc));

    // There are also functions to return an array of nodes.
    // All nodes in the array must match the expected type, or a VError is thrown.
    assert.deepEqual(
      parser
        .selectTextNodes('//book/characters/character/text()', doc)
        .map((node) => node.data),
      ['Harry', 'Ron']
    );

    // Can also pull out inner elements and then use them, instead of making very long selector strings.
    const characters = parser.selectFirstNode('//book/characters', doc);
    assert.deepEqual(
      parser
        .selectTextNodes('character/text()', characters)
        .map((node) => node.data),
      ['Harry', 'Ron']
    );

    // The "maybe" functions are allowed to return undefined if the selector doesn't match anything.
    assert.isUndefined(
      parser.selectFirstTextNodeMaybe('//book/idontexist', doc)
    );
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment