Last active
February 7, 2021 18:39
-
-
Save unarist/bf2d9d9960d900107cb27e821242be0d to your computer and use it in GitHub Desktop.
DenoでEBMLパース
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { DOMParser, Element } from "https://deno.land/x/deno_dom@v0.1.3-alpha2/deno-dom-wasm.ts";
// Built-in definitions of the EBML header elements and the two global elements.
// https://github.com/ietf-wg-cellar/ebml-specification/blob/master/specification.markdown#ebml-header-elements
// Declared with `let` because more definitions are concatenated onto it later in this file.
let elementDefs = [
  { path: "\\EBML", id: 0x1A45DFA3, type: "master" },
  { path: "\\EBML\\EBMLVersion", id: 0x4286, type: "uinteger" },
  { path: "\\EBML\\EBMLReadVersion", id: 0x42F7, type: "uinteger" },
  { path: "\\EBML\\EBMLMaxIDLength", id: 0x42F2, type: "uinteger" },
  { path: "\\EBML\\EBMLMaxSizeLength", id: 0x42F3, type: "uinteger" },
  { path: "\\EBML\\DocType", id: 0x4282, type: "string" },
  { path: "\\EBML\\DocTypeVersion", id: 0x4287, type: "uinteger" },
  { path: "\\EBML\\DocTypeReadVersion", id: 0x4285, type: "uinteger" },
  { path: "\\EBML\\DocTypeExtension", id: 0x4281, type: "master" },
  // The two elements below are children of DocTypeExtension, so their paths include it.
  { path: "\\EBML\\DocTypeExtension\\DocTypeExtensionName", id: 0x4283, type: "string" },
  { path: "\\EBML\\DocTypeExtension\\DocTypeExtensionVersion", id: 0x4284, type: "uinteger" },
  // Global elements: they may appear inside other master elements.
  { path: "\\(1-\\)CRC-32", id: 0xBF, type: "binary" },
  { path: "\\(-\\)Void", id: 0xEC, type: "binary" },
];
/**
 * Downloads an EBML schema document and extracts one definition per `element` node.
 *
 * The body is parsed with the "text/html" mode of the imported DOMParser and each
 * node's `path`, `id` and `type` attributes are read; `id` is converted with `Number()`.
 *
 * @param schemaUrl URL of the schema document to download.
 * @returns One `{ path, id, type }` record per `element` node found in the document.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchElementDefs(schemaUrl: string) {
  const resp = await fetch(schemaUrl);
  if (!resp.ok) throw new Error(`Fetch failed: ${resp.status}`);

  const bodyText = await resp.text();
  const doc = new DOMParser().parseFromString(bodyText, "text/html")!;

  return [...doc.querySelectorAll("element")]
    .map(x => (x as Element).attributes)
    .map(x => ({ path: x.path, id: Number(x.id), type: x.type }));
}
// Append the element definitions fetched from the Matroska schema to the built-in list.
elementDefs = elementDefs.concat(
  await fetchElementDefs("https://github.com/ietf-wg-cellar/matroska-specification/raw/master/ebml_matroska.xml"),
);
// Memo: 4 bytes are enough for about one month at a 1 ms TimecodeScale, and for offsets/sizes up to 2 GB.
// Number.MAX_SAFE_INTEGER covers a little more than 6 bytes.
// In Matroska's EBML Schema, IDs are at most 4 bytes and sizes at most 8 bytes.
// ...or so I thought, but the full 8 bytes turn out to be used routinely.
/**
 * DataView with helpers for the variable-width big-endian integers, floats, strings
 * and VINTs (variable-size integers) used by EBML.
 *
 * Values are exchanged as JavaScript numbers, so anything above
 * Number.MAX_SAFE_INTEGER is rejected where it can be detected.
 */
class DataViewEx extends DataView {
  textDecoder = new TextDecoder("utf-8");

  constructor(buffer: ArrayBuffer) {
    super(buffer);
  }

  /**
   * Reads a signed big-endian integer of 1-4 bytes (rarely needed in practice).
   * @throws Error for any other size.
   */
  getInt(offset: number, size: number): number {
    switch (size) {
      case 1: return this.getInt8(offset);
      case 2: return this.getInt16(offset);
      // The signed upper 16 bits carry the sign; the last byte is appended unsigned.
      case 3: return (this.getInt16(offset) << 8) + this.getUint8(offset + 2);
      case 4: return this.getInt32(offset);
      default: throw new Error(`Unsupported size ${size} at offset 0x${offset.toString(16)}`);
    }
  }

  /**
   * Reads an unsigned big-endian integer of 1-8 bytes.
   * @throws Error for any other size, or when an 8-byte value exceeds Number.MAX_SAFE_INTEGER.
   */
  getUint(offset: number, size: number): number {
    switch (size) {
      case 1: return this.getUint8(offset);
      case 2: return this.getUint16(offset);
      case 3: return (this.getUint16(offset) << 8) + this.getUint8(offset + 2);
      case 4: return this.getUint32(offset);
      // Beyond 32 bits use multiplication: bitwise operators would truncate to 32 bits.
      case 5: return (this.getUint32(offset) * 0x100) + this.getUint8(offset + 4);
      case 6: return (this.getUint32(offset) * 0x10000) + this.getUint16(offset + 4);
      // The second read overlaps byte 3, so only its low 24 bits are kept.
      case 7: return (this.getUint32(offset) * 0x1000000) + (this.getUint32(offset + 3) & 0xFFFFFF);
      case 8: {
        const wide = this.getBigUint64(offset);
        if (wide > Number.MAX_SAFE_INTEGER)
          throw new Error(`0x${wide.toString(16)} > Number.MAX_SAFE_INTEGER at offset 0x${offset.toString(16)}`);
        return Number(wide);
      }
      default: throw new Error(`Unsupported size ${size} at offset 0x${offset.toString(16)}`);
    }
  }

  /**
   * Writes `value` as an unsigned big-endian integer of 1-8 bytes.
   * Negative values are stored in two's complement, so -1 yields all-one bytes for every size.
   * @throws Error for any other size, or when an 8-byte value exceeds Number.MAX_SAFE_INTEGER.
   */
  setUint(offset: number, size: number, value: number): void {
    switch (size) {
      case 1: this.setUint8(offset, value); return;
      case 2: this.setUint16(offset, value); return;
      case 3:
        this.setUint16(offset, value >>> 8);
        this.setUint8(offset + 2, value);
        return;
      case 4: this.setUint32(offset, value); return;
      // Math.floor (not the implicit truncation toward zero done by setUint32) keeps
      // the high bytes correct for negative values such as -1.
      case 5:
        this.setUint32(offset, Math.floor(value / 0x100));
        this.setUint8(offset + 4, value);
        return;
      case 6:
        this.setUint32(offset, Math.floor(value / 0x10000));
        this.setUint16(offset + 4, value);
        return;
      case 7:
        // The two writes overlap on byte 3; both store the same bits there.
        this.setUint32(offset, Math.floor(value / 0x1000000));
        this.setUint32(offset + 3, value);
        return;
      case 8:
        if (value > Number.MAX_SAFE_INTEGER) throw new Error(`${value} > Number.MAX_SAFE_INTEGER at offset 0x${offset.toString(16)}`);
        this.setBigUint64(offset, BigInt(value));
        return;
      default: throw new Error(`Unsupported size ${size} at offset 0x${offset.toString(16)}`);
    }
  }

  /**
   * Reads a 4- or 8-byte big-endian float.
   * @throws Error for any other size.
   */
  getFloat(offset: number, size: number): number {
    switch (size) {
      case 4: return this.getFloat32(offset);
      case 8: return this.getFloat64(offset);
      default: throw new Error(`Unsupported size ${size} at offset 0x${offset.toString(16)}`);
    }
  }

  /**
   * Reads a VINT whose width (1-8 octets) is given by the position of the first set bit.
   *
   * @param offset Byte offset of the first octet.
   * @param raw When true the marker bit is kept in the value (as needed for element IDs).
   * @returns `[width, value]`; with `raw` false, all-ones data is reported as `Infinity`.
   * @throws Error when an 8-octet value exceeds Number.MAX_SAFE_INTEGER.
   */
  getVINT(offset: number, raw: boolean = false): [number, number] {
    const leading = this.getUint8(offset);
    const decode = (value: number, dataMask: number): number => {
      if (raw) return value;
      // dataMask is 2^k - 1. A remainder strips the marker bit; `&` would truncate
      // the 5-7 octet cases to 32 bits and break the all-ones comparison.
      const data = value % (dataMask + 1);
      return data === dataMask ? Number.POSITIVE_INFINITY : data;
    };
    if (leading & 0x80) return [1, decode(leading, 0x7F)];
    if (leading & 0x40) return [2, decode(this.getUint(offset, 2), 0x3FFF)];
    if (leading & 0x20) return [3, decode(this.getUint(offset, 3), 0x1FFFFF)];
    if (leading & 0x10) return [4, decode(this.getUint(offset, 4), 0x0FFFFFFF)];
    if (leading & 0x08) return [5, decode(this.getUint(offset, 5), 0x07FFFFFFFF)];
    if (leading & 0x04) return [6, decode(this.getUint(offset, 6), 0x03FFFFFFFFFF)];
    if (leading & 0x02) return [7, decode(this.getUint(offset, 7), 0x01FFFFFFFFFFFF)];
    // Provisional support for 8 octets.
    const bigint = this.getBigUint64(offset);
    if (bigint === 0x01FFFFFFFFFFFFFFn) return [8, Number.POSITIVE_INFINITY];
    const result_bigint = raw ? bigint : bigint & 0x00FFFFFFFFFFFFFFn;
    if (result_bigint > Number.MAX_SAFE_INTEGER)
      throw new Error(`Unsupported VINT (${bigint.toString(16)}>MAX_SAFE_INTEGER) at offset 0x${offset.toString(16)}`);
    return [8, Number(result_bigint)];
  }

  /**
   * Writes `value` as a VINT of exactly `width` octets.
   * Passing -1 or `Infinity` writes all-ones data, which getVINT reports as `Infinity`.
   */
  setVINT(offset: number, value: number, width: number): void {
    const data = value === Number.POSITIVE_INFINITY ? -1 : value;
    this.setUint(offset, width, data);
    const vint_marker = 1 << (8 - width);
    // Clear everything at or above the marker position in the first octet, then set the marker.
    this.setInt8(offset, (this.getInt8(offset) & (vint_marker - 1)) | vint_marker);
  }

  /** Decodes `size` bytes starting at `offset` as UTF-8 text. */
  getString(offset: number, size: number): string {
    return this.textDecoder.decode(this.buffer.slice(offset, offset + size));
  }
}
/**
 * Walks the EBML elements in `arr` in document order, yielding one entry per element.
 *
 * Each entry carries the element's position, raw ID, header lengths, data size, decoded
 * value (when its type is decodable) and matching definition, plus methods that edit the
 * underlying buffer in place. Children of "master" elements are visited unless the
 * consumer calls `stepOver()` on the entry before resuming iteration.
 *
 * @param arr Buffer holding the EBML document; it is modified by `makeVoid` / `makeUnknownSize`.
 */
function* createVisitor(arr: ArrayBuffer) {
  const view = new DataViewEx(arr);

  // Index the definitions by ID once; the first definition for an ID wins, matching Array.prototype.find.
  const defsById = new Map<number, (typeof elementDefs)[number]>();
  for (const candidate of elementDefs) {
    if (!defsById.has(candidate.id)) defsById.set(candidate.id, candidate);
  }

  // Decodes a string element and cuts it at the first NUL, including a lone trailing one.
  const readText = (pos: number, size: number): string => {
    const text = view.getString(pos, size);
    const nul = text.indexOf("\x00");
    return nul === -1 ? text : text.slice(0, nul);
  };

  const readValue = (type: string, pos: number, size: number) => {
    switch (type) {
      case "uinteger": return view.getUint(pos, size);
      case "integer": return view.getInt(pos, size);
      case "string": return readText(pos, size);
      case "utf-8": return readText(pos, size);
      case "float": return view.getFloat(pos, size);
      default: return undefined;
    }
  };

  // Reads the start of a SimpleBlock payload: track number (VINT), 16-bit relative timestamp, flags byte.
  const readSimpleBlockStructure = (pos: number) => {
    const [track_len, track] = view.getVINT(pos);
    const relativeTs = view.getInt16(pos + track_len);
    const flags = view.getUint8(pos + track_len + 2);
    return { trackNo: track, relativeTs, keyFrame: !!(flags & 0x80) };
  };

  let position = 0;
  while (position < view.byteLength) {
    // IDs keep their marker bit (raw); sizes have it stripped.
    const [id_len, id] = view.getVINT(position, true);
    const [size_len, size] = view.getVINT(position + id_len);
    const def = defsById.get(id);
    const value = !def ? undefined
      : def.id === 0xA3 ? readSimpleBlockStructure(position + id_len + size_len)
      : readValue(def.type, position + id_len + size_len, size);

    let skipChildren = false;
    yield {
      position,
      id,
      id_len,
      size,
      size_len,
      value,
      def,
      /** Overwrites this element's header so the whole element becomes a Void element of the same total length. */
      makeVoid(): void {
        if (!Number.isFinite(this.size))
          throw new Error(`Cannot void an unknown-size element at offset 0x${this.position.toString(16)}`);
        view.setUint8(this.position, 0xEC /*Void*/);
        // Bytes left after the 1-byte Void ID: the new size field plus its payload.
        const len_with_size = this.id_len + this.size_len + this.size - 1;
        if (len_with_size <= 127) view.setVINT(this.position + 1, len_with_size - 1, 1);
        else view.setVINT(this.position + 1, len_with_size - 7, 7);
      },
      /** Rewrites this element's size field, keeping its width, to the all-ones "unknown size" value. */
      makeUnknownSize(): void {
        const start = this.position + this.id_len;
        for (let i = 1; i < this.size_len; i++) view.setUint8(start + i, 0xFF);
        // First octet: marker bit plus every data bit below it.
        view.setUint8(start, (1 << (9 - this.size_len)) - 1);
      },
      /** Requests that the children of this element be skipped when iteration resumes. */
      stepOver(): void {
        skipChildren = true;
      },
    };

    position = position + id_len + size_len;
    // Only master elements are descended into; everything else is skipped as a whole.
    if (skipChildren || def?.type !== "master") position += size;
  }
}
// Script entry point: dump every element of the input file and optionally write the
// (possibly edited) buffer back out.
// Arguments: <input file> [output file]
if (!Deno.args[0]) {
  console.error("usage: <input file> [output file]");
  Deno.exit(1);
}

const arr = await Deno.readFile(Deno.args[0]);
// The visitor edits arr's underlying buffer in place, so `arr` reflects any changes made below.
const visitor = createVisitor(arr.buffer);

for (const entry of visitor) {
  if (entry.def)
    console.log(`${entry.def.path} ${entry.def.type}(${entry.size} bytes) ${entry.value != null ? "= " + JSON.stringify(entry.value) : ""}`);
  else
    console.log(`unknown element 0x${entry.id.toString(16)}`);

  // Dispatch on the last path component (the element name). The actions are left
  // commented out as examples; enable the ones you need.
  const elementName = (entry.def?.path ?? "").match(/\w+$/)?.[0];
  switch (elementName) {
    case "SeekHead":
    case "Duration":
    case "Cues":
      // entry.makeVoid();
      // entry.stepOver();
      break;
    case "Segment":
      // entry.makeUnknownSize();
      break;
    case "Cluster":
      // "Cues" used to be listed here as well, but it is already handled by the first group.
      // entry.stepOver();
      break;
  }
}

if (Deno.args[1]) {
  await Deno.writeFile(Deno.args[1], arr);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment