Created
July 4, 2020 22:41
-
-
Save rohieb/10f58325f5fb6bc02ffa2e7ed6260274 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- SPDX-License-Identifier: 0BSD | |
-- Copyright (C) 2020 Roland Hieber | |
-- Convert headers of level >= 3 to LaTeX \paragraph{}s (usable with koma-script classes) | |
-- | |
-- Refuse to run for any output format other than the ones listed here: the
-- filter emits raw TeX, so the result is only useful for these writers.
-- `FORMAT` is the global set by pandoc; `tostring` keeps the error message
-- readable even when it is missing.
local supported_formats = {
  latex = true,
  beamer = true,
  json = true,
  native = true,
}
if not supported_formats[FORMAT] then
  error("unsupported format: " .. tostring(FORMAT))
end

local List = require 'pandoc.List'
-- The identifier in the native representation can still contain Unicode, and
-- pandoc's LaTeX writer takes additional care to escape unicode characters
-- when creating the label in the header and when referencing it in links (see
-- the function `toLabel` in pandoc's Text/Pandoc/Writers/LaTeX.hs).
-- Sadly this function is not exposed to Lua, so we have to make our own
-- compatible version.
--
-- ASCII letters, ASCII digits and the characters "_-+=:;." are copied
-- verbatim; every other code point is written as "ux" followed by its value in
-- lowercase hexadecimal.
--
-- Input:  (string) identifier from native AST (`attr.identifier`)
-- Output: (string) label, compatible with the LaTeX writer
-- Raises: an error if the input is not a well-formed sequence of UTF-8 leader
--         and continuation bytes (stray continuation byte, invalid leader
--         byte, or truncated sequence). Byte offsets in the message are
--         0-based.
function to_label(str)
  local function do_error(msg, n, c)
    error(string.format("%s at input byte %d (0x%x)", msg, n, c))
  end

  -- Escape a single code point the same way pandoc's `toLabel` does.
  local function uc_escape(val)
    if (val >= 0x30 and val <= 0x39) or   -- isDigit x && isAscii x
       (val >= 0x41 and val <= 0x5A) or
       (val >= 0x61 and val <= 0x7A) or   -- isLetter x && isAscii x
       val == 0x5F or val == 0x2D or
       val == 0x2B or val == 0x3D or
       val == 0x3A or val == 0x3B or
       val == 0x2E                        -- x `elemText` "_-+=:;."
    then
      return string.char(val)
    end
    return string.format("ux%x", val)
  end

  -- Decode code points from UTF-8 by hand, one byte at a time.
  local parts = {}  -- escaped pieces, joined once at the end
  local seq = 0     -- bytes of the current sequence still to be consumed
  local val = 0     -- code point accumulated so far
  local c = nil     -- byte under inspection (kept for the final error message)

  for n = 1, #str do
    c = string.byte(str, n)
    if seq == 0 then
      -- leader byte: 0b0xxx xxxx, 0b110x xxxx, 0b1110 xxxx, 0b1111 0xxx
      if c < 0x80 then
        seq, val = 1, c
      elseif c < 0xC0 then
        do_error("invalid UTF-8 sequence: expecting leader", n - 1, c)
      elseif c < 0xE0 then
        seq, val = 2, c & 0x1F
      elseif c < 0xF0 then
        seq, val = 3, c & 0x0F
      elseif c < 0xF8 then
        seq, val = 4, c & 0x07
      else
        -- 0xF8..0xFF can never start a sequence; report it as a UTF-8 error
        -- instead of failing later with an arithmetic error.
        do_error("invalid UTF-8 sequence: invalid leader byte", n - 1, c)
      end
    elseif c >= 0x80 and c < 0xC0 then
      -- continuation byte: 0b10xx xxxx
      val = (val << 6) | (c & 0x3F)
    else
      do_error("invalid UTF-8 continuation byte", n - 1, c)
    end

    seq = seq - 1
    if seq == 0 then
      parts[#parts + 1] = uc_escape(val)
    end
  end

  if seq > 0 then
    do_error("invalid UTF-8 sequence: leader without continuation bytes",
             #str, c)
  end
  return table.concat(parts)
end
-- Temporary pandoc.List of the inline elements making up the previous header
-- (level >= 3) that has not been written to the output yet; nil when no header
-- is pending. Kept file-local so it does not leak into the global environment;
-- the handler functions defined below capture it as an upvalue.
local saved_header = nil
--
-- Handle Headers
--
-- Headers of level 1 and 2 are kept as they are. A header of level >= 3 is
-- removed from the block stream (by returning an empty table) and remembered
-- in `saved_header` as a list of inlines forming a LaTeX \paragraph{}, so that
-- a following block can pick it up.
--
-- Input:  (Header) elem
-- Output: the element itself, an empty table, a Para holding a previously
--         pending header, or a list { Para, elem }
function Header(elem)
  if elem.level < 3 then
    -- A pending paragraph header must be written out before a section header,
    -- otherwise it would end up after it.
    if saved_header then
      local pending = pandoc.Para(saved_header)
      saved_header = nil
      return { pending, elem }
    end
    return elem
  end

  -- Fallback: if more than one Header occurs directly after another, put the
  -- elements of the previous one in an extra Para
  local result = {}
  if saved_header then
    result = pandoc.Para(saved_header)
  end

  -- maybe we need to merge Header with the next Para, save it
  local ident = elem.identifier
  if ident == nil or ident == "" then
    -- No identifier: emit a bare \paragraph{} without an empty
    -- \hypertarget{} / \label{} pair.
    saved_header = List:new {
      pandoc.RawInline("tex", "\\paragraph{"),
      pandoc.Span(elem.content),
      pandoc.RawInline("tex", "}\n"),
    }
  else
    local label = to_label(ident)
    saved_header = List:new {
      pandoc.RawInline("tex", "\\hypertarget{" .. label .. "}{%\n"),
      pandoc.RawInline("tex", "\\paragraph{"),
      pandoc.Span(elem.content),
      pandoc.RawInline("tex", "}\\label{" .. label .. "}}\n"),
    }
  end
  return result
end
-- If Para directly after Header, merge Para with header to get an in-line
-- paragraph header in LaTeX, i.e.:
--
--     \paragraph{Title} Lorem ipsum dolor sit amet...
--
-- instead of:
--
--     \paragraph{Title}
--
--     Lorem ipsum dolor sit amet...
--
-- Input:  (Para) elem
-- Output: a new Para consisting of the pending header inlines followed by the
--         inlines of `elem`, or `elem` itself when no header is pending
function Para(elem)
  if not saved_header then
    return elem
  end

  -- Append the paragraph text to the pending header and emit both as one Para.
  saved_header:extend(elem.content)
  local merged = pandoc.Para(saved_header)
  saved_header = nil
  return merged
end
-- Fallback: In-line LaTeX paragraphs look bad when followed by e.g. a list, so
-- just output the saved header as its own Para, followed by this block.
--
-- Input:  (Block) elem
-- Output: the list { Para, elem } when a header is pending, otherwise `elem`
function Block(elem)
  if not saved_header then
    return elem
  end

  local header_para = pandoc.Para(saved_header)
  saved_header = nil
  return { header_para, elem }
end
-- Fallback for one lonely header at the end with nothing after it: append the
-- pending header as a final Para and clear the pending state, as the other
-- handlers do after emitting it.
--
-- Input:  (Pandoc) doc
-- Output: the same document, with the pending header appended to `doc.blocks`
function Pandoc(doc)
  if saved_header then
    table.insert(doc.blocks, pandoc.Para(saved_header))
    saved_header = nil
  end
  return doc
end
-- All handlers have to live in ONE filter set. pandoc applies the entries of
-- the returned list one after another, each on the whole document; with one
-- handler per entry, `Header` would consume every header before `Para` ever
-- sees a paragraph, and the header/paragraph merging could not work. Within a
-- single set the block handlers run in one traversal and `Pandoc` runs last.
return {
  {
    Header = Header,
    Para = Para,
    Block = Block,
    Pandoc = Pandoc,
  },
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment