Skip to content

Instantly share code, notes, and snippets.

@ejfox
Created September 21, 2024 19:45
Show Gist options
  • Save ejfox/333fa5fa676160b2e5757160a62f555c to your computer and use it in GitHub Desktop.
Save ejfox/333fa5fa676160b2e5757160a62f555c to your computer and use it in GitHub Desktop.
Take markdown, run it through remark/rehype, and end up with most of a blog
import { promises as fs } from 'fs'
import path from 'path'
import { fileURLToPath } from 'url'
import { unified } from 'unified'
import remarkParse from 'remark-parse'
import remarkRehype from 'remark-rehype'
import rehypeStringify from 'rehype-stringify'
import rehypeSlug from 'rehype-slug'
import rehypeAutolinkHeadings from 'rehype-autolink-headings'
import remarkObsidian from 'remark-obsidian'
import remarkMermaid from 'remark-mermaidjs'
import remarkGfm from 'remark-gfm'
import remarkUnwrapImages from 'remark-unwrap-images'
import rehypeMermaid from 'rehype-mermaid'
import { visit } from 'unist-util-visit'
import matter from 'gray-matter'
// ES modules do not provide __filename/__dirname, so both are derived from
// this module's own URL.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

// Input and output locations, both resolved from the parent of this script's
// directory.
const projectRoot = path.join(__dirname, '..')
const contentDir = path.join(projectRoot, 'content', 'blog')
const outputDir = path.join(projectRoot, 'dist', 'processed')

// Prefix prepended to the file name of every non-http image reference.
const CLOUDINARY_BASE_URL = 'https://res.cloudinary.com/ejf/image/upload/'
/**
 * Convert one markdown file into HTML plus derived metadata.
 *
 * Front matter is split off with gray-matter; the remaining markdown is
 * parsed, run through the remark/rehype plugin chain below and serialized to
 * an HTML string.
 *
 * @param {string} filePath - Path of the markdown file to read.
 * @returns {Promise<{html: string, metadata: object}>} The rendered HTML and
 *   the front matter merged with `title`, `toc`, `wordCount`, `readingTime`,
 *   `imageCount` and `linkCount` (the computed keys win over front matter).
 */
async function processMarkdown(filePath) {
  const fileContent = await fs.readFile(filePath, 'utf8')
  const { data: frontmatter, content } = matter(fileContent)

  const processor = unified()
    .use(remarkParse)
    .use(remarkObsidian)
    .use(remarkCustomElements)
    .use(remarkGfm)
    .use(remarkUnwrapImages)
    .use(remarkRehype, { allowDangerousHtml: true })
    .use(rehypeMermaid, {
      strategy: 'inline-svg'
      // other options as needed
    })
    .use(rehypeSlug)
    .use(rehypeAutolinkHeadings)
    .use(rehypeStringify, { allowDangerousHtml: true })

  // Headings are collected from the parsed tree before run() applies the
  // transforming plugins to it.
  const ast = processor.parse(content)
  const { firstHeading, toc } = extractHeadersAndToc(ast)
  const result = await processor.run(ast)
  const html = processor.stringify(result)

  // split() yields empty strings for leading/trailing whitespace (and for
  // empty content); dropping them keeps the count to actual words.
  const wordCount = content.split(/\s+/).filter(Boolean).length
  // Presumably minutes at 250 words per minute.
  const readingTime = Math.ceil(wordCount / 250)
  const imageCount = (content.match(/!\[.*?\]\(.*?\)/g) || []).length
  // The lookbehind keeps `![alt](src)` images out of the link total; they are
  // already reported through imageCount.
  const linkCount = (content.match(/(?<!!)\[.*?\]\(.*?\)/g) || []).length

  return {
    html,
    metadata: {
      ...frontmatter,
      // Title precedence: front matter, then first h1/h2, then the file name.
      title:
        frontmatter.title || firstHeading || path.basename(filePath, '.md'),
      toc,
      wordCount,
      readingTime,
      imageCount,
      linkCount
    }
  }
}
/** Escape the characters that are significant in HTML text and attribute values. */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
}

/** Concatenate the literal text (or image alt text) of a node and its descendants. */
function linkLabelText(node) {
  if (typeof node.value === 'string') return node.value
  if (typeof node.alt === 'string') return node.alt
  return (node.children || []).map(linkLabelText).join('')
}

/**
 * Plugin (registered with `.use()` in processMarkdown) that rewrites three
 * node types of the markdown tree in place:
 *
 * - `image`: URLs that do not start with "http" are replaced by
 *   CLOUDINARY_BASE_URL plus the file name of the original URL.
 * - `link`: the URL is normalized by processLink(); links that get an icon or
 *   are wikilinks are replaced by a raw-HTML `<a>` node.
 * - `code`: fenced blocks whose meta string is exactly "runnable" are replaced
 *   by a raw-HTML `.runnable-code` wrapper.
 *
 * All interpolated values are HTML-escaped, because the raw HTML nodes are not
 * escaped again by this function's own code.
 *
 * @returns {(tree: object) => void} Transformer that mutates the tree.
 */
function remarkCustomElements() {
  return (tree) => {
    visit(tree, 'image', (node) => {
      if (!node.url.startsWith('http')) {
        const imageId = path.basename(node.url)
        node.url = `${CLOUDINARY_BASE_URL}${imageId}`
      }
    })

    visit(tree, 'link', (node) => {
      const { href, icon, isWikilink } = processLink(node.url)
      node.url = href
      if (!icon && !isWikilink) return

      // Flatten every child so labels with emphasis, inline code, etc. keep
      // their text, and an empty link yields an empty label instead of
      // throwing.
      const label = escapeHtml(linkLabelText(node))
      const attributes = [`href="${escapeHtml(href)}"`]
      if (isWikilink) attributes.push('class="internal-link"')
      const iconHtml = icon
        ? `<sup><span class="${icon} ml-1"></span></sup>`
        : ''

      node.type = 'html'
      node.value = `<a ${attributes.join(' ')}>${label}${iconHtml}</a>`
    })

    visit(tree, 'code', (node) => {
      if (node.meta !== 'runnable') return

      const language = escapeHtml(node.lang ?? '')
      const source = node.value
      node.type = 'html'
      // data-source carries the URI-encoded original; the <pre> fallback is
      // escaped so code containing `<` or `&` is not parsed as markup.
      node.value = `<div class="runnable-code" data-language="${language}" data-source="${encodeURIComponent(source)}">
<pre><code class="language-${language}">${escapeHtml(source)}</code></pre>
</div>`
    })
  }
}
/** Concatenate the literal text (or image alt text) of a node and its descendants. */
function headingPlainText(node) {
  if (typeof node.value === 'string') return node.value
  if (typeof node.alt === 'string') return node.alt
  return (node.children || []).map(headingPlainText).join('')
}

/**
 * Walk the heading nodes of a markdown tree and build a two-level table of
 * contents.
 *
 * Heading text is taken from all inline children, so headings that contain
 * emphasis, links or inline code are neither truncated nor skipped. Headings
 * with no text at all are ignored.
 *
 * NOTE(review): slugs come from generateSlug(); the ids on the rendered
 * headings are assigned by rehype-slug and may differ — confirm before using
 * these slugs as anchors.
 *
 * @param {object} tree - Parsed markdown syntax tree.
 * @returns {{firstHeading: string|null, toc: Array<object>}} `firstHeading` is
 *   the text of the first depth-1 or depth-2 heading (null if none). `toc`
 *   holds one `{ text, slug, level: 'h2', children }` entry per depth-2
 *   heading; each depth-3 heading is appended to the children of the most
 *   recent depth-2 entry as `{ text, slug, level: 'h3' }`.
 */
function extractHeadersAndToc(tree) {
  let firstHeading = null
  const toc = []
  let currentH2 = null

  visit(tree, 'heading', (node) => {
    const headingText = headingPlainText(node)
    if (!headingText) return

    const headingSlug = generateSlug(headingText)

    if (!firstHeading && (node.depth === 1 || node.depth === 2)) {
      firstHeading = headingText
    }

    if (node.depth === 2) {
      currentH2 = {
        text: headingText,
        slug: headingSlug,
        level: 'h2',
        children: []
      }
      toc.push(currentH2)
    } else if (node.depth === 3 && currentH2) {
      // Depth-3 headings that appear before any depth-2 heading have no
      // parent entry and are left out.
      currentH2.children.push({
        text: headingText,
        slug: headingSlug,
        level: 'h3'
      })
    }
  })

  return { firstHeading, toc }
}
/**
 * Turn arbitrary text into a lowercase, hyphen-separated slug.
 *
 * Every run of characters other than a-z and 0-9 becomes a single hyphen, and
 * hyphens left at either end (from leading/trailing punctuation or
 * whitespace) are removed.
 *
 * @param {string} str - Text to slugify.
 * @returns {string} The slug; empty when `str` has no ASCII letters or digits.
 */
function generateSlug(str) {
  return str
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
}
// Ordered [substring, icon class] pairs; the first substring found anywhere in
// a link's href decides its icon.
const LINK_ICON_RULES = [
  ['wikipedia', 'i-simple-icons-wikipedia'],
  ['github.com', 'i-simple-icons-github'],
  ['youtube.com', 'i-simple-icons-youtube'],
  ['twitter.com', 'i-simple-icons-twitter'],
  ['itunes.apple', 'i-simple-icons-apple'],
  ['observablehq', 'i-simple-icons-observable'],
  ['pinboard.in', 'i-simple-icons-pinboard'],
  ['goodreads.com', 'i-simple-icons-goodreads'],
  ['glitch.com', 'i-simple-icons-glitch'],
  ['stackoverflow.com', 'i-simple-icons-stackoverflow'],
  ['mailto:', 'i-ic-baseline-email']
]

// Href prefixes that mark a link as an internal page reference.
const PAGE_LINK_PREFIXES = ['#/page/', 'blog/test#/page/']

/**
 * Classify a link target.
 *
 * @param {string} href - The link's URL.
 * @returns {{href: string, icon?: string, isWikilink?: boolean}}
 *   - `{ href, icon }` when the href contains one of the known substrings
 *     (checked first, in table order);
 *   - `{ href: '/blog/<last path segment>', isWikilink: true }` when the href
 *     starts with one of the page-reference prefixes;
 *   - `{ href }` unchanged otherwise.
 */
function processLink(href) {
  const matchedRule = LINK_ICON_RULES.find(([needle]) => href.includes(needle))
  if (matchedRule !== undefined) {
    return { href, icon: matchedRule[1] }
  }

  const isPageReference = PAGE_LINK_PREFIXES.some((prefix) =>
    href.startsWith(prefix)
  )
  if (!isPageReference) {
    return { href }
  }

  // Everything after the final slash is the page slug.
  const slug = href.slice(href.lastIndexOf('/') + 1)
  return { href: `/blog/${slug}`, isWikilink: true }
}
/**
 * Process every `.md` file under contentDir (recursively) and write the
 * results below outputDir:
 *
 * - `<slug>.json` per file, containing the slug, its metadata and the rendered
 *   HTML under `content`;
 * - `manifest-lite.json`, an array with the slug and metadata of every file
 *   (no HTML).
 *
 * Files are processed one at a time, in directory-listing order.
 *
 * @returns {Promise<void>} Rejects with the first read, processing or write
 *   error.
 */
async function processAllFiles() {
  const manifestLite = []

  async function processDirectory(dir) {
    const entries = await fs.readdir(dir, { withFileTypes: true })
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name)
      if (entry.isDirectory()) {
        await processDirectory(fullPath)
      } else if (entry.isFile() && path.extname(entry.name) === '.md') {
        const { html, metadata } = await processMarkdown(fullPath)
        // path.relative() uses the platform separator; slugs always use "/"
        // so they are identical on every OS.
        const slug = path
          .relative(contentDir, fullPath)
          .replace(/\.md$/, '')
          .split(path.sep)
          .join('/')
        const outputPath = path.join(outputDir, `${slug}.json`)
        await fs.mkdir(path.dirname(outputPath), { recursive: true })
        // metadata is spread after slug, so a `slug` key in the front matter
        // replaces the path-derived one in the JSON (not in the file name).
        await fs.writeFile(
          outputPath,
          JSON.stringify({ slug, ...metadata, content: html }, null, 2)
        )
        manifestLite.push({ slug, ...metadata })
      }
    }
  }

  await processDirectory(contentDir)

  // When no markdown file was found nothing has created outputDir yet, and
  // the manifest write would fail with ENOENT.
  await fs.mkdir(outputDir, { recursive: true })
  await fs.writeFile(
    path.join(outputDir, 'manifest-lite.json'),
    JSON.stringify(manifestLite, null, 2)
  )
}
// Entry point: run the build. On failure, log the error and mark the process
// as failed so shells and CI see a non-zero exit code instead of success.
processAllFiles().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment