Skip to content

Instantly share code, notes, and snippets.

@shepherdwind
Created March 26, 2024 10:20
Show Gist options
  • Save shepherdwind/f23d46046e18c16192d16a6769113d5c to your computer and use it in GitHub Desktop.
Save shepherdwind/f23d46046e18c16192d16a6769113d5c to your computer and use it in GitHub Desktop.
Download the images referenced in Markdown files exported from Yuque
import { remark } from 'remark';
import path from 'path';
import fs from 'fs/promises';
import { selectAll } from 'unist-util-select';
import { glob } from 'glob';
import { createWriteStream } from 'fs';
import { pipeline } from 'stream/promises';
import { request } from 'undici';
import { pinyin } from 'pinyin-pro';
/**
 * Process every Markdown file matched by the glob pattern (ignoring
 * `node_modules`) and rewrite each file in place.
 *
 * Each file is run through a remark pipeline made of `replaceHTML` and
 * `downloadAsset`, and the serialized result is written back to the same path.
 * Files are handled one after another.
 *
 * @returns {Promise<void>} Resolves once every matched file has been rewritten.
 */
export async function run() {
  const markdownFiles = await glob('**/*.md', { ignore: 'node_modules/**' });

  for (const file of markdownFiles) {
    const source = (await fs.readFile(file)).toString();
    console.log(`start handle ${file}`);

    // Each entry is a [plugin, options] tuple; downloadAsset needs the path of
    // the document it is processing.
    const plugins = [
      [ replaceHTML ],
      [ downloadAsset, { filePath: file } ],
    ];
    const processor = remark()
      .data('settings', { bullet: '-', listItemIndent: 'one' })
      .use(plugins);

    const output = await processor.process(source);
    await writeFile(file, output.toString());
    console.log(`finish write file ${file}`);
  }
}
/**
 * Create a remark transformer that downloads the images of a document whose
 * URL starts with `http` and points each image node at its local copy.
 *
 * Images are stored in `<root>/assets/<base>/<name>`, where `<root>` is the
 * first path segment of the document (or `.` when the document has no parent
 * directory), `<base>` is the document's file name without `.md` passed through
 * `pinyin` and joined with `_`, and `<name>` is the last segment of the image
 * URL's path.
 *
 * @param {{ filePath: string }} opts - `filePath` is the path of the Markdown
 *   document being processed.
 * @returns {(tree: object) => Promise<void>} Transformer that mutates the
 *   `url` of every successfully downloaded image node in `tree`.
 */
function downloadAsset(opts) {
  return async tree => {
    const docFilePath = opts.filePath;
    const base = pinyin(path.basename(docFilePath, '.md'), {
      toneType: 'none',
      type: 'array',
      nonZh: 'consecutive',
    }).map(o => o.trim()).join('_');

    // A document without a parent directory has only one segment: its own
    // file name. Using that as the root would place assets "inside" the
    // Markdown file, so fall back to the current directory instead.
    const segments = path.normalize(docFilePath).split(path.sep);
    const rootDir = segments.length > 1 ? segments[0] : '.';
    const assetsDir = path.join(rootDir, 'assets', base);

    const assetNodes = selectAll('image[url^=http]', tree);
    for (const node of assetNodes) {
      const assetName = new URL(node.url).pathname.split('/').pop();
      if (!assetName) {
        // The URL path ends with "/", so there is no file name to save under.
        continue;
      }

      const filePath = path.join(assetsDir, assetName);
      const target = path.join('./', filePath);
      const saved = await download(node.url, target, { headers: { 'User-Agent': 'yuque-exporter' } });
      if (saved !== target) {
        // download() hands back the URL when nothing was written; keep the
        // remote link rather than pointing at a file that does not exist.
        console.warn(`skip ${node.url}: download failed`);
        continue;
      }

      // Markdown URLs always use forward slashes, whatever the platform separator is.
      node.url = path
        .relative(path.dirname(docFilePath), filePath)
        .split(path.sep)
        .join('/');
    }
  };
}
/**
 * Create a remark transformer that turns raw `<br />` / `<br/>` HTML nodes
 * into text nodes holding a single newline.
 *
 * Only `html` nodes whose value is exactly one of those two spellings are
 * touched; every other node is left as is.
 *
 * @returns {(tree: object) => void} Transformer that mutates `tree` in place.
 */
function replaceHTML() {
  const lineBreakTags = new Set(['<br />', '<br/>']);

  return tree => {
    selectAll('html', tree)
      .filter(({ value }) => lineBreakTags.has(value))
      .forEach(breakNode => {
        breakNode.type = 'text';
        breakNode.value = '\n';
      });
  };
}
/**
 * Download `url` into `filePath`, creating the parent directory if needed.
 *
 * @param {string} url - Address to fetch.
 * @param {string} filePath - Destination file.
 * @param {object} [opts] - Extra options forwarded to `request`. `opts.headers`
 *   is merged over the default `User-Agent` header; all other keys override
 *   the defaults (such as `maxRedirections`).
 * @returns {Promise<string>} `filePath` when the response status is 200 and
 *   the file was written; otherwise `url`, and nothing is written.
 * @throws Propagates errors from the request, from reading the response body
 *   and from writing the file.
 */
async function download(url, filePath, opts = {}) {
  const { headers, ...otherOpts } = opts;
  const { body, statusCode } = await request(url, {
    headers: {
      'User-Agent': 'yuque-exporter',
      ...headers,
    },
    maxRedirections: 10,
    ...otherOpts,
  });

  if (statusCode !== 200) {
    // The response body has to be consumed even when it is not wanted;
    // otherwise the underlying connection stays occupied by this response.
    for await (const _chunk of body) {
      // Discard the payload.
    }
    return url;
  }

  await mkdir(path.dirname(filePath));
  await pipeline(body, createWriteStream(filePath));
  return filePath;
}
/**
 * Create directory `p`, including any missing parent directories.
 * An already existing directory is not an error.
 *
 * @param {string} p - Directory path to create.
 * @returns {Promise<void>}
 */
async function mkdir(p) {
  const options = { recursive: true };
  await fs.mkdir(p, options);
}
/**
 * Write `content` to `filePath`, creating the parent directory first.
 *
 * Strings and Buffers are written as they are; any other value is serialized
 * with `JSON.stringify` using a two-space indent.
 *
 * @param {string} filePath - Destination file.
 * @param {*} content - Data to write.
 * @returns {Promise<void>}
 */
async function writeFile(filePath, content) {
  const parentDir = path.dirname(filePath);
  await fs.mkdir(parentDir, { recursive: true });

  let payload = content;
  if (typeof payload !== 'string' && !Buffer.isBuffer(payload)) {
    payload = JSON.stringify(payload, null, 2);
  }

  await fs.writeFile(filePath, payload, 'utf-8');
}
// Entry point: start processing as soon as this module is executed.
// The promise is handled explicitly so that a failure is reported and
// reflected in the exit code instead of being left floating.
run().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
@shepherdwind
Copy link
Author

Usage

$ npm i remark unist-util-select glob undici pinyin-pro
$ node index.mjs

Node.js 18 or later is required.

The majority of the code was copied from https://github.com/atian25/yuque-exporter/blob/master/src/lib/doc.ts.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment