Files
wordpress-export-to-markdown/src/writer.js
T

199 lines
5.5 KiB
JavaScript
Raw Normal View History

const chalk = require('chalk');
2019-12-17 13:52:09 -05:00
const fs = require('fs');
const luxon = require('luxon');
const path = require('path');
2019-12-21 15:57:25 -05:00
const requestPromiseNative = require('request-promise-native');
2019-12-17 13:52:09 -05:00
const shared = require('./shared');
2020-04-11 10:10:01 -04:00
const settings = require('./settings');
2019-12-17 13:52:09 -05:00
2019-12-21 15:57:25 -05:00
/**
 * Writes everything for the given posts to disk: the markdown files first,
 * then the images they reference.
 *
 * The two phases run strictly one after the other; the image phase does not
 * start until the markdown phase has finished, and is skipped entirely if
 * the markdown phase throws.
 *
 * @param {Array<object>} posts - posts to write
 * @param {object} config - output options, handed unchanged to both phases
 * @returns {Promise<void>}
 */
async function writeFilesPromise(posts, config) {
  const phases = [writeMarkdownFilesPromise, writeImageFilesPromise];
  for (const runPhase of phases) {
    await runPhase(posts, config);
  }
}
2020-12-28 14:50:39 -05:00
/**
 * Loads and writes every payload, then prints a summary of how many failed.
 *
 * Each payload waits `payload.delay` milliseconds, obtains its data from
 * `loadFunc(payload.item)`, and writes it to `payload.destinationPath`.
 * All payloads are started up front, so the delays are what stagger them.
 * A failing payload is logged and counted; it never aborts the others.
 *
 * @param {Array<{item: *, name: string, destinationPath: string, delay: number}>} payloads
 * @param {function(*): Promise<*>} loadFunc - produces the data to write for one item
 * @returns {Promise<void>} resolves once every payload has settled
 */
async function processPayloadsPromise(payloads, loadFunc) {
  const tasks = payloads.map(async payload => {
    // stagger the start of this payload
    await new Promise(resolve => setTimeout(resolve, payload.delay));

    try {
      const data = await loadFunc(payload.item);
      await writeFile(payload.destinationPath, data);
      console.log(chalk.green('[OK]') + ' ' + payload.name);
    } catch (ex) {
      // String() rather than ex.toString() so a null/undefined rejection value
      // cannot throw here and leave this task unsettled
      console.log(chalk.red('[FAILED]') + ' ' + payload.name + ' ' + chalk.red('(' + String(ex) + ')'));

      // keep the original error as the rejection reason
      throw ex;
    }
  });

  // allSettled observes every rejection, so one failure never stops the rest
  const results = await Promise.allSettled(tasks);
  const failedCount = results.filter(result => result.status === 'rejected').length;
  if (failedCount === 0) {
    console.log('Done, got them all!');
  } else {
    console.log('Done, but with ' + chalk.red(failedCount + ' failed') + '.');
  }
}
2019-12-30 09:17:33 -05:00
/**
 * Writes data to a file, creating any missing parent directories first.
 *
 * @param {string} destinationPath - path of the file to write
 * @param {string|Buffer} data - content handed to fs.promises.writeFile
 * @returns {Promise<void>}
 */
async function writeFile(destinationPath, data) {
  const parentDir = path.dirname(destinationPath);

  // recursive: true also makes this a no-op when the directory already exists
  await fs.promises.mkdir(parentDir, { recursive: true });
  await fs.promises.writeFile(destinationPath, data);
}
/**
 * Saves a markdown file for every post that does not already have one on disk.
 *
 * Posts whose destination file already exists are counted and skipped. Each
 * remaining post is queued with a start delay that grows by
 * settings.markdown_file_write_delay per queued post.
 *
 * @param {Array<object>} posts - posts to save
 * @param {object} config - output options; used for paths and display names
 * @returns {Promise<void>}
 */
async function writeMarkdownFilesPromise(posts, config) {
  const payloads = [];
  let alreadySaved = 0;
  let nextDelay = 0;

  // package up posts into payloads
  for (const post of posts) {
    const destinationPath = getPostPath(post, config);
    if (checkFile(destinationPath)) {
      // already exists, don't need to save again
      alreadySaved += 1;
      continue;
    }

    payloads.push({
      item: post,
      name: (config.includeOtherTypes ? post.meta.type + ' - ' : '') + post.meta.slug,
      destinationPath,
      delay: nextDelay
    });
    nextDelay += settings.markdown_file_write_delay;
  }

  const toSave = payloads.length;
  if (toSave === 0 && alreadySaved === 0) {
    console.log('\nNo posts to save...');
    return;
  }

  console.log(`\nSaving ${toSave} posts (${alreadySaved} already exist)...`);
  await processPayloadsPromise(payloads, loadMarkdownFilePromise);
}
2019-12-21 15:57:25 -05:00
2019-12-29 08:36:12 -05:00
/**
 * Builds the full text of a post's markdown file: a frontmatter block
 * followed by the post content.
 *
 * Every frontmatter value is written as a double-quoted string. Array values
 * become a list with one quoted item per line; empty arrays are left out.
 * Backslashes and double quotes are escaped in both scalar values and array
 * items so the quoted strings stay well-formed.
 *
 * @param {{frontmatter: object, content: string}} post
 * @returns {Promise<string>} the markdown file content
 */
async function loadMarkdownFilePromise(post) {
  // render any value as a double-quoted string; null/undefined become ""
  const quote = value => {
    const text = value == null ? '' : String(value);

    // backslashes must be escaped before quotes, otherwise the backslash
    // added in front of each quote would itself get doubled
    const escaped = text.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
    return `"${escaped}"`;
  };

  let output = '---\n';
  for (const [key, value] of Object.entries(post.frontmatter)) {
    if (Array.isArray(value)) {
      if (value.length > 0) {
        // array of one or more strings
        const items = value.map(item => `\n - ${quote(item)}`).join('');
        output += `${key}: ${items}\n`;
      }
    } else {
      // single value
      output += `${key}: ${quote(value)}\n`;
    }
  }

  output += `---\n\n${post.content}\n`;
  return output;
}
/**
 * Downloads and saves every image referenced by the posts that is not already
 * on disk.
 *
 * Images go into an "images" folder next to the post's markdown file. Images
 * whose destination file already exists are counted and skipped. Each
 * remaining image is queued with a start delay that grows by
 * settings.image_file_request_delay per queued image, across all posts.
 *
 * @param {Array<object>} posts - posts whose meta.imageUrls should be saved
 * @param {object} config - output options; used to locate each post's folder
 * @returns {Promise<void>}
 */
async function writeImageFilesPromise(posts, config) {
  const payloads = [];
  let alreadySaved = 0;
  let nextDelay = 0;

  // collect image data from all posts into a single flat list of payloads
  for (const post of posts) {
    const postPath = getPostPath(post, config);
    const imagesDir = path.join(path.dirname(postPath), 'images');

    for (const imageUrl of post.meta.imageUrls) {
      const filename = shared.getFilenameFromUrl(imageUrl);
      const destinationPath = path.join(imagesDir, filename);
      if (checkFile(destinationPath)) {
        // already exists, don't need to save again
        alreadySaved += 1;
        continue;
      }

      payloads.push({
        item: imageUrl,
        name: filename,
        destinationPath,
        delay: nextDelay
      });
      nextDelay += settings.image_file_request_delay;
    }
  }

  const toDownload = payloads.length;
  if (toDownload === 0 && alreadySaved === 0) {
    console.log('\nNo images to download and save...');
    return;
  }

  console.log(`\nDownloading and saving ${toDownload} images (${alreadySaved} already exist)...`);
  await processPayloadsPromise(payloads, loadImageFilePromise);
}
2019-12-29 08:36:12 -05:00
/**
 * Downloads a single image.
 *
 * @param {string} imageUrl - address of the image
 * @returns {Promise<*>} whatever the request resolves to (presumably a Buffer,
 *   given `encoding: null` — confirm against request-promise-native)
 * @throws rethrows any request error; for a StatusCodeError the message is
 *   first replaced by the bare status code
 */
async function loadImageFilePromise(imageUrl) {
  // only encode the URL if it doesn't already have encoded characters
  const alreadyEncoded = (/%[\da-f]{2}/i).test(imageUrl);

  const requestOptions = {
    url: alreadyEncoded ? imageUrl : encodeURI(imageUrl),
    encoding: null, // preserves binary encoding
    headers: {
      'User-Agent': 'wordpress-export-to-markdown'
    }
  };

  try {
    return await requestPromiseNative.get(requestOptions);
  } catch (ex) {
    if (ex.name === 'StatusCodeError') {
      // these errors contain a lot of noise, simplify to just the status code
      ex.message = ex.statusCode;
    }
    throw ex;
  }
}
2019-12-30 09:17:33 -05:00
/**
 * Works out where a post's markdown file belongs on disk.
 *
 * The path starts at config.output and gains, in this order and only when the
 * matching option is set: a post type folder (includeOtherTypes), a year
 * folder (yearFolders), and a month folder (monthFolders). The slug,
 * optionally prefixed with the post date (prefixDate), then becomes either a
 * folder holding index.md (postFolders) or the .md file name itself.
 *
 * @param {object} post - reads frontmatter.date, meta.type and meta.slug
 * @param {object} config - output options described above
 * @returns {string} the joined file path
 */
function getPostPath(post, config) {
  const postDate = luxon.DateTime.fromISO(post.frontmatter.date);

  // base output dir followed by whichever optional folders are enabled
  const folders = [
    config.output,
    // post type folder, for when we're dealing with more than just "post"
    ...(config.includeOtherTypes ? [post.meta.type] : []),
    ...(config.yearFolders ? [postDate.toFormat('yyyy')] : []),
    ...(config.monthFolders ? [postDate.toFormat('LL')] : [])
  ];

  // slug fragment, possibly date prefixed
  const slugFragment = config.prefixDate
    ? postDate.toFormat('yyyy-LL-dd') + '-' + post.meta.slug
    : post.meta.slug;

  // slug fragment is used as a folder or as the filename, as specified
  const tail = config.postFolders
    ? [slugFragment, 'index.md']
    : [slugFragment + '.md'];

  return path.join(...folders, ...tail);
}
2020-12-28 14:50:39 -05:00
/**
 * Reports whether something already exists at the given location.
 *
 * Note: the parameter is named `path`, which hides the module-level `path`
 * import inside this function; only `fs` is used here.
 *
 * @param {string} path - file system location to look up
 * @returns {boolean} true if the location exists
 */
function checkFile(path) {
  const alreadyThere = fs.existsSync(path);
  return alreadyThere;
}
2019-12-21 15:57:25 -05:00
// public API of this module: the single entry point that writes all output
module.exports.writeFilesPromise = writeFilesPromise;