From f8d214c362d61075f73e443e5d4e4c5d4152ae69 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Fri, 7 Feb 2025 17:49:31 -0500 Subject: [PATCH] questions.load(), config refactor --- index.js | 11 +- src/intake.js | 28 ++--- src/parser.js | 28 ++--- src/questions.js | 294 +++++++++++++++++++++++----------------------- src/shared.js | 19 +-- src/translator.js | 7 +- src/writer.js | 44 +++---- 7 files changed, 218 insertions(+), 213 deletions(-) diff --git a/index.js b/index.js index 7a5c7a0..63fd3b9 100644 --- a/index.js +++ b/index.js @@ -2,8 +2,9 @@ import * as commander from 'commander'; import path from 'path'; -import * as parser from './src/parser.js'; import * as intake from './src/intake.js'; +import * as parser from './src/parser.js'; +import * as shared from './src/shared.js'; import * as writer from './src/writer.js'; (async () => { @@ -14,17 +15,17 @@ import * as writer from './src/writer.js'; .addHelpText('after', '\nMore documentation is at https://github.com/lonekorean/wordpress-export-to-markdown') // gather config options from command line and wizard - const config = await intake.getConfig(); + await intake.getConfig(); // parse data from XML and do Markdown translations - const posts = await parser.parseFilePromise(config) + const posts = await parser.parseFilePromise() // write files and download images - await writer.writeFilesPromise(posts, config); + await writer.writeFilesPromise(posts); // happy goodbye console.log('\nAll done!'); - console.log('Look for your output files in: ' + path.resolve(config.output)); + console.log('Look for your output files in: ' + path.resolve(shared.config.output)); })().catch((ex) => { // sad goodbye console.log('\nSomething went wrong, execution halted early.'); diff --git a/src/intake.js b/src/intake.js index aec05f0..f43b5a0 100644 --- a/src/intake.js +++ b/src/intake.js @@ -19,7 +19,7 @@ const promptTheme = { export async function getConfig() { // check command line for any config options - const commandLineQuestions = questions.all; + const commandLineQuestions = questions.load(); const commandLineAnswers = getCommandLineAnswers(commandLineQuestions); let wizardAnswers; @@ -27,15 +27,15 @@ export async function getConfig() { console.log('\nStarting wizard...'); // run wizard for questions with prompts that were not answered via the command line - const wizardQuestions = questions.all.filter((question) => { + const wizardQuestions = questions.load().filter((question) => { return question.prompt && !(shared.camelCase(question.name) in commandLineAnswers); }); wizardAnswers = await getWizardAnswers(wizardQuestions, commandLineAnswers); } else { - console.log('\nSkipping wizard...'); + console.dir('\nSkipping wizard...'); } - return { ...commandLineAnswers, ...wizardAnswers }; + Object.assign(shared.config, commandLineAnswers, wizardAnswers); } function getCommandLineAnswers(questions) { @@ -106,13 +106,14 @@ export async function getWizardAnswers(questions, commandLineAnswers) { promptConfig.loop = false; if (question.isPathQuestion) { - // create a snapshot config of command line answers and wizard answers so far - const config = { ...commandLineAnswers, ...answers }; - promptConfig.choices.forEach((choice) => { // show example path if this choice is selected - config[answerKey] = choice.value; - choice.description = buildSamplePostPath(config); + choice.description = buildSamplePostPath({ + ...commandLineAnswers, // with command line answers + ...answers, // and wizard answers so far + output: path.sep, // and a simplified output folder + [answerKey]: choice.value // and this choice selected + }); }); } } else { @@ -154,11 +155,6 @@ function normalize(value, type, onError) { } } -export function buildSamplePostPath(config) { - const outputDir = path.sep; - const type = ''; - const date = luxon.DateTime.now(); - const slug = 'my-post'; - - return shared.buildPostPath(outputDir, type, date, slug, config); +export function buildSamplePostPath(overrideConfig) { + return shared.buildPostPath('', luxon.DateTime.now(), 'my-post', overrideConfig); } diff --git a/src/parser.js b/src/parser.js index 2a3c95a..f8c06b6 100644 --- a/src/parser.js +++ b/src/parser.js @@ -5,9 +5,9 @@ import * as frontmatter from './frontmatter.js'; import * as shared from './shared.js'; import * as translator from './translator.js'; -export async function parseFilePromise(config) { +export async function parseFilePromise() { console.log('\nParsing...'); - const content = await fs.promises.readFile(config.input, 'utf8'); + const content = await fs.promises.readFile(shared.config.input, 'utf8'); const allData = await xml2js.parseStringPromise(content, { trim: true, tagNameProcessors: [xml2js.processors.stripPrefix] @@ -15,18 +15,18 @@ export async function parseFilePromise(config) { const channelData = allData.rss.channel[0].item; const postTypes = getPostTypes(channelData); - const posts = collectPosts(channelData, postTypes, config); + const posts = collectPosts(channelData, postTypes); const images = []; - if (config.saveImages === 'attached' || config.saveImages === 'all') { + if (shared.config.saveImages === 'attached' || shared.config.saveImages === 'all') { images.push(...collectAttachedImages(channelData)); } - if (config.saveImages === 'scraped' || config.saveImages === 'all') { + if (shared.config.saveImages === 'scraped' || shared.config.saveImages === 'all') { images.push(...collectScrapedImages(channelData, postTypes)); } mergeImagesIntoPosts(images, posts); - populateFrontmatter(posts, config); + populateFrontmatter(posts); return posts; } @@ -51,7 +51,7 @@ function getItemsOfType(channelData, type) { return channelData.filter(item => item.post_type[0] === type); } -function collectPosts(channelData, postTypes, config) { +function collectPosts(channelData, postTypes) { // this is passed into getPostContent() for the markdown conversion const turndownService = translator.initTurndownService(); @@ -60,7 +60,7 @@ function collectPosts(channelData, postTypes, config) { const postsForType = getItemsOfType(channelData, postType) .filter(postData => postData.status[0] !== 'trash' && postData.status[0] !== 'draft') .filter(postData => !(postType === 'page' && postData.post_name[0] === 'sample-page')) - .map(postData => buildPost(postData, turndownService, config)); + .map(postData => buildPost(postData, turndownService)); if (postsForType.length > 0) { console.log(`${postsForType.length} posts of type "${postType}" found.`); @@ -72,19 +72,19 @@ function collectPosts(channelData, postTypes, config) { return allPosts; } -function buildPost(data, turndownService, config) { +function buildPost(data, turndownService) { return { // full raw post data, used by some frontmatter getters data, // contents of the post in markdown - content: translator.getPostContent(data, turndownService, config), + content: translator.getPostContent(data, turndownService), // these are not written to file, but help with other things type: data.post_type[0], id: data.post_id[0], slug: decodeURIComponent(data.post_name[0]), - date: luxon.DateTime.fromRFC2822(data.pubDate[0], { zone: config.customDateTimezone }), + date: luxon.DateTime.fromRFC2822(data.pubDate[0], { zone: shared.config.customDateTimezone }), coverImageId: getPostMetaValue(data.postmeta, '_thumbnail_id'), // these are possibly set later in mergeImagesIntoPosts() @@ -160,10 +160,10 @@ function mergeImagesIntoPosts(images, posts) { }); } -function populateFrontmatter(posts, config) { +function populateFrontmatter(posts) { posts.forEach(post => { post.frontmatter = {}; - config.frontmatterFields.forEach(field => { + shared.config.frontmatterFields.forEach(field => { const [key, alias] = field.split(':'); let frontmatterGetter = frontmatter[key]; @@ -171,7 +171,7 @@ function populateFrontmatter(posts, config) { throw `Could not find a frontmatter getter named "${key}".`; } - post.frontmatter[alias || key] = frontmatterGetter(post, config); + post.frontmatter[alias || key] = frontmatterGetter(post); }); }); } diff --git a/src/questions.js b/src/questions.js index 0ae9f62..f2783eb 100644 --- a/src/questions.js +++ b/src/questions.js @@ -1,148 +1,150 @@ import * as inquirer from '@inquirer/prompts'; -// questions with a description are displayed in command line help -// questions with a prompt are included in the wizard (if not set on the command line) -export const all = [ - { - name: 'wizard', - type: 'boolean', - description: 'Use wizard', - default: true - }, - { - name: 'input', - type: 'file-path', - description: 'Path to WordPress export file', - default: 'export.xml', - prompt: inquirer.input - }, - { - name: 'post-folders', - type: 'boolean', - description: 'Put each post into its own folder', - default: true, - choices: [ - { - name: 'Yes', - value: true - }, - { - name: 'No', - value: false - } - ], - isPathQuestion: true, - prompt: inquirer.select - }, - { - name: 'prefix-date', - type: 'boolean', - description: 'Prefix with date', - default: false, - choices: [ - { - name: 'Yes', - value: true - }, - { - name: 'No', - value: false - } - ], - isPathQuestion: true, - prompt: inquirer.select - }, - { - name: 'date-folders', - type: 'choice', - description: 'Organize into folders based on date', - default: 'none', - choices: [ - { - name: 'Year folders', - value: 'year' - }, - { - name: 'Year and month folders', - value: 'year-month' - }, - { - name: 'No', - value: 'none' - } - ], - isPathQuestion: true, - prompt: inquirer.select - }, - { - name: 'save-images', - type: 'choice', - description: 'Save images', - default: 'all', - choices: [ - { - name: 'Images attached to posts', - value: 'attached' - }, - { - name: 'Images scraped from post body content', - value: 'scraped' - }, - { - name: 'Both', - value: 'all' - }, - { - name: 'No', - value: 'none' - } - ], - prompt: inquirer.select - }, - { - name: 'output', - type: 'folder-path', - description: 'Path to output folder', - default: 'output' - }, - { - name: 'frontmatter-fields', - type: 'list', - default: ['title', 'date', 'categories', 'tags', 'coverImage'] - }, - { - name: 'image-file-request-delay', - type: 'integer', - default: 500 - }, - { - name: 'markdown-file-write-delay', - type: 'integer', - default: 25 - }, - { - name: 'include-time-with-date', - type: 'boolean', - default: false - }, - { - name: 'custom-date-formatting', - type: 'string', - default: '' - }, - { - name: 'custom-date-timezone', - type: 'string', - default: 'utc' - }, - { - name: 'quote-date', - type: 'boolean', - default: false - }, - { - name: 'strict-ssl', - type: 'boolean', - default: true - } -]; +export function load() { + // questions with a description are displayed in command line help + // questions with a prompt are included in the wizard (if not set on the command line) + return [ + { + name: 'wizard', + type: 'boolean', + description: 'Use wizard', + default: true + }, + { + name: 'input', + type: 'file-path', + description: 'Path to WordPress export file', + default: 'export.xml', + prompt: inquirer.input + }, + { + name: 'post-folders', + type: 'boolean', + description: 'Put each post into its own folder', + default: true, + choices: [ + { + name: 'Yes', + value: true + }, + { + name: 'No', + value: false + } + ], + isPathQuestion: true, + prompt: inquirer.select + }, + { + name: 'prefix-date', + type: 'boolean', + description: 'Prefix with date', + default: false, + choices: [ + { + name: 'Yes', + value: true + }, + { + name: 'No', + value: false + } + ], + isPathQuestion: true, + prompt: inquirer.select + }, + { + name: 'date-folders', + type: 'choice', + description: 'Organize into folders based on date', + default: 'none', + choices: [ + { + name: 'Year folders', + value: 'year' + }, + { + name: 'Year and month folders', + value: 'year-month' + }, + { + name: 'No', + value: 'none' + } + ], + isPathQuestion: true, + prompt: inquirer.select + }, + { + name: 'save-images', + type: 'choice', + description: 'Save images', + default: 'all', + choices: [ + { + name: 'Images attached to posts', + value: 'attached' + }, + { + name: 'Images scraped from post body content', + value: 'scraped' + }, + { + name: 'Both', + value: 'all' + }, + { + name: 'No', + value: 'none' + } + ], + prompt: inquirer.select + }, + { + name: 'output', + type: 'folder-path', + description: 'Path to output folder', + default: 'output' + }, + { + name: 'frontmatter-fields', + type: 'list', + default: ['title', 'date', 'categories', 'tags', 'coverImage'] + }, + { + name: 'image-file-request-delay', + type: 'integer', + default: 500 + }, + { + name: 'markdown-file-write-delay', + type: 'integer', + default: 25 + }, + { + name: 'include-time-with-date', + type: 'boolean', + default: false + }, + { + name: 'custom-date-formatting', + type: 'string', + default: '' + }, + { + name: 'custom-date-timezone', + type: 'string', + default: 'utc' + }, + { + name: 'quote-date', + type: 'boolean', + default: false + }, + { + name: 'strict-ssl', + type: 'boolean', + default: true + } + ]; +} diff --git a/src/shared.js b/src/shared.js index af467f0..3c6dc04 100644 --- a/src/shared.js +++ b/src/shared.js @@ -1,29 +1,34 @@ import path from 'path'; +// simple data store, populated via intake, used everywhere +export const config = {}; + export function camelCase(str) { return str.replace(/-(.)/g, (match) => match[1].toUpperCase()); } -export function buildPostPath(outputDir, type, date, slug, config) { - // start with base output dir and post type - const pathSegments = [outputDir, type]; +export function buildPostPath(type, date, slug, overrideConfig) { + const pathConfig = overrideConfig ?? config; - if (config.dateFolders === 'year' || config.dateFolders === 'year-month') { + // start with base output dir and post type + const pathSegments = [pathConfig.output, type]; + + if (pathConfig.dateFolders === 'year' || pathConfig.dateFolders === 'year-month') { pathSegments.push(date.toFormat('yyyy')); } - if (config.dateFolders === 'year-month') { + if (pathConfig.dateFolders === 'year-month') { pathSegments.push(date.toFormat('LL')); } // create slug fragment, possibly date prefixed let slugFragment = slug; - if (config.prefixDate) { + if (pathConfig.prefixDate) { slugFragment = date.toFormat('yyyy-LL-dd') + '-' + slugFragment; } // use slug fragment as folder or filename as specified - if (config.postFolders) { + if (pathConfig.postFolders) { pathSegments.push(slugFragment, 'index.md'); } else { pathSegments.push(slugFragment + '.md'); diff --git a/src/translator.js b/src/translator.js index 1fc3f75..f1b8df0 100644 --- a/src/translator.js +++ b/src/translator.js @@ -1,5 +1,6 @@ -import turndown from 'turndown'; import turndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'; +import turndown from 'turndown'; +import * as shared from './shared.js'; export function initTurndownService() { const turndownService = new turndown({ @@ -94,7 +95,7 @@ export function initTurndownService() { return turndownService; } -export function getPostContent(postData, turndownService, config) { +export function getPostContent(postData, turndownService) { let content = postData.encoded[0]; // insert an empty div element between double line breaks @@ -102,7 +103,7 @@ export function getPostContent(postData, turndownService, config) { // without mucking up content inside of other elements (like blocks) content = content.replace(/(\r?\n){2}/g, '\n
\n'); - if (config.saveImages === 'scraped' || config.saveImages === 'all') { + if (shared.config.saveImages === 'scraped' || shared.config.saveImages === 'all') { // writeImageFile() will save all content images to a relative /images // folder so update references in post content to match content = content.replace(/(]*src=").*?([^/"]+\.(?:gif|jpe?g|png|webp))("[^>]*>)/gi, '$1images/$2$3'); diff --git a/src/writer.js b/src/writer.js index 17a28a1..a201fcf 100644 --- a/src/writer.js +++ b/src/writer.js @@ -7,16 +7,16 @@ import * as luxon from 'luxon'; import path from 'path'; import * as shared from './shared.js'; -export async function writeFilesPromise(posts, config) { - await writeMarkdownFilesPromise(posts, config); - await writeImageFilesPromise(posts, config); +export async function writeFilesPromise(posts) { + await writeMarkdownFilesPromise(posts); + await writeImageFilesPromise(posts); } -async function processPayloadsPromise(payloads, loadFunc, config) { +async function processPayloadsPromise(payloads, loadFunc) { const promises = payloads.map(payload => new Promise((resolve, reject) => { setTimeout(async () => { try { - const data = await loadFunc(payload.item, config); + const data = await loadFunc(payload.item); await writeFile(payload.destinationPath, data); logPayloadResult(payload); resolve(); @@ -41,12 +41,12 @@ async function writeFile(destinationPath, data) { await fs.promises.writeFile(destinationPath, data); } -async function writeMarkdownFilesPromise(posts, config) { +async function writeMarkdownFilesPromise(posts) { // package up posts into payloads let skipCount = 0; let delay = 0; const payloads = posts.flatMap(post => { - const destinationPath = buildPostPath(post, config); + const destinationPath = buildPostPath(post); if (checkFile(destinationPath)) { // already exists, don't need to save again skipCount++; @@ -59,7 +59,7 @@ async function writeMarkdownFilesPromise(posts, config) { destinationPath, delay }; - delay += config.markdownFileWriteDelay; + delay += shared.config.markdownFileWriteDelay; return [payload]; } }); @@ -69,11 +69,11 @@ async function writeMarkdownFilesPromise(posts, config) { console.log('\nNo posts to save...'); } else { console.log(`\nSaving ${remainingCount} posts (${skipCount} already exist)...`); - await processPayloadsPromise(payloads, loadMarkdownFilePromise, config); + await processPayloadsPromise(payloads, loadMarkdownFilePromise); } } -async function loadMarkdownFilePromise(post, config) { +async function loadMarkdownFilePromise(post) { let output = '---\n'; Object.entries(post.frontmatter).forEach(([key, value]) => { @@ -84,13 +84,13 @@ async function loadMarkdownFilePromise(post, config) { outputValue = value.reduce((list, item) => `${list}\n - "${item}"`, ''); } } else if (value instanceof luxon.DateTime) { - if (config.customDateFormatting) { - outputValue = value.toFormat(config.customDateFormatting); + if (shared.config.customDateFormatting) { + outputValue = value.toFormat(shared.config.customDateFormatting); } else { - outputValue = config.includeTimeWithDate ? value.toISO() : value.toISODate(); + outputValue = shared.config.includeTimeWithDate ? value.toISO() : value.toISODate(); } - if (config.quoteDate) { + if (shared.config.quoteDate) { outputValue = `"${outputValue}"`; } } else { @@ -110,12 +110,12 @@ async function loadMarkdownFilePromise(post, config) { return output; } -async function writeImageFilesPromise(posts, config) { +async function writeImageFilesPromise(posts) { // collect image data from all posts into a single flattened array of payloads let skipCount = 0; let delay = 0; const payloads = posts.flatMap(post => { - const postPath = buildPostPath(post, config); + const postPath = buildPostPath(post); const imagesDir = path.join(path.dirname(postPath), 'images'); return post.imageUrls.flatMap(imageUrl => { const filename = shared.getFilenameFromUrl(imageUrl); @@ -132,7 +132,7 @@ async function writeImageFilesPromise(posts, config) { destinationPath, delay }; - delay += config.imageFileRequestDelay; + delay += shared.config.imageFileRequestDelay; return [payload]; } }); @@ -143,11 +143,11 @@ async function writeImageFilesPromise(posts, config) { console.log('\nNo images to download and save...'); } else { console.log(`\nDownloading and saving ${remainingCount} images (${skipCount} already exist)...`); - await processPayloadsPromise(payloads, loadImageFilePromise, config); + await processPayloadsPromise(payloads, loadImageFilePromise); } } -async function loadImageFilePromise(imageUrl, config) { +async function loadImageFilePromise(imageUrl) { // only encode the URL if it doesn't already have encoded characters const url = (/%[\da-f]{2}/i).test(imageUrl) ? imageUrl : encodeURI(imageUrl); @@ -160,7 +160,7 @@ async function loadImageFilePromise(imageUrl, config) { responseType: 'arraybuffer' }; - if (!config.strictSsl) { + if (!shared.config.strictSsl) { // custom agents to disable SSL errors (adding both http and https, just in case) requestConfig.httpAgent = new http.Agent({ rejectUnauthorized: false }); requestConfig.httpsAgent = new https.Agent({ rejectUnauthorized: false }); @@ -182,8 +182,8 @@ async function loadImageFilePromise(imageUrl, config) { return buffer; } -function buildPostPath(post, config) { - return shared.buildPostPath(config.output, post.type, post.date, post.slug, config); +function buildPostPath(post) { + return shared.buildPostPath(post.type, post.date, post.slug); } function checkFile(path) {