From 42d0688654183c63c79e3482a54d39198ab4b6dc Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sun, 9 Feb 2025 12:21:13 -0500 Subject: [PATCH 1/5] Stuff for drafts, handling missing slugs and dates --- src/frontmatter.js | 7 ++++++- src/intake.js | 7 ++++++- src/parser.js | 7 ++++--- src/questions.js | 2 +- src/shared.js | 43 +++++++++++++++++++++++++++++-------------- src/translator.js | 2 +- src/writer.js | 13 ++++++------- 7 files changed, 53 insertions(+), 28 deletions(-) diff --git a/src/frontmatter.js b/src/frontmatter.js index d513079..3e34b9e 100644 --- a/src/frontmatter.js +++ b/src/frontmatter.js @@ -24,11 +24,16 @@ export function coverImage(post) { } // get post date, previously saved as a luxon datetime object on post -// this value is also used for year/month folders, date prefixes, etc. as needed export function date(post) { return post.date; } +// get boolean indicating if post is a draft +// this will only be included if true, otherwise it's left off +export function draft(post) { + return post.isDraft ? true : undefined; +} + // get excerpt, not decoded, newlines collapsed export function excerpt(post) { return post.data.encoded[1].replace(/[\r\n]+/gm, ' '); diff --git a/src/intake.js b/src/intake.js index e16e946..676e648 100644 --- a/src/intake.js +++ b/src/intake.js @@ -156,5 +156,10 @@ function normalize(value, type, onError) { } export function buildSamplePostPath(overrideConfig) { - return shared.buildPostPath('', luxon.DateTime.now(), 'my-post', overrideConfig); + const samplePost = { + date: luxon.DateTime.now(), + slug: 'my-post' + }; + + return shared.buildPostPath(samplePost, overrideConfig); } diff --git a/src/parser.js b/src/parser.js index 212774f..f255ec4 100644 --- a/src/parser.js +++ b/src/parser.js @@ -58,7 +58,7 @@ function collectPosts(channelData, postTypes) { let allPosts = []; postTypes.forEach(postType => { const postsForType = getItemsOfType(channelData, postType) - .filter(postData => postData.status[0] !== 'trash' && postData.status[0] !== 'draft') + .filter(postData => postData.status[0] !== 'trash') .filter(postData => !(postType === 'page' && postData.post_name[0] === 'sample-page')) .map(postData => buildPost(postData, turndownService)); @@ -83,8 +83,9 @@ function buildPost(data, turndownService) { // these are not written to file, but help with other things type: data.post_type[0], id: data.post_id[0], + isDraft: data.status[0] === 'draft', slug: decodeURIComponent(data.post_name[0]), - date: luxon.DateTime.fromRFC2822(data.pubDate[0], { zone: shared.config.customDateTimezone }), + date: data.pubDate[0] ? luxon.DateTime.fromRFC2822(data.pubDate[0], { zone: shared.config.customDateTimezone }) : undefined, coverImageId: getPostMetaValue(data.postmeta, '_thumbnail_id'), // these are possibly set later in mergeImagesIntoPosts() @@ -171,7 +172,7 @@ function populateFrontmatter(posts) { throw `Could not find a frontmatter getter named "${key}".`; } - post.frontmatter[alias || key] = frontmatterGetter(post); + post.frontmatter[alias ?? key] = frontmatterGetter(post); }); }); } diff --git a/src/questions.js b/src/questions.js index f2783eb..088f886 100644 --- a/src/questions.js +++ b/src/questions.js @@ -109,7 +109,7 @@ export function load() { { name: 'frontmatter-fields', type: 'list', - default: ['title', 'date', 'categories', 'tags', 'coverImage'] + default: ['title', 'date', 'categories', 'tags', 'coverImage', 'draft'] }, { name: 'image-file-request-delay', diff --git a/src/shared.js b/src/shared.js index 3c6dc04..4e0ae75 100644 --- a/src/shared.js +++ b/src/shared.js @@ -7,31 +7,46 @@ export function camelCase(str) { return str.replace(/-(.)/g, (match) => match[1].toUpperCase()); } -export function buildPostPath(type, date, slug, overrideConfig) { +export function buildPostPath(post, overrideConfig) { const pathConfig = overrideConfig ?? config; - // start with base output dir and post type - const pathSegments = [pathConfig.output, type]; + // start with output folder + const pathSegments = [pathConfig.output]; - if (pathConfig.dateFolders === 'year' || pathConfig.dateFolders === 'year-month') { - pathSegments.push(date.toFormat('yyyy')); + // add folder for post type if exists + if (post.type) { + pathSegments.push(post.type); } - if (pathConfig.dateFolders === 'year-month') { - pathSegments.push(date.toFormat('LL')); + // add drafts folder if this is a draft post + if (post.isDraft) { + pathSegments.push('_drafts'); } - // create slug fragment, possibly date prefixed - let slugFragment = slug; - if (pathConfig.prefixDate) { - slugFragment = date.toFormat('yyyy-LL-dd') + '-' + slugFragment; + // add folders for date year/month as appropriate + if (post.date) { + if (pathConfig.dateFolders === 'year' || pathConfig.dateFolders === 'year-month') { + pathSegments.push(post.date.toFormat('yyyy')); + } + + if (pathConfig.dateFolders === 'year-month') { + pathSegments.push(post.date.toFormat('LL')); + } } - // use slug fragment as folder or filename as specified + // get slug with fallback + let slug = post.slug ? post.slug : 'id-' + post.id; + + // prepend date to slug as appropriate + if (pathConfig.prefixDate && post.date) { + slug = post.date.toFormat('yyyy-LL-dd') + '-' + slug; + } + + // use slug as folder or filename as specified if (pathConfig.postFolders) { - pathSegments.push(slugFragment, 'index.md'); + pathSegments.push(slug, 'index.md'); } else { - pathSegments.push(slugFragment + '.md'); + pathSegments.push(slug + '.md'); } return path.join(...pathSegments); diff --git a/src/translator.js b/src/translator.js index f1b8df0..3b727ea 100644 --- a/src/translator.js +++ b/src/translator.js @@ -87,7 +87,7 @@ export function initTurndownService() { return node.nodeName === 'PRE' && !node.querySelector('code'); }, replacement: (content, node) => { - const language = node.getAttribute('data-wetm-language') || ''; + const language = node.getAttribute('data-wetm-language') ?? ''; return '\n\n```' + language + '\n' + node.textContent + '\n```\n\n'; } }); diff --git a/src/writer.js b/src/writer.js index f289591..0c16f46 100644 --- a/src/writer.js +++ b/src/writer.js @@ -46,7 +46,7 @@ async function writeMarkdownFilesPromise(posts) { let skipCount = 0; let delay = 0; const payloads = posts.flatMap(post => { - const destinationPath = buildPostPath(post); + const destinationPath = shared.buildPostPath(post); if (checkFile(destinationPath)) { // already exists, don't need to save again skipCount++; @@ -96,9 +96,12 @@ async function loadMarkdownFilePromise(post) { if (shared.config.quoteDate) { outputValue = `"${outputValue}"`; } + } else if (typeof value === 'boolean') { + // output unquoted + outputValue = value.toString(); } else { // single string value - const escapedValue = (value || '').replace(/"/g, '\\"'); + const escapedValue = (value ?? '').replace(/"/g, '\\"'); if (escapedValue.length > 0) { outputValue = `"${escapedValue}"`; } @@ -118,7 +121,7 @@ async function writeImageFilesPromise(posts) { let skipCount = 0; let delay = 0; const payloads = posts.flatMap(post => { - const postPath = buildPostPath(post); + const postPath = shared.buildPostPath(post); const imagesDir = path.join(path.dirname(postPath), 'images'); return post.imageUrls.flatMap(imageUrl => { const filename = shared.getFilenameFromUrl(imageUrl); @@ -185,10 +188,6 @@ async function loadImageFilePromise(imageUrl) { return buffer; } -function buildPostPath(post) { - return shared.buildPostPath(post.type, post.date, post.slug); -} - function checkFile(path) { return fs.existsSync(path); } From cc09a41744533ee07372e79db88fa6b1600b511d Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Mon, 10 Feb 2025 16:06:04 -0500 Subject: [PATCH 2/5] Refactor turndown service init --- src/parser.js | 9 +++------ src/translator.js | 9 +++++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/parser.js b/src/parser.js index f255ec4..f1291ab 100644 --- a/src/parser.js +++ b/src/parser.js @@ -52,15 +52,12 @@ function getItemsOfType(channelData, type) { } function collectPosts(channelData, postTypes) { - // this is passed into getPostContent() for the markdown conversion - const turndownService = translator.initTurndownService(); - let allPosts = []; postTypes.forEach(postType => { const postsForType = getItemsOfType(channelData, postType) .filter(postData => postData.status[0] !== 'trash') .filter(postData => !(postType === 'page' && postData.post_name[0] === 'sample-page')) - .map(postData => buildPost(postData, turndownService)); + .map(postData => buildPost(postData)); if (postsForType.length > 0) { console.log(`${postsForType.length} posts of type "${postType}" found.`); @@ -72,13 +69,13 @@ function collectPosts(channelData, postTypes) { return allPosts; } -function buildPost(data, turndownService) { +function buildPost(data) { return { // full raw post data data, // contents of the post in markdown - content: translator.getPostContent(data, turndownService), + content: translator.getPostContent(data.encoded[0]), // these are not written to file, but help with other things type: data.post_type[0], diff --git a/src/translator.js b/src/translator.js index 3b727ea..a4f3bb1 100644 --- a/src/translator.js +++ b/src/translator.js @@ -2,7 +2,10 @@ import turndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'; import turndown from 'turndown'; import * as shared from './shared.js'; -export function initTurndownService() { +// init single reusable turndown service object upon import +const turndownService = initTurndownService(); + +function initTurndownService() { const turndownService = new turndown({ headingStyle: 'atx', bulletListMarker: '-', @@ -95,9 +98,7 @@ export function initTurndownService() { return turndownService; } -export function getPostContent(postData, turndownService) { - let content = postData.encoded[0]; - +export function getPostContent(content) { // insert an empty div element between double line breaks // this nifty trick causes turndown to keep adjacent paragraphs separated // without mucking up content inside of other elements (like blocks) From cb9dd9255e49aa691f1b4b5e45724a4ac02653d9 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Mon, 10 Feb 2025 16:33:24 -0500 Subject: [PATCH 3/5] Gracefully handle invalid dates --- src/parser.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/parser.js b/src/parser.js index f1291ab..5fa53a6 100644 --- a/src/parser.js +++ b/src/parser.js @@ -74,15 +74,15 @@ function buildPost(data) { // full raw post data data, - // contents of the post in markdown + // body content converted to markdown content: translator.getPostContent(data.encoded[0]), - // these are not written to file, but help with other things + // particularly useful values for all sorts of things type: data.post_type[0], id: data.post_id[0], isDraft: data.status[0] === 'draft', slug: decodeURIComponent(data.post_name[0]), - date: data.pubDate[0] ? luxon.DateTime.fromRFC2822(data.pubDate[0], { zone: shared.config.customDateTimezone }) : undefined, + date: getPostDate(data), coverImageId: getPostMetaValue(data.postmeta, '_thumbnail_id'), // these are possibly set later in mergeImagesIntoPosts() @@ -91,6 +91,11 @@ function buildPost(data) { }; } +function getPostDate(data) { + const date = luxon.DateTime.fromRFC2822(data.pubDate[0] ?? '', { zone: shared.config.customDateTimezone }); + return date.isValid ? date : undefined; +} + function getPostMetaValue(metas, key) { const meta = metas && metas.find((meta) => meta.meta_key[0] === key); return meta ? meta.meta_value[0] : undefined; From e8852a2900e035784f39550b0f015a6c1c2dc6c6 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 15 Feb 2025 10:22:13 -0500 Subject: [PATCH 4/5] Ignore more reserved post types --- src/parser.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser.js b/src/parser.js index 5fa53a6..cd5cda9 100644 --- a/src/parser.js +++ b/src/parser.js @@ -41,8 +41,13 @@ function getPostTypes(channelData) { 'nav_menu_item', 'custom_css', 'customize_changeset', + 'oembed_cache', + 'user_request', + 'wp_block', 'wp_global_styles', - 'wp_navigation' + 'wp_navigation', + 'wp_template', + 'wp_template_part' ].includes(type)); return [...new Set(types)]; // remove duplicates } From c546cd47caf19debce3721fbc0382b9aac977cfa Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 15 Feb 2025 10:23:18 -0500 Subject: [PATCH 5/5] Show slug fallback when writing --- src/shared.js | 6 +++++- src/writer.js | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/shared.js b/src/shared.js index 4e0ae75..38316af 100644 --- a/src/shared.js +++ b/src/shared.js @@ -7,6 +7,10 @@ export function camelCase(str) { return str.replace(/-(.)/g, (match) => match[1].toUpperCase()); } +export function getSlugWithFallback(post) { + return post.slug ? post.slug : 'id-' + post.id; +} + export function buildPostPath(post, overrideConfig) { const pathConfig = overrideConfig ?? config; @@ -35,7 +39,7 @@ export function buildPostPath(post, overrideConfig) { } // get slug with fallback - let slug = post.slug ? post.slug : 'id-' + post.id; + let slug = getSlugWithFallback(post); // prepend date to slug as appropriate if (pathConfig.prefixDate && post.date) { diff --git a/src/writer.js b/src/writer.js index 0c16f46..796a445 100644 --- a/src/writer.js +++ b/src/writer.js @@ -55,7 +55,7 @@ async function writeMarkdownFilesPromise(posts) { const payload = { item: post, type: post.type, - name: post.slug, + name: shared.getSlugWithFallback(post), destinationPath, delay };