From 1ad4e2dfdf23694334ec08e9708569cb6e959ce8 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Fri, 23 Feb 2024 10:24:40 -0500 Subject: [PATCH 01/14] Refactor for post data and frontmatter --- src/parser.js | 106 ++++++++++++++++++++++++++-------------------- src/translator.js | 4 +- 2 files changed, 63 insertions(+), 47 deletions(-) diff --git a/src/parser.js b/src/parser.js index 1451485..921ed00 100644 --- a/src/parser.js +++ b/src/parser.js @@ -9,32 +9,34 @@ const translator = require('./translator'); async function parseFilePromise(config) { console.log('\nParsing...'); const content = await fs.promises.readFile(config.input, 'utf8'); - const data = await xml2js.parseStringPromise(content, { + const allData = await xml2js.parseStringPromise(content, { trim: true, tagNameProcessors: [xml2js.processors.stripPrefix] }); + const channelData = allData.rss.channel[0].item; - const postTypes = getPostTypes(data, config); - const posts = collectPosts(data, postTypes, config); + const postTypes = getPostTypes(channelData, config); + const posts = collectPosts(channelData, postTypes, config); const images = []; if (config.saveAttachedImages) { - images.push(...collectAttachedImages(data)); + images.push(...collectAttachedImages(channelData)); } if (config.saveScrapedImages) { - images.push(...collectScrapedImages(data, postTypes)); + images.push(...collectScrapedImages(channelData, postTypes)); } mergeImagesIntoPosts(images, posts); + populateFrontmatter(posts); return posts; } -function getPostTypes(data, config) { +function getPostTypes(channelData, config) { if (config.includeOtherTypes) { // search export file for all post types minus some default types we don't want // effectively this will be 'post', 'page', and custom post types - const types = data.rss.channel[0].item + const types = channelData .map(item => item.post_type[0]) .filter(type => !['attachment', 'revision', 'nav_menu_item', 'custom_css', 'customize_changeset'].includes(type)); return [...new 
Set(types)]; // remove duplicates @@ -44,34 +46,31 @@ function getPostTypes(data, config) { } } -function getItemsOfType(data, type) { - return data.rss.channel[0].item.filter(item => item.post_type[0] === type); +function getItemsOfType(channelData, type) { + return channelData.filter(item => item.post_type[0] === type); } -function collectPosts(data, postTypes, config) { +function collectPosts(channelData, postTypes, config) { // this is passed into getPostContent() for the markdown conversion const turndownService = translator.initTurndownService(); let allPosts = []; postTypes.forEach(postType => { - const postsForType = getItemsOfType(data, postType) - .filter(post => post.status[0] !== 'trash' && post.status[0] !== 'draft') - .map(post => ({ + const postsForType = getItemsOfType(channelData, postType) + .filter(postData => postData.status[0] !== 'trash' && postData.status[0] !== 'draft') + .map(postData => ({ + data: postData, + // meta data isn't written to file, but is used to help with other things meta: { - id: getPostId(post), - slug: getPostSlug(post), - coverImageId: getPostCoverImageId(post), + id: getPostId(postData), + slug: getPostSlug(postData), + coverImageId: getPostCoverImageId(postData), + coverImage: undefined, // possibly set later in mergeImagesIntoPosts() type: postType, imageUrls: [] }, - frontmatter: { - title: getPostTitle(post), - date: getPostDate(post), - categories: getCategories(post), - tags: getTags(post) - }, - content: translator.getPostContent(post, turndownService, config) + content: translator.getPostContent(postData, turndownService, config) })); if (postTypes.length > 1) { @@ -87,30 +86,30 @@ function collectPosts(data, postTypes, config) { return allPosts; } -function getPostId(post) { - return post.post_id[0]; +function getPostId(postData) { + return postData.post_id[0]; } -function getPostSlug(post) { - return decodeURIComponent(post.post_name[0]); +function getPostSlug(postData) { + return 
decodeURIComponent(postData.post_name[0]); } -function getPostCoverImageId(post) { - if (post.postmeta === undefined) { +function getPostCoverImageId(postData) { + if (postData.postmeta === undefined) { return undefined; } - const postmeta = post.postmeta.find(postmeta => postmeta.meta_key[0] === '_thumbnail_id'); + const postmeta = postData.postmeta.find(postmeta => postmeta.meta_key[0] === '_thumbnail_id'); const id = postmeta ? postmeta.meta_value[0] : undefined; return id; } function getPostTitle(post) { - return post.title[0]; + return post.data.title[0]; } function getPostDate(post) { - const dateTime = luxon.DateTime.fromRFC2822(post.pubDate[0], { zone: 'utc' }); + const dateTime = luxon.DateTime.fromRFC2822(post.data.pubDate[0], { zone: 'utc' }); if (settings.custom_date_formatting) { return dateTime.toFormat(settings.custom_date_formatting); @@ -122,26 +121,30 @@ function getPostDate(post) { } function getCategories(post) { - const categories = processCategoryTags(post, 'category'); + const categories = processCategoryTags(post.data, 'category'); return categories.filter(category => !settings.filter_categories.includes(category)); } function getTags(post) { - return processCategoryTags(post, 'post_tag'); + return processCategoryTags(post.data, 'post_tag'); } -function processCategoryTags(post, domain) { - if (!post.category) { +function getCoverImage(post) { + return post.meta.coverImage; +} + +function processCategoryTags(postData, domain) { + if (!postData.category) { return []; } - return post.category + return postData.category .filter(category => category.$.domain === domain) .map(({ $: attributes }) => decodeURIComponent(attributes.nicename)); } -function collectAttachedImages(data) { - const images = getItemsOfType(data, 'attachment') +function collectAttachedImages(channelData) { + const images = getItemsOfType(channelData, 'attachment') // filter to certain image file types .filter(attachment => 
(/\.(gif|jpe?g|png)$/i).test(attachment.attachment_url[0])) .map(attachment => ({ @@ -154,13 +157,13 @@ function collectAttachedImages(data) { return images; } -function collectScrapedImages(data, postTypes) { +function collectScrapedImages(channelData, postTypes) { const images = []; postTypes.forEach(postType => { - getItemsOfType(data, postType).forEach(post => { - const postId = post.post_id[0]; - const postContent = post.encoded[0]; - const postLink = post.link[0]; + getItemsOfType(channelData, postType).forEach(postData => { + const postId = postData.post_id[0]; + const postContent = postData.encoded[0]; + const postLink = postData.link[0]; const matches = [...postContent.matchAll(/<img[^>]*src="(.+?\.(?:gif|jpe?g|png))"[^>]*>/gi)]; matches.forEach(match => { @@ -192,7 +195,7 @@ function mergeImagesIntoPosts(images, posts) { // this image was set as the featured image for this post if (image.id === post.meta.coverImageId) { shouldAttach = true; - post.frontmatter.coverImage = shared.getFilenameFromUrl(image.url); + post.meta.coverImage = shared.getFilenameFromUrl(image.url); } if (shouldAttach && !post.meta.imageUrls.includes(image.url)) { @@ -202,4 +205,17 @@ function mergeImagesIntoPosts(images, posts) { }); } +function populateFrontmatter(posts) { + posts.forEach(post => { + console.log(post); + post.frontmatter = { + title: getPostTitle(post), + date: getPostDate(post), + categories: getCategories(post), + tags: getTags(post), + coverImage: getCoverImage(post) + } + }); +} + exports.parseFilePromise = parseFilePromise; diff --git a/src/translator.js b/src/translator.js index 7fa6348..2a24452 100644 --- a/src/translator.js +++ b/src/translator.js @@ -94,8 +94,8 @@ function initTurndownService() { return turndownService; } -function getPostContent(post, turndownService, config) { - let content = post.encoded[0]; +function getPostContent(postData, turndownService, config) { + let content = postData.encoded[0]; // insert an empty div element between double line
breaks // this nifty trick causes turndown to keep adjacent paragraphs separated From 3a5ea10cb9c42e199d4c23b7cc217fe280c3b1c5 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Fri, 23 Feb 2024 12:53:58 -0500 Subject: [PATCH 02/14] Split out frontmatter getters --- src/frontmatter/categories.js | 13 ++++++++ src/frontmatter/coverImage.js | 3 ++ src/frontmatter/date.js | 15 +++++++++ src/frontmatter/tags.js | 13 ++++++++ src/frontmatter/title.js | 3 ++ src/parser.js | 60 ++++++++--------------------------- src/settings.js | 8 +++++ 7 files changed, 68 insertions(+), 47 deletions(-) create mode 100644 src/frontmatter/categories.js create mode 100644 src/frontmatter/coverImage.js create mode 100644 src/frontmatter/date.js create mode 100644 src/frontmatter/tags.js create mode 100644 src/frontmatter/title.js diff --git a/src/frontmatter/categories.js b/src/frontmatter/categories.js new file mode 100644 index 0000000..c324403 --- /dev/null +++ b/src/frontmatter/categories.js @@ -0,0 +1,13 @@ +const settings = require('../settings'); + +module.exports = (post) => { + if (!post.data.category) { + return []; + } + + const categories = post.data.category + .filter(category => category.$.domain === 'category') + .map(({ $: attributes }) => decodeURIComponent(attributes.nicename)); + + return categories.filter(category => !settings.filter_categories.includes(category)); +}; diff --git a/src/frontmatter/coverImage.js b/src/frontmatter/coverImage.js new file mode 100644 index 0000000..1808ea5 --- /dev/null +++ b/src/frontmatter/coverImage.js @@ -0,0 +1,3 @@ +module.exports = (post) => { + return post.meta.coverImage; +}; diff --git a/src/frontmatter/date.js b/src/frontmatter/date.js new file mode 100644 index 0000000..16292b7 --- /dev/null +++ b/src/frontmatter/date.js @@ -0,0 +1,15 @@ +const luxon = require('luxon'); + +const settings = require('../settings'); + +module.exports = (post) => { + const dateTime = luxon.DateTime.fromRFC2822(post.data.pubDate[0], { zone: 'utc' }); + + 
if (settings.custom_date_formatting) { + return dateTime.toFormat(settings.custom_date_formatting); + } else if (settings.include_time_with_date) { + return dateTime.toISO(); + } else { + return dateTime.toISODate(); + } +}; diff --git a/src/frontmatter/tags.js b/src/frontmatter/tags.js new file mode 100644 index 0000000..63d984f --- /dev/null +++ b/src/frontmatter/tags.js @@ -0,0 +1,13 @@ +const settings = require('../settings'); + +module.exports = (post) => { + if (!post.data.category) { + return []; + } + + const categories = post.data.category + .filter(category => category.$.domain === 'post_tag') + .map(({ $: attributes }) => decodeURIComponent(attributes.nicename)); + + return categories; +}; diff --git a/src/frontmatter/title.js b/src/frontmatter/title.js new file mode 100644 index 0000000..2b34f6c --- /dev/null +++ b/src/frontmatter/title.js @@ -0,0 +1,3 @@ +module.exports = (post) => { + return post.data.title[0]; +}; diff --git a/src/parser.js b/src/parser.js index 921ed00..d530e80 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,11 +1,17 @@ const fs = require('fs'); -const luxon = require('luxon'); const xml2js = require('xml2js'); const shared = require('./shared'); -const settings = require('./settings'); const translator = require('./translator'); +const frontmatter = { + title: require('./frontmatter/title'), + date: require('./frontmatter/date'), + categories: require('./frontmatter/categories'), + tags: require('./frontmatter/tags'), + coverImage: require('./frontmatter/coverImage'), +}; + async function parseFilePromise(config) { console.log('\nParsing...'); const content = await fs.promises.readFile(config.input, 'utf8'); @@ -104,45 +110,6 @@ function getPostCoverImageId(postData) { return id; } -function getPostTitle(post) { - return post.data.title[0]; -} - -function getPostDate(post) { - const dateTime = luxon.DateTime.fromRFC2822(post.data.pubDate[0], { zone: 'utc' }); - - if (settings.custom_date_formatting) { - return 
dateTime.toFormat(settings.custom_date_formatting); - } else if (settings.include_time_with_date) { - return dateTime.toISO(); - } else { - return dateTime.toISODate(); - } -} - -function getCategories(post) { - const categories = processCategoryTags(post.data, 'category'); - return categories.filter(category => !settings.filter_categories.includes(category)); -} - -function getTags(post) { - return processCategoryTags(post.data, 'post_tag'); -} - -function getCoverImage(post) { - return post.meta.coverImage; -} - -function processCategoryTags(postData, domain) { - if (!postData.category) { - return []; - } - - return postData.category - .filter(category => category.$.domain === domain) - .map(({ $: attributes }) => decodeURIComponent(attributes.nicename)); -} - function collectAttachedImages(channelData) { const images = getItemsOfType(channelData, 'attachment') // filter to certain image file types @@ -207,13 +174,12 @@ function mergeImagesIntoPosts(images, posts) { function populateFrontmatter(posts) { posts.forEach(post => { - console.log(post); post.frontmatter = { - title: getPostTitle(post), - date: getPostDate(post), - categories: getCategories(post), - tags: getTags(post), - coverImage: getCoverImage(post) + title: frontmatter.title(post), + date: frontmatter.date(post), + categories: frontmatter.categories(post), + tags: frontmatter.tags(post), + coverImage: frontmatter.coverImage(post) } }); } diff --git a/src/settings.js b/src/settings.js index bf4e206..57087ce 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1,3 +1,11 @@ +exports.frontmatter_fields = [ + 'title', + 'date', + 'categories', + 'tags', + 'coverImage' +]; + // time in ms to wait between requesting image files // increase this if you see timeouts or server errors exports.image_file_request_delay = 500; From 3e8dd285b2c89d1c401d45d4b16d4d64f4a7783f Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Fri, 23 Feb 2024 13:07:20 -0500 Subject: [PATCH 03/14] Fix for coverImage empty string --- 
src/writer.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/writer.js b/src/writer.js index 0346a0f..b782756 100644 --- a/src/writer.js +++ b/src/writer.js @@ -85,7 +85,9 @@ async function loadMarkdownFilePromise(post) { } else { // single string value const escapedValue = (value || '').replace(/"/g, '\\"'); - outputValue = `"${escapedValue}"`; + if (escapedValue.length > 0) { + outputValue = `"${escapedValue}"`; + } } if (outputValue !== undefined) { From 641106ea82096eb44686307d583c844a06cab896 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Fri, 23 Feb 2024 13:35:01 -0500 Subject: [PATCH 04/14] Dynamic frontmatter loading --- package-lock.json | 9 +++++++++ package.json | 1 + src/parser.js | 23 +++++++++-------------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0bd48fe..b9b4786 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,6 +17,7 @@ "luxon": "^3.4.4", "request": "^2.88.2", "request-promise-native": "^1.0.8", + "require-directory": "^2.1.1", "turndown": "^7.1.2", "turndown-plugin-gfm": "^1.0.2", "xml2js": "^0.6.2" @@ -1545,6 +1546,14 @@ "request": "^2.34" } }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/resolve-from": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", diff --git a/package.json b/package.json index 506b8f7..0a5440f 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ "luxon": "^3.4.4", "request": "^2.88.2", "request-promise-native": "^1.0.8", + "require-directory": "^2.1.1", "turndown": "^7.1.2", "turndown-plugin-gfm": "^1.0.2", "xml2js": "^0.6.2" diff --git a/src/parser.js b/src/parser.js index 
d530e80..892a7c0 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,16 +1,13 @@ const fs = require('fs'); +const requireDirectory = require('require-directory'); const xml2js = require('xml2js'); const shared = require('./shared'); +const settings = require('./settings'); const translator = require('./translator'); -const frontmatter = { - title: require('./frontmatter/title'), - date: require('./frontmatter/date'), - categories: require('./frontmatter/categories'), - tags: require('./frontmatter/tags'), - coverImage: require('./frontmatter/coverImage'), -}; +// dynamically requires all frontmatter loaders +const frontmatterLoaders = requireDirectory(module, './frontmatter', { recurse: false }); async function parseFilePromise(config) { console.log('\nParsing...'); @@ -174,13 +171,11 @@ function mergeImagesIntoPosts(images, posts) { function populateFrontmatter(posts) { posts.forEach(post => { - post.frontmatter = { - title: frontmatter.title(post), - date: frontmatter.date(post), - categories: frontmatter.categories(post), - tags: frontmatter.tags(post), - coverImage: frontmatter.coverImage(post) - } + post.frontmatter = {}; + settings.frontmatter_fields.forEach(field => { + [key, alias] = field.split(':'); + post.frontmatter[alias || key] = frontmatterLoaders[key](post); + }); }); } From a83c472c932c01b68ebe60da485528c8e7b76a19 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Fri, 23 Feb 2024 14:23:41 -0500 Subject: [PATCH 05/14] Comments --- src/frontmatter/tags.js | 2 -- src/settings.js | 26 +++++++++++++++----------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/frontmatter/tags.js b/src/frontmatter/tags.js index 63d984f..1a44d98 100644 --- a/src/frontmatter/tags.js +++ b/src/frontmatter/tags.js @@ -1,5 +1,3 @@ -const settings = require('../settings'); - module.exports = (post) => { if (!post.data.category) { return []; diff --git a/src/settings.js b/src/settings.js index 57087ce..b6a21e7 100644 --- a/src/settings.js +++ b/src/settings.js 
@@ -1,3 +1,7 @@ +// Which fields to include in frontmatter. Look in /src/frontmatter to see available fields. +// Order is preserved. If a field has an empty value, it will not be included. You can rename a +// field by providing an alias after a ':'. For example, 'date:created' will include 'date' in +// frontmatter, but renamed to 'created'. exports.frontmatter_fields = [ 'title', 'date', @@ -6,23 +10,23 @@ exports.frontmatter_fields = [ 'coverImage' ]; -// time in ms to wait between requesting image files -// increase this if you see timeouts or server errors +// Time in ms to wait between requesting image files. Increase this if you see timeouts or +// server errors. exports.image_file_request_delay = 500; -// time in ms to wait between saving Markdown files -// increase this if your file system becomes overloaded +// Time in ms to wait between saving Markdown files. Increase this if your file system becomes +// overloaded. exports.markdown_file_write_delay = 25; -// enable this to include time with post dates -// for example, "2020-12-25" would become "2020-12-25T11:20:35.000Z" +// Enable this to include time with post dates. For example, "2020-12-25" would become +// "2020-12-25T11:20:35.000Z". exports.include_time_with_date = false; -// override post date formatting with a custom formatting string (for example: 'yyyy LLL dd') -// tokens are documented here: https://moment.github.io/luxon/#/parsing?id=table-of-tokens -// if set, this takes precedence over include_time_with_date +// Override post date formatting with a custom formatting string (for example: 'yyyy LLL dd'). +// Tokens are documented here: https://moment.github.io/luxon/#/parsing?id=table-of-tokens. If +// set, this takes precedence over include_time_with_date. exports.custom_date_formatting = ''; -// categories to be excluded from post frontmatter -// this does not filter out posts themselves, just the categories listed in their frontmatter +// Categories to be excluded from post frontmatter. 
This does not filter out posts themselves, +// just the categories listed in their frontmatter. exports.filter_categories = ['uncategorized']; From b75827a055212d27c9c5a96c76c75fe3474e5232 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Fri, 23 Feb 2024 17:44:03 -0500 Subject: [PATCH 06/14] Mention frontmatter customization --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 127f8ea..e326453 100644 --- a/README.md +++ b/README.md @@ -165,6 +165,6 @@ Some WordPress sites make use of a `"page"` post type and/or custom post types. ## Advanced Settings -You can edit [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js) to tweak advanced settings. This includes things like throttling image downloads or customizing the date format in frontmatter. +You can edit [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js) to tweak advanced settings. This includes things like customizing frontmatter fields and throttling image downloads. You'll need to run the script locally (not using `npx`) to make use of advanced settings. 
From 815e67f135f081331cc8dbf380f77a4268d3bd98 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 24 Feb 2024 08:00:23 -0500 Subject: [PATCH 07/14] Frontmatter getters comments --- src/frontmatter/categories.js | 1 + src/frontmatter/coverImage.js | 2 ++ src/frontmatter/date.js | 1 + src/frontmatter/tags.js | 1 + src/frontmatter/title.js | 1 + src/parser.js | 6 +++--- 6 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/frontmatter/categories.js b/src/frontmatter/categories.js index c324403..82aad78 100644 --- a/src/frontmatter/categories.js +++ b/src/frontmatter/categories.js @@ -1,5 +1,6 @@ const settings = require('../settings'); +// get array of categories for post, filtered as specified in settings module.exports = (post) => { if (!post.data.category) { return []; diff --git a/src/frontmatter/coverImage.js b/src/frontmatter/coverImage.js index 1808ea5..e293312 100644 --- a/src/frontmatter/coverImage.js +++ b/src/frontmatter/coverImage.js @@ -1,3 +1,5 @@ +// get cover image filename, previously set on post.meta +// this one is unique as it relies on logic executed by the parser module.exports = (post) => { return post.meta.coverImage; }; diff --git a/src/frontmatter/date.js b/src/frontmatter/date.js index 16292b7..8b5c11f 100644 --- a/src/frontmatter/date.js +++ b/src/frontmatter/date.js @@ -2,6 +2,7 @@ const luxon = require('luxon'); const settings = require('../settings'); +// get post date, optionally formatted as specified in settings module.exports = (post) => { const dateTime = luxon.DateTime.fromRFC2822(post.data.pubDate[0], { zone: 'utc' }); diff --git a/src/frontmatter/tags.js b/src/frontmatter/tags.js index 1a44d98..9cb2c28 100644 --- a/src/frontmatter/tags.js +++ b/src/frontmatter/tags.js @@ -1,3 +1,4 @@ +// get array of tags for post module.exports = (post) => { if (!post.data.category) { return []; diff --git a/src/frontmatter/title.js b/src/frontmatter/title.js index 2b34f6c..d9bdb45 100644 --- a/src/frontmatter/title.js +++ 
b/src/frontmatter/title.js @@ -1,3 +1,4 @@ +// get simple post title module.exports = (post) => { return post.data.title[0]; }; diff --git a/src/parser.js b/src/parser.js index 892a7c0..cdec4ce 100644 --- a/src/parser.js +++ b/src/parser.js @@ -6,8 +6,8 @@ const shared = require('./shared'); const settings = require('./settings'); const translator = require('./translator'); -// dynamically requires all frontmatter loaders -const frontmatterLoaders = requireDirectory(module, './frontmatter', { recurse: false }); +// dynamically requires all frontmatter getters +const frontmatterGetters = requireDirectory(module, './frontmatter', { recurse: false }); async function parseFilePromise(config) { console.log('\nParsing...'); @@ -174,7 +174,7 @@ function populateFrontmatter(posts) { post.frontmatter = {}; settings.frontmatter_fields.forEach(field => { [key, alias] = field.split(':'); - post.frontmatter[alias || key] = frontmatterLoaders[key](post); + post.frontmatter[alias || key] = frontmatterGetters[key](post); }); }); } From f65317713bcb070da4beb334fe607dadc13b66e0 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 24 Feb 2024 08:58:56 -0500 Subject: [PATCH 08/14] Contributing guidelines --- CONTRIBUTING.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..1d14dc2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,23 @@ +# How to Contribute + +Contributions are welcome! Thank you! + +## General Guidelines + +Some quick notes when making a pull request. + +- Match the style and formatting of the code you are editing. +- Each pull request should be focused on a single thing (a single bug fix, a single feature, etc.). This makes reviewing easier and minimizes merge conflicts. +- Include a description of the problem being solved and what your code does. Steps to reproduce the problem or example input/output are very helpful. 
+ +## Adding Options + +Keeping the wizard as short as possible is a priority. Pull requests that add options to the wizard will probably not be accepted. Instead, you can add an advanced setting to [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js). + +## Adding Frontmatter Fields + +Similarly, default frontmatter output is limited to just a few widely used fields to avoid bloat. However, you may add new optional frontmatter fields. + +To do so, add a new `.js` file into the [/src/frontmatter](https://github.com/lonekorean/wordpress-export-to-markdown/tree/master/src/frontmatter) folder. The filename should be the name of the frontmatter field, camelcased. See the other files there for code examples. + +Users will be able to include your new frontmatter field by editing `frontmatter_fields` in [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js). From 002027e478728a92afa174e5bd826ae47c2ad892 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 24 Feb 2024 09:12:07 -0500 Subject: [PATCH 09/14] Nicer error when frontmatter getter missing --- src/parser.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parser.js b/src/parser.js index cdec4ce..48985f5 100644 --- a/src/parser.js +++ b/src/parser.js @@ -174,7 +174,13 @@ function populateFrontmatter(posts) { post.frontmatter = {}; settings.frontmatter_fields.forEach(field => { [key, alias] = field.split(':'); - post.frontmatter[alias || key] = frontmatterGetters[key](post); + + let frontmatterGetter = frontmatterGetters[key]; + if (!frontmatterGetter) { + throw `Could not find a frontmatter getter named "${key}".`; + } + + post.frontmatter[alias || key] = frontmatterGetter(post); }); }); } From bb05a062803188ab369214978653354355eef006 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 24 Feb 2024 10:02:40 -0500 Subject: [PATCH 10/14] Example/debug frontmatter getter --- 
src/frontmatter/debug.js | 19 +++++++++++++++++++ src/parser.js | 5 +++-- 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 src/frontmatter/debug.js diff --git a/src/frontmatter/debug.js b/src/frontmatter/debug.js new file mode 100644 index 0000000..348cc38 --- /dev/null +++ b/src/frontmatter/debug.js @@ -0,0 +1,19 @@ +/* + 1. Copy this file, rename to the frontmatter field name you want, camelcased + 2. Edit frontmatter_fields in settings.js to include your new field name + 3. Run the script to see post data dumps, to see what you can work with + 4. Write your code to get and return what you want + 5. Update "get whatever" comment to describe what you're getting + 6. Remove your field name from the default frontmatter_fields in settings.js + 7. Remove this comment block and the console debug code + 8. Make that pull request! +*/ + +// get whatever +module.exports = (post) => { + console.log('\nBEGIN POST DATA DUMP ===========================================================\n'); + console.dir(post, { depth: null }); + console.log('\nEND POST DATA DUMP =============================================================\n'); + + return 'DEBUG: ' + post.data.title[0]; +}; diff --git a/src/parser.js b/src/parser.js index 48985f5..3b9717d 100644 --- a/src/parser.js +++ b/src/parser.js @@ -171,7 +171,7 @@ function mergeImagesIntoPosts(images, posts) { function populateFrontmatter(posts) { posts.forEach(post => { - post.frontmatter = {}; + const frontmatter = {}; settings.frontmatter_fields.forEach(field => { [key, alias] = field.split(':'); @@ -180,8 +180,9 @@ function populateFrontmatter(posts) { throw `Could not find a frontmatter getter named "${key}".`; } - post.frontmatter[alias || key] = frontmatterGetter(post); + frontmatter[alias || key] = frontmatterGetter(post); }); + post.frontmatter = frontmatter; }); } From a0a1224d88706b6524abebf2a1ddde2dea362245 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 24 Feb 2024 14:16:28 -0500 Subject: [PATCH 
11/14] Rename debug.js to example.js --- CONTRIBUTING.md | 2 +- src/frontmatter/{debug.js => example.js} | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename src/frontmatter/{debug.js => example.js} (79%) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1d14dc2..1f0c0ec 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,6 +18,6 @@ Keeping the wizard as short as possible is a priority. Pull requests that add op Similarly, default frontmatter output is limited to just a few widely used fields to avoid bloat. However, you may add new optional frontmatter fields. -To do so, add a new `.js` file into the [/src/frontmatter](https://github.com/lonekorean/wordpress-export-to-markdown/tree/master/src/frontmatter) folder. The filename should be the name of the frontmatter field, camelcased. See the other files there for code examples. +To do so, follow the instructions in [/src/frontmatter/example.js](https://github.com/lonekorean/wordpress-export-to-markdown/tree/master/src/frontmatterexample.js). Users will be able to include your new frontmatter field by editing `frontmatter_fields` in [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js). diff --git a/src/frontmatter/debug.js b/src/frontmatter/example.js similarity index 79% rename from src/frontmatter/debug.js rename to src/frontmatter/example.js index 348cc38..9ecaf8e 100644 --- a/src/frontmatter/debug.js +++ b/src/frontmatter/example.js @@ -4,8 +4,8 @@ 3. Run the script to see post data dumps, to see what you can work with 4. Write your code to get and return what you want 5. Update "get whatever" comment to describe what you're getting - 6. Remove your field name from the default frontmatter_fields in settings.js - 7. Remove this comment block and the console debug code + 6. Remove your field name from frontmatter_fields in settings.js + 7. Remove this comment block and the debug console code 8. Make that pull request! 
*/ @@ -15,5 +15,5 @@ module.exports = (post) => { console.dir(post, { depth: null }); console.log('\nEND POST DATA DUMP =============================================================\n'); - return 'DEBUG: ' + post.data.title[0]; + return 'EXAMPLE: ' + post.data.title[0]; }; From b0bf7775069dfe1785dcbc3f3570293ec5acb260 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 24 Feb 2024 14:25:05 -0500 Subject: [PATCH 12/14] Ugh, tabs vs spaces --- src/frontmatter/coverImage.js | 2 +- src/frontmatter/example.js | 24 ++++++++++++------------ src/frontmatter/title.js | 2 +- src/settings.js | 10 +++++----- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/frontmatter/coverImage.js b/src/frontmatter/coverImage.js index e293312..a3a6f29 100644 --- a/src/frontmatter/coverImage.js +++ b/src/frontmatter/coverImage.js @@ -1,5 +1,5 @@ // get cover image filename, previously set on post.meta // this one is unique as it relies on logic executed by the parser module.exports = (post) => { - return post.meta.coverImage; + return post.meta.coverImage; }; diff --git a/src/frontmatter/example.js b/src/frontmatter/example.js index 9ecaf8e..2159d87 100644 --- a/src/frontmatter/example.js +++ b/src/frontmatter/example.js @@ -1,19 +1,19 @@ /* - 1. Copy this file, rename to the frontmatter field name you want, camelcased - 2. Edit frontmatter_fields in settings.js to include your new field name - 3. Run the script to see post data dumps, to see what you can work with - 4. Write your code to get and return what you want - 5. Update "get whatever" comment to describe what you're getting - 6. Remove your field name from frontmatter_fields in settings.js - 7. Remove this comment block and the debug console code - 8. Make that pull request! + 1. Copy this file, rename to the frontmatter field name you want, camelcased + 2. Edit frontmatter_fields in settings.js to include your new field name + 3. Run the script to see post data dumps, to see what you can work with + 4. 
Write your code to get and return what you want + 5. Update "get whatever" comment to describe what you're getting + 6. Remove your field name from frontmatter_fields in settings.js + 7. Remove this comment block and the debug console code + 8. Make that pull request! */ // get whatever module.exports = (post) => { - console.log('\nBEGIN POST DATA DUMP ===========================================================\n'); - console.dir(post, { depth: null }); - console.log('\nEND POST DATA DUMP =============================================================\n'); + console.log('\nBEGIN POST DATA DUMP ===========================================================\n'); + console.dir(post, { depth: null }); + console.log('\nEND POST DATA DUMP =============================================================\n'); - return 'EXAMPLE: ' + post.data.title[0]; + return 'EXAMPLE: ' + post.data.title[0]; }; diff --git a/src/frontmatter/title.js b/src/frontmatter/title.js index d9bdb45..d33e515 100644 --- a/src/frontmatter/title.js +++ b/src/frontmatter/title.js @@ -1,4 +1,4 @@ // get simple post title module.exports = (post) => { - return post.data.title[0]; + return post.data.title[0]; }; diff --git a/src/settings.js b/src/settings.js index b6a21e7..2088ecd 100644 --- a/src/settings.js +++ b/src/settings.js @@ -3,11 +3,11 @@ // field by providing an alias after a ':'. For example, 'date:created' will include 'date' in // frontmatter, but renamed to 'created'. exports.frontmatter_fields = [ - 'title', - 'date', - 'categories', - 'tags', - 'coverImage' + 'title', + 'date', + 'categories', + 'tags', + 'coverImage' ]; // Time in ms to wait between requesting image files. 
Increase this if you see timeouts or From 924a9553e92e56893f8dc8b961b9655147e6b2c8 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 24 Feb 2024 14:30:26 -0500 Subject: [PATCH 13/14] More post parsing comments --- src/parser.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser.js b/src/parser.js index 3b9717d..e1dafa9 100644 --- a/src/parser.js +++ b/src/parser.js @@ -62,6 +62,7 @@ function collectPosts(channelData, postTypes, config) { const postsForType = getItemsOfType(channelData, postType) .filter(postData => postData.status[0] !== 'trash' && postData.status[0] !== 'draft') .map(postData => ({ + // raw post data, used by frontmatter getters data: postData, // meta data isn't written to file, but is used to help with other things @@ -71,8 +72,10 @@ function collectPosts(channelData, postTypes, config) { coverImageId: getPostCoverImageId(postData), coverImage: undefined, // possibly set later in mergeImagesIntoPosts() type: postType, - imageUrls: [] + imageUrls: [] // possibly set later in mergeImagesIntoPosts() }, + + // contents of the post in markdown content: translator.getPostContent(postData, turndownService, config) })); From 491ca0151f6f5957952b0821332371c2895f6f76 Mon Sep 17 00:00:00 2001 From: Will Boyd Date: Sat, 24 Feb 2024 14:36:26 -0500 Subject: [PATCH 14/14] Fix example.js link --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1f0c0ec..dbfa64b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,6 +18,6 @@ Keeping the wizard as short as possible is a priority. Pull requests that add op Similarly, default frontmatter output is limited to just a few widely used fields to avoid bloat. However, you may add new optional frontmatter fields. -To do so, follow the instructions in [/src/frontmatter/example.js](https://github.com/lonekorean/wordpress-export-to-markdown/tree/master/src/frontmatterexample.js). 
+To do so, follow the instructions in [/src/frontmatter/example.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/frontmatter/example.js). Users will be able to include your new frontmatter field by editing `frontmatter_fields` in [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js).