mirror of
https://github.com/10h30/wordpress-export-to-markdown.git
synced 2026-06-05 15:09:59 +09:00
Merge pull request #110 from lonekorean/customizable-frontmatter
Customizable frontmatter
This commit is contained in:
@@ -0,0 +1,23 @@
|
||||
# How to Contribute
|
||||
|
||||
Contributions are welcome! Thank you!
|
||||
|
||||
## General Guidelines
|
||||
|
||||
Some quick notes when making a pull request.
|
||||
|
||||
- Match the style and formatting of the code you are editing.
|
||||
- Each pull request should be focused on a single thing (a single bug fix, a single feature, etc.). This makes reviewing easier and minimizes merge conflicts.
|
||||
- Include a description of the problem being solved and what your code does. Steps to reproduce the problem or example input/output are very helpful.
|
||||
|
||||
## Adding Options
|
||||
|
||||
Keeping the wizard as short as possible is a priority. Pull requests that add options to the wizard will probably not be accepted. Instead, you can add an advanced setting to [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js).
|
||||
|
||||
## Adding Frontmatter Fields
|
||||
|
||||
Similarly, default frontmatter output is limited to just a few widely used fields to avoid bloat. However, you may add new optional frontmatter fields.
|
||||
|
||||
To do so, follow the instructions in [/src/frontmatter/example.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/frontmatter/example.js).
|
||||
|
||||
Users will be able to include your new frontmatter field by editing `frontmatter_fields` in [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js).
|
||||
@@ -165,6 +165,6 @@ Some WordPress sites make use of a `"page"` post type and/or custom post types.
|
||||
|
||||
## Advanced Settings
|
||||
|
||||
You can edit [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js) to tweak advanced settings. This includes things like throttling image downloads or customizing the date format in frontmatter.
|
||||
You can edit [settings.js](https://github.com/lonekorean/wordpress-export-to-markdown/blob/master/src/settings.js) to tweak advanced settings. This includes things like customizing frontmatter fields and throttling image downloads.
|
||||
|
||||
You'll need to run the script locally (not using `npx`) to make use of advanced settings.
|
||||
|
||||
Generated
+9
@@ -17,6 +17,7 @@
|
||||
"luxon": "^3.4.4",
|
||||
"request": "^2.88.2",
|
||||
"request-promise-native": "^1.0.8",
|
||||
"require-directory": "^2.1.1",
|
||||
"turndown": "^7.1.2",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"xml2js": "^0.6.2"
|
||||
@@ -1545,6 +1546,14 @@
|
||||
"request": "^2.34"
|
||||
}
|
||||
},
|
||||
"node_modules/require-directory": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
|
||||
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/resolve-from": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
"luxon": "^3.4.4",
|
||||
"request": "^2.88.2",
|
||||
"request-promise-native": "^1.0.8",
|
||||
"require-directory": "^2.1.1",
|
||||
"turndown": "^7.1.2",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"xml2js": "^0.6.2"
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
const settings = require('../settings');
|
||||
|
||||
// get array of categories for post, filtered as specified in settings
|
||||
module.exports = (post) => {
|
||||
if (!post.data.category) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const categories = post.data.category
|
||||
.filter(category => category.$.domain === 'category')
|
||||
.map(({ $: attributes }) => decodeURIComponent(attributes.nicename));
|
||||
|
||||
return categories.filter(category => !settings.filter_categories.includes(category));
|
||||
};
|
||||
@@ -0,0 +1,5 @@
|
||||
// get cover image filename, previously set on post.meta
|
||||
// this one is unique as it relies on logic executed by the parser
|
||||
module.exports = (post) => {
|
||||
return post.meta.coverImage;
|
||||
};
|
||||
@@ -0,0 +1,16 @@
|
||||
const luxon = require('luxon');
|
||||
|
||||
const settings = require('../settings');
|
||||
|
||||
// get post date, optionally formatted as specified in settings
|
||||
module.exports = (post) => {
|
||||
const dateTime = luxon.DateTime.fromRFC2822(post.data.pubDate[0], { zone: 'utc' });
|
||||
|
||||
if (settings.custom_date_formatting) {
|
||||
return dateTime.toFormat(settings.custom_date_formatting);
|
||||
} else if (settings.include_time_with_date) {
|
||||
return dateTime.toISO();
|
||||
} else {
|
||||
return dateTime.toISODate();
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
1. Copy this file and rename it to the frontmatter field name you want, in camelCase
|
||||
2. Edit frontmatter_fields in settings.js to include your new field name
|
||||
3. Run the script to see post data dumps, to see what you can work with
|
||||
4. Write your code to get and return what you want
|
||||
5. Update "get whatever" comment to describe what you're getting
|
||||
6. Remove your field name from frontmatter_fields in settings.js
|
||||
7. Remove this comment block and the debug console code
|
||||
8. Make that pull request!
|
||||
*/
|
||||
|
||||
// get whatever
|
||||
module.exports = (post) => {
|
||||
console.log('\nBEGIN POST DATA DUMP ===========================================================\n');
|
||||
console.dir(post, { depth: null });
|
||||
console.log('\nEND POST DATA DUMP =============================================================\n');
|
||||
|
||||
return 'EXAMPLE: ' + post.data.title[0];
|
||||
};
|
||||
@@ -0,0 +1,12 @@
|
||||
// get array of tags for post
|
||||
module.exports = (post) => {
|
||||
if (!post.data.category) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const categories = post.data.category
|
||||
.filter(category => category.$.domain === 'post_tag')
|
||||
.map(({ $: attributes }) => decodeURIComponent(attributes.nicename));
|
||||
|
||||
return categories;
|
||||
};
|
||||
@@ -0,0 +1,4 @@
|
||||
// get simple post title
|
||||
module.exports = (post) => {
|
||||
return post.data.title[0];
|
||||
};
|
||||
+62
-75
@@ -1,40 +1,45 @@
|
||||
const fs = require('fs');
|
||||
const luxon = require('luxon');
|
||||
const requireDirectory = require('require-directory');
|
||||
const xml2js = require('xml2js');
|
||||
|
||||
const shared = require('./shared');
|
||||
const settings = require('./settings');
|
||||
const translator = require('./translator');
|
||||
|
||||
// dynamically requires all frontmatter getters
|
||||
const frontmatterGetters = requireDirectory(module, './frontmatter', { recurse: false });
|
||||
|
||||
async function parseFilePromise(config) {
|
||||
console.log('\nParsing...');
|
||||
const content = await fs.promises.readFile(config.input, 'utf8');
|
||||
const data = await xml2js.parseStringPromise(content, {
|
||||
const allData = await xml2js.parseStringPromise(content, {
|
||||
trim: true,
|
||||
tagNameProcessors: [xml2js.processors.stripPrefix]
|
||||
});
|
||||
const channelData = allData.rss.channel[0].item;
|
||||
|
||||
const postTypes = getPostTypes(data, config);
|
||||
const posts = collectPosts(data, postTypes, config);
|
||||
const postTypes = getPostTypes(channelData, config);
|
||||
const posts = collectPosts(channelData, postTypes, config);
|
||||
|
||||
const images = [];
|
||||
if (config.saveAttachedImages) {
|
||||
images.push(...collectAttachedImages(data));
|
||||
images.push(...collectAttachedImages(channelData));
|
||||
}
|
||||
if (config.saveScrapedImages) {
|
||||
images.push(...collectScrapedImages(data, postTypes));
|
||||
images.push(...collectScrapedImages(channelData, postTypes));
|
||||
}
|
||||
|
||||
mergeImagesIntoPosts(images, posts);
|
||||
populateFrontmatter(posts);
|
||||
|
||||
return posts;
|
||||
}
|
||||
|
||||
function getPostTypes(data, config) {
|
||||
function getPostTypes(channelData, config) {
|
||||
if (config.includeOtherTypes) {
|
||||
// search export file for all post types minus some default types we don't want
|
||||
// effectively this will be 'post', 'page', and custom post types
|
||||
const types = data.rss.channel[0].item
|
||||
const types = channelData
|
||||
.map(item => item.post_type[0])
|
||||
.filter(type => !['attachment', 'revision', 'nav_menu_item', 'custom_css', 'customize_changeset'].includes(type));
|
||||
return [...new Set(types)]; // remove duplicates
|
||||
@@ -44,34 +49,34 @@ function getPostTypes(data, config) {
|
||||
}
|
||||
}
|
||||
|
||||
function getItemsOfType(data, type) {
|
||||
return data.rss.channel[0].item.filter(item => item.post_type[0] === type);
|
||||
function getItemsOfType(channelData, type) {
|
||||
return channelData.filter(item => item.post_type[0] === type);
|
||||
}
|
||||
|
||||
function collectPosts(data, postTypes, config) {
|
||||
function collectPosts(channelData, postTypes, config) {
|
||||
// this is passed into getPostContent() for the markdown conversion
|
||||
const turndownService = translator.initTurndownService();
|
||||
|
||||
let allPosts = [];
|
||||
postTypes.forEach(postType => {
|
||||
const postsForType = getItemsOfType(data, postType)
|
||||
.filter(post => post.status[0] !== 'trash' && post.status[0] !== 'draft')
|
||||
.map(post => ({
|
||||
const postsForType = getItemsOfType(channelData, postType)
|
||||
.filter(postData => postData.status[0] !== 'trash' && postData.status[0] !== 'draft')
|
||||
.map(postData => ({
|
||||
// raw post data, used by frontmatter getters
|
||||
data: postData,
|
||||
|
||||
// meta data isn't written to file, but is used to help with other things
|
||||
meta: {
|
||||
id: getPostId(post),
|
||||
slug: getPostSlug(post),
|
||||
coverImageId: getPostCoverImageId(post),
|
||||
id: getPostId(postData),
|
||||
slug: getPostSlug(postData),
|
||||
coverImageId: getPostCoverImageId(postData),
|
||||
coverImage: undefined, // possibly set later in mergeImagesIntoPosts()
|
||||
type: postType,
|
||||
imageUrls: []
|
||||
imageUrls: [] // possibly set later in mergeImagesIntoPosts()
|
||||
},
|
||||
frontmatter: {
|
||||
title: getPostTitle(post),
|
||||
date: getPostDate(post),
|
||||
categories: getCategories(post),
|
||||
tags: getTags(post)
|
||||
},
|
||||
content: translator.getPostContent(post, turndownService, config)
|
||||
|
||||
// contents of the post in markdown
|
||||
content: translator.getPostContent(postData, turndownService, config)
|
||||
}));
|
||||
|
||||
if (postTypes.length > 1) {
|
||||
@@ -87,61 +92,26 @@ function collectPosts(data, postTypes, config) {
|
||||
return allPosts;
|
||||
}
|
||||
|
||||
function getPostId(post) {
|
||||
return post.post_id[0];
|
||||
function getPostId(postData) {
|
||||
return postData.post_id[0];
|
||||
}
|
||||
|
||||
function getPostSlug(post) {
|
||||
return decodeURIComponent(post.post_name[0]);
|
||||
function getPostSlug(postData) {
|
||||
return decodeURIComponent(postData.post_name[0]);
|
||||
}
|
||||
|
||||
function getPostCoverImageId(post) {
|
||||
if (post.postmeta === undefined) {
|
||||
function getPostCoverImageId(postData) {
|
||||
if (postData.postmeta === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const postmeta = post.postmeta.find(postmeta => postmeta.meta_key[0] === '_thumbnail_id');
|
||||
const postmeta = postData.postmeta.find(postmeta => postmeta.meta_key[0] === '_thumbnail_id');
|
||||
const id = postmeta ? postmeta.meta_value[0] : undefined;
|
||||
return id;
|
||||
}
|
||||
|
||||
function getPostTitle(post) {
|
||||
return post.title[0];
|
||||
}
|
||||
|
||||
function getPostDate(post) {
|
||||
const dateTime = luxon.DateTime.fromRFC2822(post.pubDate[0], { zone: 'utc' });
|
||||
|
||||
if (settings.custom_date_formatting) {
|
||||
return dateTime.toFormat(settings.custom_date_formatting);
|
||||
} else if (settings.include_time_with_date) {
|
||||
return dateTime.toISO();
|
||||
} else {
|
||||
return dateTime.toISODate();
|
||||
}
|
||||
}
|
||||
|
||||
function getCategories(post) {
|
||||
const categories = processCategoryTags(post, 'category');
|
||||
return categories.filter(category => !settings.filter_categories.includes(category));
|
||||
}
|
||||
|
||||
function getTags(post) {
|
||||
return processCategoryTags(post, 'post_tag');
|
||||
}
|
||||
|
||||
function processCategoryTags(post, domain) {
|
||||
if (!post.category) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return post.category
|
||||
.filter(category => category.$.domain === domain)
|
||||
.map(({ $: attributes }) => decodeURIComponent(attributes.nicename));
|
||||
}
|
||||
|
||||
function collectAttachedImages(data) {
|
||||
const images = getItemsOfType(data, 'attachment')
|
||||
function collectAttachedImages(channelData) {
|
||||
const images = getItemsOfType(channelData, 'attachment')
|
||||
// filter to certain image file types
|
||||
.filter(attachment => (/\.(gif|jpe?g|png)$/i).test(attachment.attachment_url[0]))
|
||||
.map(attachment => ({
|
||||
@@ -154,13 +124,13 @@ function collectAttachedImages(data) {
|
||||
return images;
|
||||
}
|
||||
|
||||
function collectScrapedImages(data, postTypes) {
|
||||
function collectScrapedImages(channelData, postTypes) {
|
||||
const images = [];
|
||||
postTypes.forEach(postType => {
|
||||
getItemsOfType(data, postType).forEach(post => {
|
||||
const postId = post.post_id[0];
|
||||
const postContent = post.encoded[0];
|
||||
const postLink = post.link[0];
|
||||
getItemsOfType(channelData, postType).forEach(postData => {
|
||||
const postId = postData.post_id[0];
|
||||
const postContent = postData.encoded[0];
|
||||
const postLink = postData.link[0];
|
||||
|
||||
const matches = [...postContent.matchAll(/<img[^>]*src="(.+?\.(?:gif|jpe?g|png))"[^>]*>/gi)];
|
||||
matches.forEach(match => {
|
||||
@@ -192,7 +162,7 @@ function mergeImagesIntoPosts(images, posts) {
|
||||
// this image was set as the featured image for this post
|
||||
if (image.id === post.meta.coverImageId) {
|
||||
shouldAttach = true;
|
||||
post.frontmatter.coverImage = shared.getFilenameFromUrl(image.url);
|
||||
post.meta.coverImage = shared.getFilenameFromUrl(image.url);
|
||||
}
|
||||
|
||||
if (shouldAttach && !post.meta.imageUrls.includes(image.url)) {
|
||||
@@ -202,4 +172,21 @@ function mergeImagesIntoPosts(images, posts) {
|
||||
});
|
||||
}
|
||||
|
||||
// builds the frontmatter object for every post and stores it on post.frontmatter
// each entry in settings.frontmatter_fields is either 'key' or 'key:alias': the key picks a getter
// from frontmatterGetters, and the getter's return value is stored under the alias when one is
// given, otherwise under the key
// properties are added in the order the fields are listed
// throws a string message when a field names a getter that does not exist
function populateFrontmatter(posts) {
	posts.forEach(post => {
		const frontmatter = {};
		settings.frontmatter_fields.forEach(field => {
			// must be declared: a bare `[key, alias] = ...` writes to undeclared globals
			// (and is a ReferenceError in strict mode)
			const [key, alias] = field.split(':');

			// own-property check so inherited names like 'constructor' don't pass as getters
			const frontmatterGetter = Object.prototype.hasOwnProperty.call(frontmatterGetters, key)
				? frontmatterGetters[key]
				: undefined;
			if (typeof frontmatterGetter !== 'function') {
				// NOTE(review): still thrown as a plain string so callers receive the same value as
				// before; consider switching to an Error once the catch site has been checked
				throw `Could not find a frontmatter getter named "${key}".`;
			}

			frontmatter[alias || key] = frontmatterGetter(post);
		});
		post.frontmatter = frontmatter;
	});
}
|
||||
|
||||
exports.parseFilePromise = parseFilePromise;
|
||||
|
||||
+23
-11
@@ -1,20 +1,32 @@
|
||||
// time in ms to wait between requesting image files
|
||||
// increase this if you see timeouts or server errors
|
||||
// Which fields to include in frontmatter. Look in /src/frontmatter to see available fields.
|
||||
// Order is preserved. If a field has an empty value, it will not be included. You can rename a
|
||||
// field by providing an alias after a ':'. For example, 'date:created' will include 'date' in
|
||||
// frontmatter, but renamed to 'created'.
|
||||
exports.frontmatter_fields = [
|
||||
'title',
|
||||
'date',
|
||||
'categories',
|
||||
'tags',
|
||||
'coverImage'
|
||||
];
|
||||
|
||||
// Time in ms to wait between requesting image files. Increase this if you see timeouts or
|
||||
// server errors.
|
||||
exports.image_file_request_delay = 500;
|
||||
|
||||
// time in ms to wait between saving Markdown files
|
||||
// increase this if your file system becomes overloaded
|
||||
// Time in ms to wait between saving Markdown files. Increase this if your file system becomes
|
||||
// overloaded.
|
||||
exports.markdown_file_write_delay = 25;
|
||||
|
||||
// enable this to include time with post dates
|
||||
// for example, "2020-12-25" would become "2020-12-25T11:20:35.000Z"
|
||||
// Enable this to include time with post dates. For example, "2020-12-25" would become
|
||||
// "2020-12-25T11:20:35.000Z".
|
||||
exports.include_time_with_date = false;
|
||||
|
||||
// override post date formatting with a custom formatting string (for example: 'yyyy LLL dd')
|
||||
// tokens are documented here: https://moment.github.io/luxon/#/parsing?id=table-of-tokens
|
||||
// if set, this takes precedence over include_time_with_date
|
||||
// Override post date formatting with a custom formatting string (for example: 'yyyy LLL dd').
|
||||
// Tokens are documented here: https://moment.github.io/luxon/#/formatting?id=table-of-tokens. If
|
||||
// set, this takes precedence over include_time_with_date.
|
||||
exports.custom_date_formatting = '';
|
||||
|
||||
// categories to be excluded from post frontmatter
|
||||
// this does not filter out posts themselves, just the categories listed in their frontmatter
|
||||
// Categories to be excluded from post frontmatter. This does not filter out posts themselves,
|
||||
// just the categories listed in their frontmatter.
|
||||
exports.filter_categories = ['uncategorized'];
|
||||
|
||||
+2
-2
@@ -94,8 +94,8 @@ function initTurndownService() {
|
||||
return turndownService;
|
||||
}
|
||||
|
||||
function getPostContent(post, turndownService, config) {
|
||||
let content = post.encoded[0];
|
||||
function getPostContent(postData, turndownService, config) {
|
||||
let content = postData.encoded[0];
|
||||
|
||||
// insert an empty div element between double line breaks
|
||||
// this nifty trick causes turndown to keep adjacent paragraphs separated
|
||||
|
||||
+3
-1
@@ -85,7 +85,9 @@ async function loadMarkdownFilePromise(post) {
|
||||
} else {
|
||||
// single string value
|
||||
const escapedValue = (value || '').replace(/"/g, '\\"');
|
||||
outputValue = `"${escapedValue}"`;
|
||||
if (escapedValue.length > 0) {
|
||||
outputValue = `"${escapedValue}"`;
|
||||
}
|
||||
}
|
||||
|
||||
if (outputValue !== undefined) {
|
||||
|
||||
Reference in New Issue
Block a user