mirror of
https://github.com/10h30/wordpress-export-to-markdown.git
synced 2026-06-05 15:09:59 +09:00
Refactor for post data and frontmatter
This commit is contained in:
+61
-45
@@ -9,32 +9,34 @@ const translator = require('./translator');
|
|||||||
async function parseFilePromise(config) {
|
async function parseFilePromise(config) {
|
||||||
console.log('\nParsing...');
|
console.log('\nParsing...');
|
||||||
const content = await fs.promises.readFile(config.input, 'utf8');
|
const content = await fs.promises.readFile(config.input, 'utf8');
|
||||||
const data = await xml2js.parseStringPromise(content, {
|
const allData = await xml2js.parseStringPromise(content, {
|
||||||
trim: true,
|
trim: true,
|
||||||
tagNameProcessors: [xml2js.processors.stripPrefix]
|
tagNameProcessors: [xml2js.processors.stripPrefix]
|
||||||
});
|
});
|
||||||
|
const channelData = allData.rss.channel[0].item;
|
||||||
|
|
||||||
const postTypes = getPostTypes(data, config);
|
const postTypes = getPostTypes(channelData, config);
|
||||||
const posts = collectPosts(data, postTypes, config);
|
const posts = collectPosts(channelData, postTypes, config);
|
||||||
|
|
||||||
const images = [];
|
const images = [];
|
||||||
if (config.saveAttachedImages) {
|
if (config.saveAttachedImages) {
|
||||||
images.push(...collectAttachedImages(data));
|
images.push(...collectAttachedImages(channelData));
|
||||||
}
|
}
|
||||||
if (config.saveScrapedImages) {
|
if (config.saveScrapedImages) {
|
||||||
images.push(...collectScrapedImages(data, postTypes));
|
images.push(...collectScrapedImages(channelData, postTypes));
|
||||||
}
|
}
|
||||||
|
|
||||||
mergeImagesIntoPosts(images, posts);
|
mergeImagesIntoPosts(images, posts);
|
||||||
|
populateFrontmatter(posts);
|
||||||
|
|
||||||
return posts;
|
return posts;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getPostTypes(data, config) {
|
function getPostTypes(channelData, config) {
|
||||||
if (config.includeOtherTypes) {
|
if (config.includeOtherTypes) {
|
||||||
// search export file for all post types minus some default types we don't want
|
// search export file for all post types minus some default types we don't want
|
||||||
// effectively this will be 'post', 'page', and custom post types
|
// effectively this will be 'post', 'page', and custom post types
|
||||||
const types = data.rss.channel[0].item
|
const types = channelData
|
||||||
.map(item => item.post_type[0])
|
.map(item => item.post_type[0])
|
||||||
.filter(type => !['attachment', 'revision', 'nav_menu_item', 'custom_css', 'customize_changeset'].includes(type));
|
.filter(type => !['attachment', 'revision', 'nav_menu_item', 'custom_css', 'customize_changeset'].includes(type));
|
||||||
return [...new Set(types)]; // remove duplicates
|
return [...new Set(types)]; // remove duplicates
|
||||||
@@ -44,34 +46,31 @@ function getPostTypes(data, config) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function getItemsOfType(data, type) {
|
function getItemsOfType(channelData, type) {
|
||||||
return data.rss.channel[0].item.filter(item => item.post_type[0] === type);
|
return channelData.filter(item => item.post_type[0] === type);
|
||||||
}
|
}
|
||||||
|
|
||||||
function collectPosts(data, postTypes, config) {
|
function collectPosts(channelData, postTypes, config) {
|
||||||
// this is passed into getPostContent() for the markdown conversion
|
// this is passed into getPostContent() for the markdown conversion
|
||||||
const turndownService = translator.initTurndownService();
|
const turndownService = translator.initTurndownService();
|
||||||
|
|
||||||
let allPosts = [];
|
let allPosts = [];
|
||||||
postTypes.forEach(postType => {
|
postTypes.forEach(postType => {
|
||||||
const postsForType = getItemsOfType(data, postType)
|
const postsForType = getItemsOfType(channelData, postType)
|
||||||
.filter(post => post.status[0] !== 'trash' && post.status[0] !== 'draft')
|
.filter(postData => postData.status[0] !== 'trash' && postData.status[0] !== 'draft')
|
||||||
.map(post => ({
|
.map(postData => ({
|
||||||
|
data: postData,
|
||||||
|
|
||||||
// meta data isn't written to file, but is used to help with other things
|
// meta data isn't written to file, but is used to help with other things
|
||||||
meta: {
|
meta: {
|
||||||
id: getPostId(post),
|
id: getPostId(postData),
|
||||||
slug: getPostSlug(post),
|
slug: getPostSlug(postData),
|
||||||
coverImageId: getPostCoverImageId(post),
|
coverImageId: getPostCoverImageId(postData),
|
||||||
|
coverImage: undefined, // possibly set later in mergeImagesIntoPosts()
|
||||||
type: postType,
|
type: postType,
|
||||||
imageUrls: []
|
imageUrls: []
|
||||||
},
|
},
|
||||||
frontmatter: {
|
content: translator.getPostContent(postData, turndownService, config)
|
||||||
title: getPostTitle(post),
|
|
||||||
date: getPostDate(post),
|
|
||||||
categories: getCategories(post),
|
|
||||||
tags: getTags(post)
|
|
||||||
},
|
|
||||||
content: translator.getPostContent(post, turndownService, config)
|
|
||||||
}));
|
}));
|
||||||
|
|
||||||
if (postTypes.length > 1) {
|
if (postTypes.length > 1) {
|
||||||
@@ -87,30 +86,30 @@ function collectPosts(data, postTypes, config) {
|
|||||||
return allPosts;
|
return allPosts;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getPostId(post) {
|
function getPostId(postData) {
|
||||||
return post.post_id[0];
|
return postData.post_id[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
function getPostSlug(post) {
|
function getPostSlug(postData) {
|
||||||
return decodeURIComponent(post.post_name[0]);
|
return decodeURIComponent(postData.post_name[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
function getPostCoverImageId(post) {
|
function getPostCoverImageId(postData) {
|
||||||
if (post.postmeta === undefined) {
|
if (postData.postmeta === undefined) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
const postmeta = post.postmeta.find(postmeta => postmeta.meta_key[0] === '_thumbnail_id');
|
const postmeta = postData.postmeta.find(postmeta => postmeta.meta_key[0] === '_thumbnail_id');
|
||||||
const id = postmeta ? postmeta.meta_value[0] : undefined;
|
const id = postmeta ? postmeta.meta_value[0] : undefined;
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getPostTitle(post) {
|
function getPostTitle(post) {
|
||||||
return post.title[0];
|
return post.data.title[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
function getPostDate(post) {
|
function getPostDate(post) {
|
||||||
const dateTime = luxon.DateTime.fromRFC2822(post.pubDate[0], { zone: 'utc' });
|
const dateTime = luxon.DateTime.fromRFC2822(post.data.pubDate[0], { zone: 'utc' });
|
||||||
|
|
||||||
if (settings.custom_date_formatting) {
|
if (settings.custom_date_formatting) {
|
||||||
return dateTime.toFormat(settings.custom_date_formatting);
|
return dateTime.toFormat(settings.custom_date_formatting);
|
||||||
@@ -122,26 +121,30 @@ function getPostDate(post) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function getCategories(post) {
|
function getCategories(post) {
|
||||||
const categories = processCategoryTags(post, 'category');
|
const categories = processCategoryTags(post.data, 'category');
|
||||||
return categories.filter(category => !settings.filter_categories.includes(category));
|
return categories.filter(category => !settings.filter_categories.includes(category));
|
||||||
}
|
}
|
||||||
|
|
||||||
function getTags(post) {
|
function getTags(post) {
|
||||||
return processCategoryTags(post, 'post_tag');
|
return processCategoryTags(post.data, 'post_tag');
|
||||||
}
|
}
|
||||||
|
|
||||||
function processCategoryTags(post, domain) {
|
function getCoverImage(post) {
|
||||||
if (!post.category) {
|
return post.meta.coverImage;
|
||||||
|
}
|
||||||
|
|
||||||
|
function processCategoryTags(postData, domain) {
|
||||||
|
if (!postData.category) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
return post.category
|
return postData.category
|
||||||
.filter(category => category.$.domain === domain)
|
.filter(category => category.$.domain === domain)
|
||||||
.map(({ $: attributes }) => decodeURIComponent(attributes.nicename));
|
.map(({ $: attributes }) => decodeURIComponent(attributes.nicename));
|
||||||
}
|
}
|
||||||
|
|
||||||
function collectAttachedImages(data) {
|
function collectAttachedImages(channelData) {
|
||||||
const images = getItemsOfType(data, 'attachment')
|
const images = getItemsOfType(channelData, 'attachment')
|
||||||
// filter to certain image file types
|
// filter to certain image file types
|
||||||
.filter(attachment => (/\.(gif|jpe?g|png)$/i).test(attachment.attachment_url[0]))
|
.filter(attachment => (/\.(gif|jpe?g|png)$/i).test(attachment.attachment_url[0]))
|
||||||
.map(attachment => ({
|
.map(attachment => ({
|
||||||
@@ -154,13 +157,13 @@ function collectAttachedImages(data) {
|
|||||||
return images;
|
return images;
|
||||||
}
|
}
|
||||||
|
|
||||||
function collectScrapedImages(data, postTypes) {
|
function collectScrapedImages(channelData, postTypes) {
|
||||||
const images = [];
|
const images = [];
|
||||||
postTypes.forEach(postType => {
|
postTypes.forEach(postType => {
|
||||||
getItemsOfType(data, postType).forEach(post => {
|
getItemsOfType(channelData, postType).forEach(postData => {
|
||||||
const postId = post.post_id[0];
|
const postId = postData.post_id[0];
|
||||||
const postContent = post.encoded[0];
|
const postContent = postData.encoded[0];
|
||||||
const postLink = post.link[0];
|
const postLink = postData.link[0];
|
||||||
|
|
||||||
const matches = [...postContent.matchAll(/<img[^>]*src="(.+?\.(?:gif|jpe?g|png))"[^>]*>/gi)];
|
const matches = [...postContent.matchAll(/<img[^>]*src="(.+?\.(?:gif|jpe?g|png))"[^>]*>/gi)];
|
||||||
matches.forEach(match => {
|
matches.forEach(match => {
|
||||||
@@ -192,7 +195,7 @@ function mergeImagesIntoPosts(images, posts) {
|
|||||||
// this image was set as the featured image for this post
|
// this image was set as the featured image for this post
|
||||||
if (image.id === post.meta.coverImageId) {
|
if (image.id === post.meta.coverImageId) {
|
||||||
shouldAttach = true;
|
shouldAttach = true;
|
||||||
post.frontmatter.coverImage = shared.getFilenameFromUrl(image.url);
|
post.meta.coverImage = shared.getFilenameFromUrl(image.url);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shouldAttach && !post.meta.imageUrls.includes(image.url)) {
|
if (shouldAttach && !post.meta.imageUrls.includes(image.url)) {
|
||||||
@@ -202,4 +205,17 @@ function mergeImagesIntoPosts(images, posts) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the frontmatter object for every post, in place. Nothing is returned;
// each post gains (or has overwritten) a `frontmatter` property.
//
// The call site visible in this file runs this after mergeImagesIntoPosts(),
// which is where post.meta.coverImage gets assigned, so getCoverImage() can
// see it. coverImage stays undefined for a post that had no featured image
// merged in.
function populateFrontmatter(posts) {
	posts.forEach(post => {
		post.frontmatter = {
			title: getPostTitle(post),
			date: getPostDate(post),
			categories: getCategories(post),
			tags: getTags(post),
			coverImage: getCoverImage(post)
		};
	});
}
|
||||||
|
|
||||||
exports.parseFilePromise = parseFilePromise;
|
exports.parseFilePromise = parseFilePromise;
|
||||||
|
|||||||
+2
-2
@@ -94,8 +94,8 @@ function initTurndownService() {
|
|||||||
return turndownService;
|
return turndownService;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getPostContent(post, turndownService, config) {
|
function getPostContent(postData, turndownService, config) {
|
||||||
let content = post.encoded[0];
|
let content = postData.encoded[0];
|
||||||
|
|
||||||
// insert an empty div element between double line breaks
|
// insert an empty div element between double line breaks
|
||||||
// this nifty trick causes turndown to keep adjacent paragraphs separated
|
// this nifty trick causes turndown to keep adjacent paragraphs separated
|
||||||
|
|||||||
Reference in New Issue
Block a user