mirror of
https://github.com/10h30/wordpress-export-to-markdown.git
synced 2026-06-05 15:09:59 +09:00
Fill out the readme
This commit is contained in:
@@ -1,3 +1,135 @@
|
||||
# wp-to-gatsby-md
|
||||
# wordpress-export-to-markdown
|
||||
|
||||
Converts a WordPress export XML file into Markdown files suitable for a GatsbyJS site.
|
||||
Converts a WordPress export XML file into Markdown files.
|
||||
|
||||
Saves each post as a separate file with appropriate frontmatter. Also saves attached images and (optionally) any additional images found in post body content. Posts and images can be saved into a variety of folder structures.
|
||||
|
||||
## Why?
|
||||
|
||||
This is useful if you want to migrate from WordPress to a static site generator such as [Gatsby](https://www.gatsbyjs.org/) or [Hugo](https://gohugo.io/), among others.
|
||||
|
||||
## Quick Start
|
||||
|
||||
You just need two things to get started:
|
||||
- Node v10.12 or later
|
||||
- Your WordPress export file
|
||||
- Log into your WordPress admin site and go to Tools > Export > Download Export File
|
||||
- Save the file as `export.xml` inside this package's directory
|
||||
|
||||
Now open your terminal to this package's directory and run `node index.js`.
|
||||
|
||||
This will use default options to create an `/output` folder filled with your posts and images.
|
||||
|
||||
## Command Line Arguments
|
||||
|
||||
You can use command line arguments to control options for how the script runs. For example, this will give you [Jekyll](https://jekyllrb.com/)-style output in terms of folder structure and filenames:
|
||||
|
||||
```
|
||||
node index.js --postfolders false --prefixdate true
|
||||
```
|
||||
|
||||
### --input
|
||||
|
||||
- Type: String
|
||||
- Default: `export.xml`
|
||||
|
||||
The file to parse. This should be the WordPress export XML file that you downloaded.
|
||||
|
||||
### --output
|
||||
|
||||
- Type: String
|
||||
- Default: `output`
|
||||
|
||||
The output directory where Markdown and image files will be saved.
|
||||
|
||||
### --yearmonthfolders
|
||||
|
||||
- Type: Boolean
|
||||
- Default: `false`
|
||||
|
||||
Whether or not to organize output files into year and month folders.
|
||||
|
||||
/output
|
||||
/2017
|
||||
/01
|
||||
/02
|
||||
/2018
|
||||
/01
|
||||
|
||||
### --yearfolders
|
||||
|
||||
- Type: Boolean
|
||||
- Default: `false`
|
||||
|
||||
Whether or not to organize output files into year folders.
|
||||
|
||||
/output
|
||||
/2017
|
||||
/2018
|
||||
|
||||
### --postfolders
|
||||
|
||||
- Type: Boolean
|
||||
- Default: `true`
|
||||
|
||||
Whether or not to save files and images into post folders.
|
||||
|
||||
If `true`, the post slug is used for the folder name and the post's Markdown file is named `index.md`. Each post folder will have its own `/images` folder.
|
||||
|
||||
/output
|
||||
/first-post
|
||||
/images
|
||||
potato.png
|
||||
index.md
|
||||
/oh-look-another-post
|
||||
/images
|
||||
cat1.gif
|
||||
cat2.gif
|
||||
index.md
|
||||
|
||||
If `false`, the post slug is used to name the post's Markdown file. These files will be side-by-side and images will go into a shared `/images` folder.
|
||||
|
||||
/output
|
||||
/images
|
||||
cat1.gif
|
||||
cat2.gif
|
||||
potato.png
|
||||
first-post.md
|
||||
oh-look-another-post.md
|
||||
|
||||
Either way, this can be combined with `--yearmonthfolders` and `--yearfolders`, in which case the above output will be organized under the appropriate year and month folders.
|
||||
|
||||
### --prefixdate
|
||||
|
||||
- Type: Boolean
|
||||
- Default: `false`
|
||||
|
||||
Whether or not to prepend the post date to the post slug when naming a post's folder or file.
|
||||
|
||||
If `--postfolders` is `true`, this affects the folder.
|
||||
|
||||
/output
|
||||
/2017-01-14-first-post
|
||||
index.md
|
||||
/2017-01-23-oh-look-another-post
|
||||
index.md
|
||||
|
||||
If `--postfolders` is `false`, this affects the file.
|
||||
|
||||
/output
|
||||
2017-01-14-first-post.md
|
||||
2017-01-23-oh-look-another-post.md
|
||||
|
||||
### --saveimages
|
||||
|
||||
- Type: Boolean
|
||||
- Default: `true`
|
||||
|
||||
Whether or not to download and save images attached to posts. Generally speaking, these are images that were added by dragging/dropping or clicking **Add Media** or **Set Featured Image** when editing a post in WordPress. Images are saved into `/images`. See `--postfolders` for more details.
|
||||
|
||||
### --addcontentimages
|
||||
|
||||
- Type: Boolean
|
||||
- Default: `false`
|
||||
|
||||
Whether or not to also include images scraped from `<img>` tags in post body content. These images are downloaded and saved along with other images as dictated by `--saveimages`.
|
||||
|
||||
@@ -6,6 +6,7 @@ const request = require('request');
|
||||
const turndown = require('turndown');
|
||||
const xml2js = require('xml2js');
|
||||
|
||||
// global so various functions can access arguments
|
||||
let argv;
|
||||
|
||||
function init() {
|
||||
@@ -88,6 +89,8 @@ function collectImages(data) {
|
||||
function addContentImages(data, images) {
|
||||
// this regex isn't airtight, but seems to work well enough
|
||||
let regex = (/src="(.+?\.(gif|jpg|png))"/gi);
|
||||
let match;
|
||||
|
||||
getItemsOfType(data, 'post').forEach(post => {
|
||||
let postId = post.post_id[0];
|
||||
let postContent = post.encoded[0];
|
||||
@@ -95,7 +98,6 @@ function addContentImages(data, images) {
|
||||
|
||||
// reset lastIndex since we're reusing the same regex object
|
||||
regex.lastIndex = 0;
|
||||
let match;
|
||||
while ((match = regex.exec(postContent)) !== null) {
|
||||
// base the matched image URL relative to the post URL
|
||||
let url = new URL(match[1], postLink).href;
|
||||
@@ -105,21 +107,22 @@ function addContentImages(data, images) {
|
||||
if (!exists) {
|
||||
images.push({
|
||||
id: -1,
|
||||
postId: postId,
|
||||
postId: postId,
|
||||
url: url
|
||||
});
|
||||
console.log('Scraped ' + url + '.');
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function collectPosts(data) {
|
||||
// this is given to getPostContent() to do the markdown conversion
|
||||
// this is passed into getPostContent() for the markdown conversion
|
||||
turndownService = initTurndownService();
|
||||
|
||||
return getItemsOfType(data, 'post')
|
||||
.map(post => ({
|
||||
// meta data isn't output, but is used to help with other things
|
||||
// meta data isn't written to file, but is used to help with other things
|
||||
meta: {
|
||||
id: getPostId(post),
|
||||
coverImageId: getPostCoverImageId(post)
|
||||
@@ -142,7 +145,7 @@ function initTurndownService() {
|
||||
// preserve embedded scripts (for gists, codepens, etc.)
|
||||
turndownService.addRule('script', {
|
||||
filter: 'script',
|
||||
replacement: function(content, node) {
|
||||
replacement: (content, node) => {
|
||||
let html = node.outerHTML.replace('async=""', 'async')
|
||||
return '\n\n' + html + '\n\n';
|
||||
}
|
||||
@@ -150,12 +153,8 @@ function initTurndownService() {
|
||||
|
||||
// preserve embedded codepens
|
||||
turndownService.addRule('p', {
|
||||
filter: function(node) {
|
||||
return node.nodeName === 'P' && node.attributes['data-pen-title'];
|
||||
},
|
||||
replacement: function(content, node) {
|
||||
return '\n\n' + node.outerHTML + '\n\n';
|
||||
}
|
||||
filter: node => node.nodeName === 'P' && node.attributes['data-pen-title'],
|
||||
replacement: (content, node) => '\n\n' + node.outerHTML + '\n\n'
|
||||
});
|
||||
|
||||
return turndownService;
|
||||
@@ -210,7 +209,7 @@ function mergeImagesIntoPosts(images, posts) {
|
||||
post.meta.imageUrls.push(image.url);
|
||||
|
||||
if (image.id === post.meta.coverImageId) {
|
||||
// add cover image to frontmatter for output
|
||||
// save cover image filename to frontmatter
|
||||
post.frontmatter.coverImage = getFilenameFromUrl(image.url);
|
||||
}
|
||||
}
|
||||
|
||||
+1
-1
@@ -2,7 +2,7 @@
|
||||
"name": "wp-to-gatsby-md",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"description": "Converts a WordPress export XML file into Markdown files suitable for a GatsbyJS site.",
|
||||
"description": "Converts a WordPress export XML file into Markdown files.",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
|
||||
Reference in New Issue
Block a user