Fill out the readme

This commit is contained in:
Will Boyd
2018-10-25 17:09:16 -04:00
parent a77035c85a
commit bab7feb9ae
3 changed files with 146 additions and 15 deletions
+134 -2
View File
@@ -1,3 +1,135 @@
# wp-to-gatsby-md
# wordpress-export-to-markdown
Converts a WordPress export XML file into Markdown files suitable for a GatsbyJS site.
Converts a WordPress export XML file into Markdown files.
Saves each post as a separate file with appropriate frontmatter. Also saves attached images and (optionally) any additional images found in post body content. Posts and images can be saved into a variety of folder structures.
## Why?
This is useful if you want to migrate from WordPress to a static site generator such as [Gatsby](https://www.gatsbyjs.org/) or [Hugo](https://gohugo.io/), among others.
## Quick Start
You just need two things to get started:
- Node v10.12 or later
- Your WordPress export file
- Log in to your WordPress admin site and go to Tools > Export > Download Export File
- Save the file as `export.xml` inside this package's directory
Now open your terminal to this package's directory and run `node index.js`.
This will use default options to create an `/output` folder filled with your posts and images.
## Command Line Arguments
You can use command line arguments to control options for how the script runs. For example, this will give you [Jekyll](https://jekyllrb.com/)-style output in terms of folder structure and filenames:
```
node index.js --postfolders false --prefixdate true
```
### --input
- Type: String
- Default: `export.xml`
The file to parse. This should be the WordPress export XML file that you downloaded.
### --output
- Type: String
- Default: `output`
The output directory where Markdown and image files will be saved.
### --yearmonthfolders
- Type: Boolean
- Default: `false`
Whether or not to organize output files into year and month folders.
/output
/2017
/01
/02
/2018
/01
### --yearfolders
- Type: Boolean
- Default: `false`
Whether or not to organize output files into year folders.
/output
/2017
/2018
### --postfolders
- Type: Boolean
- Default: `true`
Whether or not to save files and images into post folders.
If `true`, the post slug is used for the folder name and the post's Markdown file is named `index.md`. Each post folder will have its own `/images` folder.
/output
/first-post
/images
potato.png
index.md
/oh-look-another-post
/images
cat1.gif
cat2.gif
index.md
If `false`, the post slug is used to name the post's Markdown file. These files will be side-by-side and images will go into a shared `/images` folder.
/output
/images
cat1.gif
cat2.gif
potato.png
first-post.md
oh-look-another-post.md
Either way, this can be combined with `--yearmonthfolders` and `--yearfolders`, in which case the above output will be organized under the appropriate year and month folders.
### --prefixdate
- Type: Boolean
- Default: `false`
Whether or not to prepend the post date to the post slug when naming a post's folder or file.
If `--postfolders` is `true`, this affects the folder.
/output
/2017-01-14-first-post
index.md
/2017-01-23-oh-look-another-post
index.md
If `--postfolders` is `false`, this affects the file.
/output
2017-01-14-first-post.md
2017-01-23-oh-look-another-post.md
### --saveimages
- Type: Boolean
- Default: `true`
Whether or not to download and save images attached to posts. Generally speaking, these are images that were added by dragging/dropping or clicking **Add Media** or **Set Featured Image** when editing a post in WordPress. Images are saved into `/images`. See `--postfolders` for more details.
### --addcontentimages
- Type: Boolean
- Default: `false`
Whether or not to also include images scraped from `<img>` tags in post body content. These images are downloaded and saved along with other images as dictated by `--saveimages`.
+11 -12
View File
@@ -6,6 +6,7 @@ const request = require('request');
const turndown = require('turndown');
const xml2js = require('xml2js');
// global so various functions can access arguments
let argv;
function init() {
@@ -88,6 +89,8 @@ function collectImages(data) {
function addContentImages(data, images) {
// this regex isn't airtight, but seems to work well enough
let regex = (/src="(.+?\.(gif|jpg|png))"/gi);
let match;
getItemsOfType(data, 'post').forEach(post => {
let postId = post.post_id[0];
let postContent = post.encoded[0];
@@ -95,7 +98,6 @@ function addContentImages(data, images) {
// reset lastIndex since we're reusing the same regex object
regex.lastIndex = 0;
let match;
while ((match = regex.exec(postContent)) !== null) {
// base the matched image URL relative to the post URL
let url = new URL(match[1], postLink).href;
@@ -105,21 +107,22 @@ function addContentImages(data, images) {
if (!exists) {
images.push({
id: -1,
postId: postId,
postId: postId,
url: url
});
console.log('Scraped ' + url + '.');
}
}
});
}
function collectPosts(data) {
// this is given to getPostContent() to do the markdown conversion
// this is passed into getPostContent() for the markdown conversion
turndownService = initTurndownService();
return getItemsOfType(data, 'post')
.map(post => ({
// meta data isn't output, but is used to help with other things
// meta data isn't written to file, but is used to help with other things
meta: {
id: getPostId(post),
coverImageId: getPostCoverImageId(post)
@@ -142,7 +145,7 @@ function initTurndownService() {
// preserve embedded scripts (for gists, codepens, etc.)
turndownService.addRule('script', {
filter: 'script',
replacement: function(content, node) {
replacement: (content, node) => {
let html = node.outerHTML.replace('async=""', 'async')
return '\n\n' + html + '\n\n';
}
@@ -150,12 +153,8 @@ function initTurndownService() {
// preserve embedded codepens
turndownService.addRule('p', {
filter: function(node) {
return node.nodeName === 'P' && node.attributes['data-pen-title'];
},
replacement: function(content, node) {
return '\n\n' + node.outerHTML + '\n\n';
}
filter: node => node.nodeName === 'P' && node.attributes['data-pen-title'],
replacement: (content, node) => '\n\n' + node.outerHTML + '\n\n'
});
return turndownService;
@@ -210,7 +209,7 @@ function mergeImagesIntoPosts(images, posts) {
post.meta.imageUrls.push(image.url);
if (image.id === post.meta.coverImageId) {
// add cover image to frontmatter for output
// save cover image filename to frontmatter
post.frontmatter.coverImage = getFilenameFromUrl(image.url);
}
}
+1 -1
View File
@@ -2,7 +2,7 @@
"name": "wp-to-gatsby-md",
"version": "1.0.0",
"private": true,
"description": "Converts a WordPress export XML file into Markdown files suitable for a GatsbyJS site.",
"description": "Converts a WordPress export XML file into Markdown files.",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"