diff --git a/src/parser.js b/src/parser.js index d49ddd3..cf3e5d4 100644 --- a/src/parser.js +++ b/src/parser.js @@ -92,7 +92,7 @@ function getPostId(post) { } function getPostSlug(post) { - return decodeURI(post.post_name[0]); + return decodeURIComponent(post.post_name[0]); } function getPostCoverImageId(post) { @@ -137,7 +137,7 @@ function processCategoryTags(post, domain) { return post.category .filter(category => category.$.domain === domain) - .map(({ $: attributes }) => decodeURI(attributes.nicename)); + .map(({ $: attributes }) => decodeURIComponent(attributes.nicename)); } function collectAttachedImages(data) { @@ -147,7 +147,7 @@ function collectAttachedImages(data) { .map(attachment => ({ id: attachment.post_id[0], postId: attachment.post_parent[0], - url: decodeURI(attachment.attachment_url[0]) + url: attachment.attachment_url[0] })); console.log(images.length + ' attached images found.'); @@ -168,7 +168,7 @@ function collectScrapedImages(data) { images.push({ id: -1, postId: postId, - url: decodeURI(url) + url }); }); }); diff --git a/src/shared.js b/src/shared.js index b3e6165..5175848 100644 --- a/src/shared.js +++ b/src/shared.js @@ -1,5 +1,5 @@ function getFilenameFromUrl(url) { - return url.split('/').slice(-1)[0]; + return decodeURIComponent(url.split('/').slice(-1)[0]); } exports.getFilenameFromUrl = getFilenameFromUrl; diff --git a/src/writer.js b/src/writer.js index a891aad..9750b43 100644 --- a/src/writer.js +++ b/src/writer.js @@ -134,10 +134,13 @@ async function writeImageFilesPromise(posts, config) { } async function loadImageFilePromise(imageUrl) { + // only encode the URL if it doesn't already have encoded characters + const url = (/%[\da-f]{2}/i).test(imageUrl) ? imageUrl : encodeURI(imageUrl); + let buffer; try { buffer = await requestPromiseNative.get({ - url: encodeURI(imageUrl), + url, encoding: null, // preserves binary encoding headers: { 'User-Agent': 'wordpress-export-to-markdown'