diff --git a/src/parser.js b/src/parser.js index 683344c..99ca1ba 100644 --- a/src/parser.js +++ b/src/parser.js @@ -113,7 +113,7 @@ function getPostCoverImageId(postData) { function collectAttachedImages(channelData) { const images = getItemsOfType(channelData, 'attachment') // filter to certain image file types - .filter(attachment => attachment.attachment_url && (/\.(gif|jpe?g|png)$/i).test(attachment.attachment_url[0])) + .filter(attachment => attachment.attachment_url && (/\.(gif|jpe?g|png|webp)$/i).test(attachment.attachment_url[0])) .map(attachment => ({ id: attachment.post_id[0], postId: attachment.post_parent[0], @@ -132,7 +132,7 @@ function collectScrapedImages(channelData, postTypes) { const postContent = postData.encoded[0]; const postLink = postData.link[0]; - const matches = [...postContent.matchAll(/]*src="(.+?\.(?:gif|jpe?g|png))"[^>]*>/gi)]; + const matches = [...postContent.matchAll(/]*src="(.+?\.(?:gif|jpe?g|png|webp))"[^>]*>/gi)]; matches.forEach(match => { // base the matched image URL relative to the post URL const url = new URL(match[1], postLink).href; diff --git a/src/translator.js b/src/translator.js index 2a24452..c974599 100644 --- a/src/translator.js +++ b/src/translator.js @@ -105,7 +105,7 @@ function getPostContent(postData, turndownService, config) { if (config.saveScrapedImages) { // writeImageFile() will save all content images to a relative /images // folder so update references in post content to match - content = content.replace(/(]*src=").*?([^/"]+\.(?:gif|jpe?g|png))("[^>]*>)/gi, '$1images/$2$3'); + content = content.replace(/(]*src=").*?([^/"]+\.(?:gif|jpe?g|png|webp))("[^>]*>)/gi, '$1images/$2$3'); } // preserve "more" separator, max one per post, optionally with custom label diff --git a/src/wizard.js b/src/wizard.js index 5285b8d..88a722d 100644 --- a/src/wizard.js +++ b/src/wizard.js @@ -74,6 +74,12 @@ const options = [ type: 'boolean', description: 'Include custom post types and pages', default: false + }, + { + name: 'disable-strict-ssl', + type: 'boolean', + description: 'Strict SSL prevents image retrieval from self-signed servers', + default: false } ]; diff --git a/src/writer.js b/src/writer.js index b782756..fa02ce1 100644 --- a/src/writer.js +++ b/src/writer.js @@ -16,7 +16,7 @@ async function processPayloadsPromise(payloads, loadFunc) { const promises = payloads.map(payload => new Promise((resolve, reject) => { setTimeout(async () => { try { - const data = await loadFunc(payload.item); + const data = await loadFunc(payload.item, payload.strictSSL); await writeFile(payload.destinationPath, data); console.log(chalk.green('[OK]') + ' ' + payload.name); resolve(); @@ -55,6 +55,7 @@ async function writeMarkdownFilesPromise(posts, config ) { const payload = { item: post, name: (config.includeOtherTypes ? post.meta.type + ' - ' : '') + post.meta.slug, + strictSSL: !config.disableStrictSsl, destinationPath, delay }; @@ -72,7 +73,7 @@ async function writeMarkdownFilesPromise(posts, config ) { } } -async function loadMarkdownFilePromise(post) { +async function loadMarkdownFilePromise(post, strictSSL) { let output = '---\n'; Object.entries(post.frontmatter).forEach(([key, value]) => { @@ -117,6 +118,7 @@ async function writeImageFilesPromise(posts, config) { const payload = { item: imageUrl, name: filename, + strictSSL: !config.disableStrictSsl, destinationPath, delay }; @@ -135,7 +137,7 @@ async function writeImageFilesPromise(posts, config) { } } -async function loadImageFilePromise(imageUrl) { +async function loadImageFilePromise(imageUrl, strictSSL) { // only encode the URL if it doesn't already have encoded characters const url = (/%[\da-f]{2}/i).test(imageUrl) ? imageUrl : encodeURI(imageUrl); @@ -146,7 +148,8 @@ async function loadImageFilePromise(imageUrl) { encoding: null, // preserves binary encoding headers: { 'User-Agent': 'wordpress-export-to-markdown' - } + }, + strictSSL: strictSSL }); } catch (ex) { if (ex.name === 'StatusCodeError') {