mirror of
https://github.com/10h30/wordpress-export-to-markdown.git
synced 2026-06-05 15:09:59 +09:00
Better image scraping regex
This commit is contained in:
+1
-1
@@ -128,7 +128,7 @@ function collectScrapedImages(allPostData, postTypes) {
|
||||
const postId = postData.childValue('post_id');
|
||||
|
||||
const postContent = postData.childValue('encoded');
|
||||
const scrapedUrls = [...postContent.matchAll(/<img\s[^>]*?src="(.+?\.(?:gif|jpe?g|png|webp))"[^>]*>/gi)].map((match) => match[1]);
|
||||
const scrapedUrls = [...postContent.matchAll(/<img(?=\s)[^>]+?(?<=\s)src="(.+?)"[^>]*>/gi)].map((match) => match[1]);
|
||||
scrapedUrls.forEach((scrapedUrl) => {
|
||||
let url;
|
||||
if (isAbsoluteUrl(scrapedUrl)) {
|
||||
|
||||
+1
-1
@@ -117,7 +117,7 @@ export function getPostContent(content) {
|
||||
if (shared.config.saveImages === 'scraped' || shared.config.saveImages === 'all') {
|
||||
// writeImageFile() will save all content images to a relative /images
|
||||
// folder so update references in post content to match
|
||||
content = content.replace(/(<img\s[^>]*?src=").*?([^/"]+\.(?:gif|jpe?g|png|webp))("[^>]*>)/gi, '$1images/$2$3');
|
||||
content = content.replace(/(<img(?=\s)[^>]+?(?<=\s)src=")[^"]*?([^/"]+)("[^>]*>)/gi, '$1images/$2$3');
|
||||
}
|
||||
|
||||
// preserve "more" separator, max one per post, optionally with custom label
|
||||
|
||||
Reference in New Issue
Block a user