mirror of
https://github.com/10h30/wordpress-export-to-markdown.git
synced 2026-06-05 15:09:59 +09:00
Prevent adjacent paragraphs from being merged together
This commit is contained in:
@@ -187,7 +187,6 @@ function initTurndownService() {
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
return turndownService;
|
||||
}
|
||||
|
||||
@@ -220,9 +219,14 @@ function getPostDate(post) {
|
||||
function getPostContent(post, turndownService) {
|
||||
let content = post.encoded[0].trim();
|
||||
|
||||
// insert an empty div element between double line breaks
|
||||
// this nifty trick causes turndown to keep adjacent paragraphs separated
|
||||
// without mucking up content inside of other elements (like <code> blocks)
|
||||
content = content.replace(/(\r?\n){2}/g, '\n<div></div>\n');
|
||||
|
||||
if (argv.addcontentimages) {
|
||||
// writeImageFile() will save all content images to a relative /images folder
|
||||
// so update references in post content to match
|
||||
// writeImageFile() will save all content images to a relative /images
|
||||
// folder so update references in post content to match
|
||||
content = content.replace(/(<img[^>]*src=").*?([^\/"]+\.(?:gif|jpg|png))("[^>]*>)/gi, '$1images/$2$3');
|
||||
}
|
||||
|
||||
@@ -231,10 +235,13 @@ function getPostContent(post, turndownService) {
|
||||
// (using turndown's blankRule() and keep() solution did not work for me)
|
||||
content = content.replace(/(<\/iframe>)/gi, '.$1');
|
||||
|
||||
content = turndownService.turndown(content)
|
||||
.replace(/-\s+/g, '- '); // clean up extra spaces
|
||||
// use turndown to convert HTML to Markdown
|
||||
content = turndownService.turndown(content);
|
||||
|
||||
// clean up the "." from the hack above
|
||||
// clean up extra spaces in list items
|
||||
content = content.replace(/- +/g, '- ');
|
||||
|
||||
// clean up the "." from the iframe hack above
|
||||
content = content.replace(/\.(<\/iframe>)/gi, '$1');
|
||||
|
||||
return content;
|
||||
|
||||
Reference in New Issue
Block a user