Prevent adjacent paragraphs from being merged together

This commit is contained in:
Will Boyd
2018-11-24 11:43:26 -05:00
parent 9307066570
commit de470859c4
+13 -6
View File
@@ -187,7 +187,6 @@ function initTurndownService() {
}
});
return turndownService;
}
@@ -220,9 +219,14 @@ function getPostDate(post) {
function getPostContent(post, turndownService) {
let content = post.encoded[0].trim();
// insert an empty div element between double line breaks
// this nifty trick causes turndown to keep adjacent paragraphs separated
// without mucking up content inside of other elements (like <code> blocks)
content = content.replace(/(\r?\n){2}/g, '\n<div></div>\n');
if (argv.addcontentimages) {
// writeImageFile() will save all content images to a relative /images folder
// so update references in post content to match
// writeImageFile() will save all content images to a relative /images
// folder so update references in post content to match
content = content.replace(/(<img[^>]*src=").*?([^\/"]+\.(?:gif|jpg|png))("[^>]*>)/gi, '$1images/$2$3');
}
@@ -231,10 +235,13 @@ function getPostContent(post, turndownService) {
// (using turndown's blankRule() and keep() solution did not work for me)
content = content.replace(/(<\/iframe>)/gi, '.$1');
content = turndownService.turndown(content)
.replace(/-\s+/g, '- '); // clean up extra spaces
// use turndown to convert HTML to Markdown
content = turndownService.turndown(content);
// clean up the "." from the hack above
// clean up extra spaces in list items
content = content.replace(/- +/g, '- ');
// clean up the "." from the iframe hack above
content = content.replace(/\.(<\/iframe>)/gi, '$1');
return content;