Merge pull request #107 from lonekorean/keep-figure-figcaption

Preserve <figure> and <figcaption>
This commit is contained in:
Will Boyd
2024-02-22 08:06:29 -05:00
committed by GitHub
+28 -10
View File
@@ -44,15 +44,41 @@ function initTurndownService() {
}
});
// preserve iframes (common for embedded audio/video)
// iframe boolean attributes do not need to be set to empty string
// The replacement ignores the converted `content` and emits the node's own
// outerHTML instead, padded with a blank line on each side.
turndownService.addRule('iframe', {
filter: 'iframe',
replacement: (content, node) => {
// NOTE(review): `html` is declared by this statement and again by the next
// one; this looks like the old and new sides of the change shown together.
// Confirm which declaration is meant to remain.
const html = node.outerHTML.replace('allowfullscreen=""', 'allowfullscreen');
// Rewrite the empty-string form of each boolean attribute to its bare form.
// The patterns are plain strings, so only the first occurrence of each is
// replaced.
const html = node.outerHTML
.replace('allowfullscreen=""', 'allowfullscreen')
.replace('allowpaymentrequest=""', 'allowpaymentrequest');
return '\n\n' + html + '\n\n';
}
});
// preserve <figure> when it contains a <figcaption>
// A captioned figure is emitted as literal <figure>...</figure> tags wrapped
// around its converted children; a figure without a caption is unwrapped and
// only its converted children are returned.
turndownService.addRule('figure', {
  filter: 'figure',
  replacement: (content, node) => {
    if (!node.querySelector('figcaption')) {
      // does not contain <figcaption>, do not preserve
      return content;
    }

    // Converted children (a <figcaption>, for example) can begin and/or end
    // with a blank line of their own. Strip one blank line from each end so
    // that neither boundary ends up with a run of four newlines. Anchoring to
    // the ends handles both boundaries and leaves newline runs inside the
    // content untouched; a plain-string replace() would only collapse the
    // first run it finds, wherever that is.
    const inner = content.replace(/^\n\n/, '').replace(/\n\n$/, '');

    // extra newlines are necessary for markdown and HTML to render correctly together
    return '\n\n<figure>\n\n' + inner + '\n\n</figure>\n\n';
  }
});
// preserve <figcaption>
// The caption's converted content is wrapped in literal <figcaption> tags.
turndownService.addRule('figcaption', {
  filter: 'figcaption',
  replacement(content) {
    // blank lines around each tag are necessary for markdown and HTML to
    // render correctly together
    return `\n\n<figcaption>\n\n${content}\n\n</figcaption>\n\n`;
  }
});
return turndownService;
}
@@ -70,11 +96,6 @@ function getPostContent(post, turndownService, config) {
content = content.replace(/(<img[^>]*src=").*?([^/"]+\.(?:gif|jpe?g|png))("[^>]*>)/gi, '$1images/$2$3');
}
// this is a hack to make <iframe> nodes non-empty by inserting a "." which
// allows the iframe rule declared in initTurndownService() to take effect
// (using turndown's blankRule() and keep() solution did not work for me)
content = content.replace(/(<\/iframe>)/gi, '.$1');
// preserve "more" separator, max one per post, optionally with custom label
// by escaping angle brackets (will be unescaped during turndown conversion)
content = content.replace(/<(!--more( .*)?--)>/, '&lt;$1&gt;');
@@ -85,9 +106,6 @@ function getPostContent(post, turndownService, config) {
// clean up extra spaces in list items
content = content.replace(/(-|\d+\.) +/g, '$1 ');
// clean up the "." from the iframe hack above
content = content.replace(/\.(<\/iframe>)/gi, '$1');
return content;
}