Remove iframe non-empty hack (fixed in turndown)

This commit is contained in:
Will Boyd
2024-02-21 13:33:16 -05:00
parent 9cbda10d15
commit bcc0c5ce3e
+5 -10
View File
@@ -9,6 +9,7 @@ function initTurndownService() {
});
turndownService.use(turndownPluginGfm.tables);
turndownService.keep(['iframe']);
// preserve embedded tweets
turndownService.addRule('tweet', {
@@ -44,11 +45,13 @@ function initTurndownService() {
}
});
// preserve iframes (common for embedded audio/video)
// iframe boolean attributes do not need to be set to empty string
turndownService.addRule('iframe', {
filter: 'iframe',
replacement: (content, node) => {
const html = node.outerHTML.replace('allowfullscreen=""', 'allowfullscreen');
const html = node.outerHTML
.replace('allowfullscreen=""', 'allowfullscreen')
.replace('allowpaymentrequest=""', 'allowpaymentrequest');
return '\n\n' + html + '\n\n';
}
});
@@ -70,11 +73,6 @@ function getPostContent(post, turndownService, config) {
content = content.replace(/(<img[^>]*src=").*?([^/"]+\.(?:gif|jpe?g|png))("[^>]*>)/gi, '$1images/$2$3');
}
// this is a hack to make <iframe> nodes non-empty by inserting a "." which
// allows the iframe rule declared in initTurndownService() to take effect
// (using turndown's blankRule() and keep() solution did not work for me)
content = content.replace(/(<\/iframe>)/gi, '.$1');
// preserve "more" separator, max one per post, optionally with custom label
// by escaping angle brackets (will be unescaped during turndown conversion)
content = content.replace(/<(!--more( .*)?--)>/, '&lt;$1&gt;');
@@ -85,9 +83,6 @@ function getPostContent(post, turndownService, config) {
// clean up extra spaces in list items
content = content.replace(/(-|\d+\.) +/g, '$1 ');
// clean up the "." from the iframe hack above
content = content.replace(/\.(<\/iframe>)/gi, '$1');
return content;
}