From 6ccdf2378a932c9abf16c536e3a8a76edb5b8d7c Mon Sep 17 00:00:00 2001 From: thuanbui Date: Thu, 12 Mar 2026 12:53:35 +0900 Subject: [PATCH] feat: add custom taxonomy support to frontmatter and export taxonomy metadata - Extract custom taxonomy terms per-post and inject into frontmatter automatically - Export taxonomy metadata (categories, tags, custom taxonomies) to JSON files - Provides display names and metadata for use in Astro or other static site generators --- app.js | 4 +-- src/parser.js | 79 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/writer.js | 21 +++++++++++++- 3 files changed, 98 insertions(+), 6 deletions(-) diff --git a/app.js b/app.js index 806bc53..4e4b7f4 100644 --- a/app.js +++ b/app.js @@ -23,10 +23,10 @@ import * as writer from './src/writer.js'; await intake.getConfig(); // parse data from XML and do Markdown translations - const posts = await parser.parseFilePromise() + const { posts, taxonomies } = await parser.parseFilePromise() // write files and download images - await writer.writeFilesPromise(posts); + await writer.writeFilesPromise(posts, taxonomies); // happy goodbye console.log('\nAll done!'); diff --git a/src/parser.js b/src/parser.js index 3363c05..a166e70 100644 --- a/src/parser.js +++ b/src/parser.js @@ -10,7 +10,10 @@ export async function parseFilePromise() { shared.logHeading('Parsing'); const content = await fs.promises.readFile(shared.config.input, 'utf8'); const rssData = await data.load(content); - const allPostData = rssData.child('channel').children('item'); + const channel = rssData.child('channel'); + const allPostData = channel.children('item'); + + const taxonomies = collectTaxonomyMetadata(channel); const postTypes = getPostTypes(allPostData); const posts = collectPosts(allPostData, postTypes); @@ -26,7 +29,7 @@ export async function parseFilePromise() { mergeImagesIntoPosts(images, posts); populateFrontmatter(posts); - return posts; + return { posts, taxonomies }; } function getPostTypes(allPostData) { 
@@ -85,6 +88,18 @@ function collectPosts(allPostData, postTypes) { } function buildPost(data) { + // collect custom taxonomy term slugs keyed by taxonomy slug: every category element of the post whose domain is neither 'category' nor 'post_tag', with its nicename URI-decoded (NOTE(review): decodeURIComponent throws URIError on a malformed percent-escape — confirm nicenames are always well-formed) + const customTaxonomies = {}; + data.children('category') + .filter((cat) => cat.attribute('domain') !== 'category' && cat.attribute('domain') !== 'post_tag') + .forEach((cat) => { + const domain = cat.attribute('domain'); + if (!customTaxonomies[domain]) { + customTaxonomies[domain] = []; + } + customTaxonomies[domain].push(decodeURIComponent(cat.attribute('nicename'))); + }); + return { // full raw post data data, @@ -102,7 +117,10 @@ function buildPost(data) { // these are possibly set later in mergeImagesIntoPosts() coverImage: undefined, - imageUrls: [] + imageUrls: [], + + // custom taxonomy terms keyed by taxonomy slug + customTaxonomies }; } @@ -204,9 +222,64 @@ function populateFrontmatter(posts) { post.frontmatter[alias ?? key] = frontmatterGetter(post); }); + + // inject custom taxonomy slugs into frontmatter, each taxonomy as its own field (NOTE(review): this runs after the field assignments above, so a taxonomy slug equal to an existing frontmatter key replaces that value) + Object.entries(post.customTaxonomies).forEach(([domain, slugs]) => { + if (slugs.length > 0) { + post.frontmatter[domain] = slugs; + } + }); }); } +function collectTaxonomyMetadata(channel) { + const taxonomies = {}; + + // channel-level wp:category elements (stripped to 'category') + const wpCategories = channel.children('category'); + if (wpCategories.length > 0) { + taxonomies.category = wpCategories.map((cat) => ({ + termId: parseInt(cat.optionalChildValue('term_id')), + slug: cat.optionalChildValue('category_nicename'), + name: cat.optionalChildValue('cat_name'), + parent: cat.optionalChildValue('category_parent') || null, + description: cat.optionalChildValue('category_description') || null + })); + } + + // channel-level wp:tag elements (stripped to 'tag') + const wpTags = channel.children('tag'); + if (wpTags.length > 0) { + taxonomies.post_tag = wpTags.map((tag) => ({ + termId: parseInt(tag.optionalChildValue('term_id')), + slug: tag.optionalChildValue('tag_slug'), 
name: tag.optionalChildValue('tag_name'), + description: tag.optionalChildValue('tag_description') || null + })); + } + + // channel-level wp:term elements (stripped to 'term') — custom taxonomies + const wpTerms = channel.children('term'); + wpTerms.forEach((term) => { + const taxonomy = term.optionalChildValue('term_taxonomy'); + if (!taxonomy || taxonomy === 'category' || taxonomy === 'post_tag') { + return; + } + if (!taxonomies[taxonomy]) { + taxonomies[taxonomy] = []; + } + taxonomies[taxonomy].push({ + termId: parseInt(term.optionalChildValue('term_id')), + slug: term.optionalChildValue('term_slug'), + name: term.optionalChildValue('term_name'), + parent: term.optionalChildValue('term_parent') || null, + description: term.optionalChildValue('term_description') || null + }); + }); + + return taxonomies; +} + function prioritizePostType(postTypes, postType) { const index = postTypes.indexOf(postType); if (index !== -1) { diff --git a/src/writer.js b/src/writer.js index cb73b13..c0fe6ed 100644 --- a/src/writer.js +++ b/src/writer.js @@ -7,9 +7,10 @@ import * as luxon from 'luxon'; import path from 'path'; import * as shared from './shared.js'; -export async function writeFilesPromise(posts) { +export async function writeFilesPromise(posts, taxonomies) { await writeMarkdownFilesPromise(posts); await writeImageFilesPromise(posts); + await writeTaxonomyFilesPromise(taxonomies); } async function processPayloadsPromise(payloads, loadFunc) { @@ -189,6 +190,24 @@ function logSavingMessage(things, existingCount, remainingCount) { } } +async function writeTaxonomyFilesPromise(taxonomies) { + shared.logHeading('Saving taxonomy data'); + + const entries = Object.entries(taxonomies); + if (entries.length === 0) { + console.log('No taxonomy data to save.'); + return; + } + + const taxonomyDir = path.join(shared.config.output, 'taxonomies'); + for (const [taxonomyName, terms] of entries) { + const filePath = path.join(taxonomyDir, `${taxonomyName}.json`); + const content = 
JSON.stringify(terms, null, '\t'); + await writeFile(filePath, content); + console.log(`${chalk.green('✓')} ${chalk.gray('[taxonomy]')} ${taxonomyName}.json (${terms.length} terms)`); + } +} + function logPayloadResult(payload, errorMessage) { const messageBits = [ errorMessage ? chalk.red('✗') : chalk.green('✓'),