feat: add custom taxonomy support to frontmatter and export taxonomy metadata

- Extract custom taxonomy terms per post and inject them into frontmatter automatically
- Export taxonomy metadata (categories, tags, custom taxonomies) to JSON files
- Provide display names and metadata for use in Astro or other static site generators
This commit is contained in:
2026-03-12 12:53:35 +09:00
parent 1476e46ed5
commit 6ccdf2378a
3 changed files with 98 additions and 6 deletions
+2 -2
View File
@@ -23,10 +23,10 @@ import * as writer from './src/writer.js';
await intake.getConfig();
// parse data from XML and do Markdown translations
const posts = await parser.parseFilePromise()
const { posts, taxonomies } = await parser.parseFilePromise()
// write files and download images
await writer.writeFilesPromise(posts);
await writer.writeFilesPromise(posts, taxonomies);
// happy goodbye
console.log('\nAll done!');
+76 -3
View File
@@ -10,7 +10,10 @@ export async function parseFilePromise() {
shared.logHeading('Parsing');
const content = await fs.promises.readFile(shared.config.input, 'utf8');
const rssData = await data.load(content);
const allPostData = rssData.child('channel').children('item');
const channel = rssData.child('channel');
const allPostData = channel.children('item');
const taxonomies = collectTaxonomyMetadata(channel);
const postTypes = getPostTypes(allPostData);
const posts = collectPosts(allPostData, postTypes);
@@ -26,7 +29,7 @@ export async function parseFilePromise() {
mergeImagesIntoPosts(images, posts);
populateFrontmatter(posts);
return posts;
return { posts, taxonomies };
}
function getPostTypes(allPostData) {
@@ -85,6 +88,18 @@ function collectPosts(allPostData, postTypes) {
}
function buildPost(data) {
// collect custom taxonomy term slugs keyed by taxonomy slug
const customTaxonomies = {};
data.children('category')
.filter((cat) => cat.attribute('domain') !== 'category' && cat.attribute('domain') !== 'post_tag')
.forEach((cat) => {
const domain = cat.attribute('domain');
if (!customTaxonomies[domain]) {
customTaxonomies[domain] = [];
}
customTaxonomies[domain].push(decodeURIComponent(cat.attribute('nicename')));
});
return {
// full raw post data
data,
@@ -102,7 +117,10 @@ function buildPost(data) {
// these are possibly set later in mergeImagesIntoPosts()
coverImage: undefined,
imageUrls: []
imageUrls: [],
// custom taxonomy terms keyed by taxonomy slug
customTaxonomies
};
}
@@ -204,9 +222,64 @@ function populateFrontmatter(posts) {
post.frontmatter[alias ?? key] = frontmatterGetter(post);
});
// inject custom taxonomy slugs into frontmatter, each taxonomy as its own field
Object.entries(post.customTaxonomies).forEach(([domain, slugs]) => {
if (slugs.length > 0) {
post.frontmatter[domain] = slugs;
}
});
});
}
/**
 * Collects term metadata for every taxonomy declared at the channel level.
 *
 * Three kinds of channel children are read: 'category' (stored under the
 * `category` key), 'tag' (stored under the `post_tag` key), and 'term'
 * (grouped under each term's own `term_taxonomy` value). 'term' children whose
 * taxonomy is missing, 'category' or 'post_tag' are skipped. A key is only
 * present when at least one term was found for it.
 *
 * @param {object} channel - channel node exposing `children(name)`; each child
 *   must expose `optionalChildValue(name)`. Presumably the parsed <channel> of
 *   a WordPress export - confirm against the data loader.
 * @returns {Object<string, object[]>} term records keyed by taxonomy slug. Each
 *   record has `termId`, `slug`, `name`, `description`, and (except for tags)
 *   `parent`; empty/missing `parent` and `description` values become `null`.
 */
function collectTaxonomyMetadata(channel) {
	// Accumulate in a Map rather than a plain object: taxonomy slugs come from the
	// input file, and a slug such as 'constructor' or '__proto__' would otherwise
	// collide with members inherited from Object.prototype.
	const termsByTaxonomy = new Map();

	// term ids are decimal strings; always parse with an explicit radix
	const parseTermId = (node) => Number.parseInt(node.optionalChildValue('term_id'), 10);

	// channel-level <wp:category> elements (stripped to 'category')
	const wpCategories = channel.children('category');
	if (wpCategories.length > 0) {
		termsByTaxonomy.set('category', wpCategories.map((cat) => ({
			termId: parseTermId(cat),
			slug: cat.optionalChildValue('category_nicename'),
			name: cat.optionalChildValue('cat_name'),
			parent: cat.optionalChildValue('category_parent') || null,
			description: cat.optionalChildValue('category_description') || null
		})));
	}

	// channel-level <wp:tag> elements (stripped to 'tag')
	const wpTags = channel.children('tag');
	if (wpTags.length > 0) {
		termsByTaxonomy.set('post_tag', wpTags.map((tag) => ({
			termId: parseTermId(tag),
			slug: tag.optionalChildValue('tag_slug'),
			name: tag.optionalChildValue('tag_name'),
			description: tag.optionalChildValue('tag_description') || null
		})));
	}

	// channel-level <wp:term> elements (stripped to 'term') — custom taxonomies
	const wpTerms = channel.children('term');
	wpTerms.forEach((term) => {
		const taxonomy = term.optionalChildValue('term_taxonomy');
		// categories and tags are already covered by their dedicated elements above
		if (!taxonomy || taxonomy === 'category' || taxonomy === 'post_tag') {
			return;
		}

		if (!termsByTaxonomy.has(taxonomy)) {
			termsByTaxonomy.set(taxonomy, []);
		}

		termsByTaxonomy.get(taxonomy).push({
			termId: parseTermId(term),
			slug: term.optionalChildValue('term_slug'),
			name: term.optionalChildValue('term_name'),
			parent: term.optionalChildValue('term_parent') || null,
			description: term.optionalChildValue('term_description') || null
		});
	});

	// Object.fromEntries defines own data properties, so even a '__proto__' slug
	// ends up as a regular key; insertion order (category, post_tag, custom) is kept
	return Object.fromEntries(termsByTaxonomy);
}
function prioritizePostType(postTypes, postType) {
const index = postTypes.indexOf(postType);
if (index !== -1) {
+20 -1
View File
@@ -7,9 +7,10 @@ import * as luxon from 'luxon';
import path from 'path';
import * as shared from './shared.js';
export async function writeFilesPromise(posts) {
export async function writeFilesPromise(posts, taxonomies) {
await writeMarkdownFilesPromise(posts);
await writeImageFilesPromise(posts);
await writeTaxonomyFilesPromise(taxonomies);
}
async function processPayloadsPromise(payloads, loadFunc) {
@@ -189,6 +190,24 @@ function logSavingMessage(things, existingCount, remainingCount) {
}
}
/**
 * Saves each taxonomy's term list as `<output>/taxonomies/<taxonomy>.json`.
 *
 * Files are written one at a time, in the object's own key order, and a
 * confirmation line is logged after each write. When there are no taxonomies,
 * a notice is logged and nothing is written.
 *
 * @param {Object<string, object[]>} taxonomies - term arrays keyed by taxonomy name
 * @returns {Promise<void>} resolves once every file has been handed to writeFile
 */
async function writeTaxonomyFilesPromise(taxonomies) {
	shared.logHeading('Saving taxonomy data');

	const taxonomyEntries = Object.entries(taxonomies);
	if (taxonomyEntries.length === 0) {
		console.log('No taxonomy data to save.');
		return;
	}

	const outputDir = path.join(shared.config.output, 'taxonomies');

	for (const entry of taxonomyEntries) {
		const [name, termList] = entry;
		const fileName = `${name}.json`;

		// tab-indented JSON, one file per taxonomy
		const json = JSON.stringify(termList, null, '\t');
		await writeFile(path.join(outputDir, fileName), json);

		const messageBits = [
			chalk.green('✓'),
			chalk.gray('[taxonomy]'),
			fileName,
			`(${termList.length} terms)`
		];
		console.log(messageBits.join(' '));
	}
}
function logPayloadResult(payload, errorMessage) {
const messageBits = [
errorMessage ? chalk.red('✗') : chalk.green('✓'),