| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247 |
- #!/usr/bin/env node
- /**
- * build-graph.mjs — generate a {nodes, links} JSON of the site's internal link
- * graph for the on-site force-directed graph (src/pages/graph).
- *
- * Why this exists: md-graph (the VS Code extension) only resolves relative .md
- * links, so it misses this site's cross-section links (posts → docs/notes/lists),
- * which are written as absolute Docusaurus route URLs (/notes/house/helene).
- * Here we normalize BOTH link styles to canonical routes, so those edges appear.
- *
- * Routes mirror Docusaurus, including the folder-index convention: a doc named
- * `index`, `README`, OR the same as its parent folder (dogs/dogs.md) becomes the
- * folder's route (/notes/dogs) — not /notes/dogs/dogs.
- *
- * Underscore files (_posts.md, _computers.md, …) are Docusaurus "partials" with
- * no published route, but they're hand-maintained index/hub files that link to
- * many pages. We include them as non-navigable "index" nodes so those hub
- * connections show up (this is what md-graph does too).
- *
- * Output: src/data/graph.json → imported by the graph component.
- * Run: node scripts/build-graph.mjs (or `npm run graph`; DEBUG=1 for diagnostics)
- */
- import { readFileSync, writeFileSync, mkdirSync, readdirSync, statSync } from 'fs';
- import { join, relative, dirname, basename } from 'path';
- import { fileURLToPath } from 'url';
// Repository root: the parent of the directory that holds this script.
const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');

// DEBUG=1 in the environment turns on the unresolved-link diagnostics.
const DEBUG = process.env.DEBUG === '1';

// Docs-style sections. For each one the source folder, the published route
// prefix and the graph group share the same name.
const DOC_SECTIONS = ['docs', 'notes', 'lists'].map((name) => ({
  dir: name,
  base: `/${name}`,
  group: name,
}));

// Blog-style sections (routes come from the post slug, not the folder layout).
const BLOG_SECTIONS = [
  { dir: 'posts', base: '/posts', group: 'posts' },
];

// Standalone pages: about.md, ai.mdx, map.md, …
const PAGES_DIR = 'src/pages';

// Routes that have no .md/.mdx source under src/pages and must be added by hand.
const MANUAL_NODES = [
  { route: '/graph', label: 'Graph', group: 'page' }, // graph.jsx
  { route: '/posts', label: 'Posts (blog index)', group: 'page' },
];
- // ---------- helpers ----------
/**
 * Recursively collect every Markdown/MDX file below `dir`.
 *
 * Dot-entries (.obsidian, .git, …) and `node_modules` are skipped. Underscore
 * files are kept on purpose: they are the hub/index partials.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths (each is `dir` joined with the entry names), in a
 *   stable name-sorted, depth-first order. Empty when `dir` cannot be read.
 */
function walk(dir) {
  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    // Deliberate best-effort: a section folder that cannot be listed
    // (typically because it does not exist) simply contributes no files.
    return [];
  }

  const out = [];
  // readdirSync gives no ordering guarantee; sorting keeps the generated
  // graph identical from one machine/filesystem to the next.
  for (const name of [...entries].sort()) {
    if (name.startsWith('.') || name === 'node_modules') continue; // skip .obsidian etc.
    const full = join(dir, name);
    // throwIfNoEntry:false → undefined instead of an exception for dangling
    // symlinks or entries removed mid-scan, so one bad entry cannot abort the build.
    const st = statSync(full, { throwIfNoEntry: false });
    if (!st) continue;
    if (st.isDirectory()) out.push(...walk(full));
    else if (/\.mdx?$/.test(name)) out.push(full); // include _partials (hubs)
  }
  return out;
}
/**
 * Minimal front-matter reader: splits a leading `---` … `---` block off a
 * Markdown source and extracts its flat `key: value` lines.
 *
 * Only top-level scalar lines are recognised (keys made of letters, digits,
 * `_` and `-`); every value is returned as a string. Lines that do not match
 * (nested/indented YAML, list items, blanks) are ignored.
 *
 * @param {string} raw - Full file contents.
 * @returns {{data: Object<string, string>, body: string}} Parsed keys and the
 *   text after the closing fence. When there is no complete front-matter
 *   block, `data` is empty and `body` is `raw` unchanged.
 */
function parseFrontmatter(raw) {
  if (!raw.startsWith('---')) return { data: {}, body: raw };
  const end = raw.indexOf('\n---', 3);
  if (end === -1) return { data: {}, body: raw };

  const data = {};
  for (const rawLine of raw.slice(3, end).split('\n')) {
    // Drop the trailing "\r" of CRLF files: `.` never matches "\r", so without
    // this no key in a Windows-saved file would ever be recognised.
    const line = rawLine.trimEnd();
    const m = line.match(/^([A-Za-z0-9_-]+):\s*(.*)$/);
    if (!m) continue;
    const value = m[2].trim();
    // Unwrap only a matching pair of surrounding quotes, so values such as
    // `The "Best"` keep their inner quotation marks.
    const quoted = value.match(/^(["'])(.*)\1$/);
    data[m[1]] = quoted ? quoted[2] : value;
  }
  return { data, body: raw.slice(end + 4) };
}
/** Front-matter flag check: accepts the boolean `true` or the string 'true'. */
const isTrue = (v) => [true, 'true'].includes(v);
/**
 * Normalise a route: collapse runs of slashes into one and drop trailing
 * slashes (a lone "/" is kept). An empty result becomes "/".
 */
const tidy = (r) => {
  const collapsed = r.replace(/\/{2,}/g, '/');
  const trimmed = collapsed.replace(/(.)\/+$/, '$1');
  return trimmed || '/';
};
/**
 * Pick a display label for a document: the front-matter `title` when set,
 * otherwise the text of the first `# ` heading line in the body, otherwise
 * `fallback`.
 */
function titleOf(data, body, fallback) {
  const { title } = data;
  if (title) return title;

  const heading = body.match(/^#\s+(.+)$/m);
  if (!heading) return fallback;
  return heading[1].trim();
}
/**
 * Label for an underscore hub file: its base name without the leading "_",
 * first character upper-cased, followed by " (index)".
 */
const partialLabel = (rel) => {
  const name = basename(rel).replace(/^_/, '');
  const head = name.slice(0, 1).toUpperCase();
  const tail = name.slice(1);
  return `${head}${tail} (index)`;
};
/**
 * Docusaurus folder-index test for an extension-less, "/"-separated path.
 * A file counts as its folder's index when its name (case-insensitively) is
 * `index`, `README`, or the same as its parent folder.
 *
 * @param {string} rel - Section-relative path without extension.
 * @returns {{isIndex: *, dir: string}} `isIndex` is truthy for index files
 *   (callers only test truthiness); `dir` is `rel` minus its last segment.
 */
function folderIndex(rel) {
  const dirParts = rel.split('/');
  const leaf = dirParts.pop().toLowerCase();
  // After pop(), the last remaining segment (if any) is the parent folder.
  const folder = dirParts.length >= 1 ? dirParts[dirParts.length - 1].toLowerCase() : null;
  const isIndex = ['index', 'readme'].includes(leaf) || (folder && leaf === folder);
  return { isIndex, dir: dirParts.join('/') };
}
/**
 * Compute the published route of a docs-style file.
 *
 * @param {string} base - Section route prefix, e.g. "/notes".
 * @param {string} rel - Section-relative path without extension.
 * @param {string} [slug] - Front-matter slug, if any.
 * @returns {string} Tidied route:
 *   - a slug starting with "/" is appended directly to `base`;
 *   - any other slug replaces the file name inside the file's own folder;
 *   - without a slug, a folder-index file maps to its folder (or to `base`
 *     at the section root) and every other file maps to `base/rel`.
 */
function docRoute(base, rel, slug) {
  if (slug && slug.startsWith('/')) return tidy(base + slug);

  const { isIndex, dir } = folderIndex(rel);
  // `dir` is the containing folder whether or not the file is an index.
  if (slug) return tidy(`${base}/${dir}/${slug}`);
  if (!isIndex) return tidy(`${base}/${rel}`);
  return tidy(dir ? `${base}/${dir}` : base);
}
// ---------- pass 1: nodes + a section-relative path → route map ----------
// route -> {id,label,group,partial?}
const nodes = new Map();
// lower(route) -> route, for case-insensitive lookups
const byLc = new Map();
// `${base}|${relPathNoExt}` -> route (+ folder aliases)
const fileRoute = new Map();
// every parsed source whose body is scanned for links: {base, rel, route, body}
const files = [];

/**
 * Register a graph node. The first registration of a route wins; later calls
 * for the same route (or the same route in another letter case) change nothing.
 */
const addNode = (route, label, group, partial = false) => {
  if (!nodes.has(route)) {
    const node = { id: route, label, group };
    if (partial) node.partial = true;
    nodes.set(route, node);
  }
  const lower = route.toLowerCase();
  if (!byLc.has(lower)) byLc.set(lower, route);
};

for (const { route, label, group } of MANUAL_NODES) addNode(route, label, group);

// Counters for the final report.
let skipped = 0;
let noSlug = 0;
// Docs-style sections: one node per published doc, plus non-navigable "index"
// nodes for underscore hub files.
for (const { dir, base, group } of DOC_SECTIONS) {
  const sectionRoot = join(ROOT, dir);
  for (const file of walk(sectionRoot)) {
    const { data, body } = parseFrontmatter(readFileSync(file, 'utf8'));
    if (isTrue(data.draft) || isTrue(data.unlisted) || isTrue(data.private)) { skipped++; continue; }

    // path.relative() uses "\" on Windows; routes and the fileRoute keys are
    // "/"-separated, so normalise before anything splits on "/".
    const rel = relative(sectionRoot, file).replace(/\.mdx?$/, '').replace(/\\/g, '/');

    if (basename(file).startsWith('_')) { // hub/index partial
      // Partials have no published route; the id is synthetic and only has to be unique.
      const id = `${base}/${rel}`;
      addNode(id, partialLabel(rel), 'index', true);
      files.push({ base, rel, route: id, body });
      continue;
    }

    const route = docRoute(base, rel, data.slug);
    addNode(route, titleOf(data, body, basename(rel)), group);
    fileRoute.set(`${base}|${rel}`, route);
    const fi = folderIndex(rel);
    if (fi.isIndex) fileRoute.set(`${base}|${fi.dir}`, route); // link to the folder itself
    files.push({ base, rel, route, body });
  }
}
// Blog-style sections: the route is `base/slug`, where the slug comes from
// front matter or, failing that, from the file path minus a leading
// YYYY-MM-DD- date prefix (counted in `noSlug` for the report).
for (const { dir, base, group } of BLOG_SECTIONS) {
  const sectionRoot = join(ROOT, dir);
  for (const file of walk(sectionRoot)) {
    const { data, body } = parseFrontmatter(readFileSync(file, 'utf8'));
    if (isTrue(data.draft) || isTrue(data.unlisted)) { skipped++; continue; }

    // path.relative() uses "\" on Windows; routes and the fileRoute keys are
    // "/"-separated, so normalise before building either.
    const rel = relative(sectionRoot, file).replace(/\.mdx?$/, '').replace(/\\/g, '/');

    if (basename(file).startsWith('_')) { // _posts.md hub
      const id = `${base}/${rel}`;
      addNode(id, partialLabel(rel), 'index', true);
      files.push({ base, rel, route: id, body });
      continue;
    }

    let slug = data.slug;
    if (!slug) { slug = rel.replace(/^\d{4}-\d{2}-\d{2}-/, ''); noSlug++; }
    const route = tidy(`${base}/${slug}`);
    addNode(route, titleOf(data, body, slug), group);
    fileRoute.set(`${base}|${rel}`, route); // by filename (for _posts.md links)
    fileRoute.set(`${base}|${slug}`, route);
    files.push({ base, rel, route, body });
  }
}
// standalone pages (src/pages/**/*.md|mdx) — parse their bodies so About/Map/AI/Home connect
for (const file of walk(join(ROOT, PAGES_DIR))) {
  // Keep the folder part of the path: walk() recurses, so a nested page such
  // as src/pages/a/b.md must become /a/b (not /b), and a/index.md must become
  // /a rather than colliding with the home page "/".
  const rel = relative(join(ROOT, PAGES_DIR), file).replace(/\.mdx?$/, '').replace(/\\/g, '/');
  const segments = rel.split('/');
  // Files and folders starting with "_" are not published as pages.
  if (segments.some((segment) => segment.startsWith('_'))) continue;

  const { data, body } = parseFrontmatter(readFileSync(file, 'utf8'));
  const name = segments[segments.length - 1];
  const routeParts = name === 'index' ? segments.slice(0, -1) : segments;
  const route = `/${routeParts.join('/')}`;
  addNode(route, titleOf(data, body, name), 'page');
  files.push({ base: '', rel, route, body }); // base '' → relative links resolve from site root
}
// ---------- link resolution ----------
/** Return the registered route equal to `r` (exact, then case-insensitive), or null. */
const matchRoute = (r) => (r == null ? null : nodes.has(r) ? r : byLc.get(r.toLowerCase()) || null);

/**
 * Resolve one link target found in a source file to a known node route.
 *
 * Handles absolute site URLs (https://davidawindham.com/…), absolute routes
 * and file paths ("/notes/house/helene", "/notes/dogs/dogs.md"), a leading
 * "/til" prefix (stripped), and file-relative links ("../x.md").
 *
 * @param {string} href - Raw link target as written in the source.
 * @param {string} base - Route prefix of the linking file's section ('' for pages).
 * @param {string} rel - Linking file's section-relative path without extension.
 * @returns {string|null} Target route, or null for external, mailto, pure
 *   fragment, empty or unknown targets.
 */
function resolveLink(href, base, rel) {
  if (!href) return null;
  let h = href.trim();
  if (/^https?:\/\//i.test(h)) {
    // Only links to this site's own domain are internal.
    const m = h.match(/^https?:\/\/davidawindham\.com(\/.*)?$/i);
    if (!m) return null;
    h = m[1] || '/';
  }
  if (h.startsWith('mailto:') || h.startsWith('#')) return null;
  h = h.split('#')[0].split('?')[0];
  if (!h) return null;
  if (h.startsWith('/til/')) h = h.slice(4);
  else if (h === '/til') h = '/';
  h = h.replace(/\.mdx?$/i, '');

  // Look a site-root path up as "<first segment = section>|<rest>" in fileRoute.
  const bySitePath = (segs) => fileRoute.get(`/${segs[0]}|${segs.slice(1).join('/')}`) || null;

  if (h.startsWith('/')) { // absolute route or file path
    const exact = matchRoute(tidy(h));
    if (exact) return exact;
    return bySitePath(h.replace(/^\//, '').split('/')); // .md link to a folder-index doc
  }

  // Collapse "." / ".." segments; `escaped` records a ".." that had nothing left to pop.
  const normalise = (path) => {
    const segs = [];
    let escaped = false;
    for (const p of path.split('/')) {
      if (p === '' || p === '.') continue;
      if (p === '..') {
        if (segs.length === 0) escaped = true;
        segs.pop();
      } else {
        segs.push(p);
      }
    }
    return { segs, escaped };
  };

  // relative → resolve via file dir, inside the linking file's own section first
  const local = normalise(`${dirname(rel)}/${h}`);
  const localPath = local.segs.join('/');
  const inSection = fileRoute.get(`${base}|${localPath}`) || matchRoute(tidy(`${base}/${localPath}`));
  if (inSection || !local.escaped) return inSection;

  // The link climbed above its section root (e.g. "../notes/x.md" from a
  // post): retry with the path resolved from the site root so cross-section
  // relative links are not dropped. Runs only when the lookup above failed.
  const { segs: fromRoot } = normalise(`${base}/${dirname(rel)}/${h}`);
  if (fromRoot.length === 0) return null;
  return bySitePath(fromRoot) || matchRoute(tidy(`/${fromRoot.join('/')}`));
}
// ---------- pass 2: edges ----------
const seen = new Set();       // "source|target" keys already emitted
const links = [];             // {source, target} edge list
const unresolved = new Map(); // DEBUG only: internal-looking target -> hit count

// Link syntaxes scanned in every body; capture group 1 is the target.
const LINK_RE = /\]\(([^)\s]+)(?:\s+"[^"]*")?\)/g;   // markdown: ](url "title")
const HREF_RE = /href=["']([^"']+)["']/g;            // html/jsx: href="url"
const CLICK_RE = /^\s*click\s+\S+\s+"([^"]+)"/gm;    // mermaid: click NODE "url" (map.md)

for (const { base, rel, route, body } of files) {
  // Gather distinct raw targets first so each one is resolved once per file.
  const targets = new Set();
  for (const pattern of [LINK_RE, HREF_RE, CLICK_RE]) {
    for (const hit of body.matchAll(pattern)) targets.add(hit[1]);
  }

  for (const target of targets) {
    const dest = resolveLink(target, base, rel);

    if (dest) {
      const key = `${route}|${dest}`;
      // Ignore self-links and edges already recorded.
      if (dest !== route && !seen.has(key)) {
        seen.add(key);
        links.push({ source: route, target: dest });
      }
      continue;
    }

    if (!DEBUG) continue;
    // Tally misses that look internal: root-relative paths, or non-URL targets ending in .md/.mdx.
    const looksInternal = target.startsWith('/')
      || (!/^https?:|^mailto:|^#/.test(target) && /\.mdx?($|#)/.test(target));
    if (looksInternal) {
      const k = target.split('#')[0];
      unresolved.set(k, (unresolved.get(k) || 0) + 1);
    }
  }
}
// size nodes by total degree so hubs read as hubs
const deg = new Map(); // node id -> number of edges touching it (in + out)
for (const { source, target } of links) {
  for (const id of [source, target]) deg.set(id, (deg.get(id) || 0) + 1);
}

// val = 1 + degree, so an unlinked node still gets a size of 1.
const nodeList = Array.from(nodes.values(), (n) => ({ ...n, val: 1 + (deg.get(n.id) || 0) }));

// Write the compact JSON consumed by the graph component, creating src/data if needed.
const outFile = join(ROOT, 'src', 'data', 'graph.json');
mkdirSync(dirname(outFile), { recursive: true });
writeFileSync(outFile, `${JSON.stringify({ nodes: nodeList, links })}\n`);
// ---------- report ----------
// Tally nodes per group once, then read the counts for the summary line.
const perGroup = new Map();
for (const { group } of nodeList) perGroup.set(group, (perGroup.get(group) || 0) + 1);
const g = (grp) => perGroup.get(grp) || 0;

// A node with no entry in `deg` has no edge in either direction.
const orphans = nodeList.filter((n) => !deg.has(n.id));
// Every edge whose source route is under /posts/.
const fromPosts = links.filter((e) => e.source.startsWith('/posts/'));

console.log(`graph.json → ${nodeList.length} nodes, ${links.length} edges`);
console.log(` nodes: docs=${g('docs')} notes=${g('notes')} lists=${g('lists')} posts=${g('posts')} pages=${g('page')} index=${g('index')}`);
console.log(` post → page edges: ${fromPosts.length}`);
console.log(` orphans (no links in or out): ${orphans.length}`);
console.log(` skipped drafts: ${skipped}; posts w/o slug: ${noSlug}`);

if (DEBUG) {
  // Most frequent unresolved targets first, capped at 30 lines.
  const top = [...unresolved.entries()].sort((a, b) => b[1] - a[1]).slice(0, 30);
  console.log('\n unresolved internal-looking targets:');
  top.forEach(([t, c]) => console.log(` ${c}× ${t}`));
}
|