#!/usr/bin/env node
/**
 * build-graph.mjs — generate a {nodes, links} JSON of the site's internal link
 * graph for the on-site force-directed graph (src/pages/graph).
 *
 * Why this exists: md-graph (the VS Code extension) only resolves relative .md
 * links, so it misses this site's cross-section links (posts → docs/notes/lists),
 * which are written as absolute Docusaurus route URLs (/notes/house/helene).
 * Here we normalize BOTH link styles to canonical routes, so those edges appear.
 *
 * Routes mirror Docusaurus, including the folder-index convention: a doc named
 * `index`, `README`, OR the same as its parent folder (dogs/dogs.md) becomes the
 * folder's route (/notes/dogs) — not /notes/dogs/dogs.
 *
 * Underscore files (_posts.md, _computers.md, …) are Docusaurus "partials" with
 * no published route, but they're hand-maintained index/hub files that link to
 * many pages. We include them as non-navigable "index" nodes so those hub
 * connections show up (this is what md-graph does too).
 *
 * Output: src/data/graph.json → imported by the graph component.
 * Run: node scripts/build-graph.mjs (or `npm run graph`; DEBUG=1 for diagnostics)
 */
  import { mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from 'fs';
  import { basename, dirname, join, relative, sep } from 'path';
  import { fileURLToPath } from 'url';
  // Project root: the parent of the folder that contains this script.
  const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');

  // Extra diagnostics are enabled only by the exact environment value DEBUG=1.
  const DEBUG = process.env.DEBUG === '1';

  // Doc-style sections: the source folder, route prefix and node group all
  // share the folder's name.
  const DOC_SECTIONS = ['docs', 'notes', 'lists'].map((dir) => ({
    dir,
    base: `/${dir}`,
    group: dir,
  }));

  // Blog-style sections (routes come from the post slug rather than the path).
  const BLOG_SECTIONS = ['posts'].map((dir) => ({
    dir,
    base: `/${dir}`,
    group: dir,
  }));

  // Hand-listed routes that are registered as "page" nodes without reading
  // any Markdown source for them.
  const PAGES = [
    ['/', 'Home'],
    ['/ai', 'AI Assistant'],
    ['/help', 'Help'],
    ['/map', 'Map'],
    ['/about', 'About'],
    ['/posts', 'Posts (blog index)'],
  ].map(([route, label]) => ({ route, label, group: 'page' }));
  // ---------- helpers ----------

  /**
   * Recursively collect every Markdown/MDX file beneath `dir`.
   *
   * Dot-entries (e.g. .obsidian) and `node_modules` are skipped. Underscore
   * "partial" files are deliberately kept, because they act as hub pages.
   *
   * @param {string} dir - Directory to scan.
   * @returns {string[]} Paths (each `dir` joined with the nested names) of all
   *   files whose name ends in `.md` or `.mdx`. Empty when `dir` cannot be read.
   */
  function walk(dir) {
    let entries;
    try {
      entries = readdirSync(dir);
    } catch {
      // Best-effort: a missing or unreadable section folder contributes nothing.
      return [];
    }

    const out = [];
    for (const name of entries) {
      if (name.startsWith('.') || name === 'node_modules') continue; // skip .obsidian etc.
      const full = join(dir, name);

      let st;
      try {
        st = statSync(full);
      } catch {
        // A dangling symlink, or an entry removed while scanning, must not
        // abort the whole build; just leave it out.
        continue;
      }

      if (st.isDirectory()) out.push(...walk(full));
      else if (/\.mdx?$/.test(name)) out.push(full); // include _partials (hubs)
    }
    return out;
  }
  /**
   * Minimal front-matter reader: splits a Markdown source into flat
   * `key: value` pairs and the remaining body.
   *
   * Only single-line scalar entries are understood (no nesting or lists).
   * Values are kept as strings, with one surrounding quote character removed
   * from each end. A leading byte-order mark is ignored and both LF and CRLF
   * line endings are accepted.
   *
   * @param {string} raw - Full file contents.
   * @returns {{data: Object<string, string>, body: string}} `data` is empty
   *   when the text does not start with `---` or the block is never closed;
   *   in that case `body` is the whole text. Otherwise `body` is everything
   *   after the closing `---`.
   */
  function parseFrontmatter(raw) {
    // A BOM in front of the opening fence would otherwise hide the front matter.
    const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
    if (!text.startsWith('---')) return { data: {}, body: text };

    const end = text.indexOf('\n---', 3);
    if (end === -1) return { data: {}, body: text };

    const data = {};
    for (const rawLine of text.slice(3, end).split('\n')) {
      // With CRLF files each line keeps a trailing '\r', which `.` never
      // matches; strip it so the key/value pattern still applies.
      const line = rawLine.replace(/\r$/, '');
      const m = line.match(/^([A-Za-z0-9_-]+):\s*(.*)$/);
      if (m) data[m[1]] = m[2].trim().replace(/^["']|["']$/g, '');
    }
    return { data, body: text.slice(end + 4) };
  }
  // Front-matter flags arrive as strings, so accept the boolean `true` as well
  // as the exact string 'true' (case-sensitive); everything else is false.
  const isTrue = (v) => [true, 'true'].includes(v);
  // Canonicalise a route: collapse runs of slashes into one and drop trailing
  // slashes (the lone root "/" is kept). An empty result becomes "/".
  const tidy = (r) => {
    const collapsed = r.replace(/\/{2,}/g, '/');
    const trimmed = collapsed.replace(/(.)\/+$/, '$1');
    return trimmed === '' ? '/' : trimmed;
  };
  /**
   * Pick a display title for a document.
   *
   * @param {Object} data - Parsed front matter; a truthy `title` wins.
   * @param {string} body - Markdown body, searched for the first `# ` heading.
   * @param {string} fallback - Returned when neither source provides a title.
   * @returns {string} The front-matter title, else the trimmed text of the
   *   first level-one heading, else `fallback`.
   */
  function titleOf(data, body, fallback) {
    const { title } = data;
    if (title) return title;

    const heading = /^#\s+(.+)$/m.exec(body);
    if (heading === null) return fallback;
    return heading[1].trim();
  }
  // Label for an underscore hub file: its base name without the leading "_",
  // first character upper-cased, followed by " (index)".
  const partialLabel = (rel) => {
    const name = basename(rel).replace(/^_/, '');
    const head = name.slice(0, 1).toUpperCase();
    const tail = name.slice(1);
    return `${head}${tail} (index)`;
  };
  /**
   * Docusaurus folder-index rule: a doc named `index`, `README`, or the same
   * as its parent folder stands for the folder route.
   *
   * @param {string} rel - Section-relative path without extension, "/"-separated.
   * @returns {{isIndex: *, dir: string}} `dir` is `rel` minus its last segment.
   *   `isIndex` is truthy when the rule applies; callers should only test its
   *   truthiness (it can be `null` or `''` rather than `false` when there is
   *   no usable parent folder name).
   */
  function folderIndex(rel) {
    const segments = rel.split('/');
    const folders = segments.slice(0, -1);
    const name = segments[segments.length - 1].toLowerCase();
    const parent = folders.length > 0 ? folders[folders.length - 1].toLowerCase() : null;

    // Comparisons are case-insensitive (README, Index, Dogs/dogs, ...).
    const isIndex = ['index', 'readme'].includes(name) || (parent && name === parent);
    return { isIndex, dir: folders.join('/') };
  }
  /**
   * Compute the published route of a doc.
   *
   * @param {string} base - Section route prefix, e.g. "/notes".
   * @param {string} rel - Section-relative path without extension.
   * @param {string} [slug] - Optional front-matter slug.
   * @returns {string} Tidied route:
   *   - a slug starting with "/" is appended directly to `base`;
   *   - any other slug replaces the file name inside the doc's own folder;
   *   - without a slug, a folder-index doc maps to its folder (or to `base`
   *     at the section root), and any other doc maps to `base`/`rel`.
   */
  function docRoute(base, rel, slug) {
    if (slug && slug.startsWith('/')) return tidy(`${base}${slug}`);

    const { isIndex, dir } = folderIndex(rel);

    let path;
    if (slug) {
      // `dir` is the doc's containing folder whether or not it is a folder index.
      path = `${base}/${dir}/${slug}`;
    } else if (!isIndex) {
      path = `${base}/${rel}`;
    } else if (dir) {
      path = `${base}/${dir}`;
    } else {
      path = base;
    }
    return tidy(path);
  }
  // ---------- pass 1: nodes + a section-relative path → route map ----------

  const nodes = new Map(); // route -> {id,label,group,partial?}
  const byLc = new Map(); // lower(route) -> route
  const fileRoute = new Map(); // `${base}|${relPathNoExt}` -> route (+ folder aliases)
  const files = []; // {base, rel, route, body}

  /**
   * Register a graph node once per route; the first registration wins, both
   * for the node itself and for the lower-cased lookup alias.
   *
   * @param {string} route - Node id.
   * @param {string} label - Display label.
   * @param {string} group - Node group.
   * @param {boolean} [partial=false] - When truthy the node is flagged
   *   `partial: true`; otherwise the property is omitted entirely.
   */
  const addNode = (route, label, group, partial = false) => {
    if (!nodes.has(route)) {
      const node = { id: route, label, group };
      if (partial) node.partial = true;
      nodes.set(route, node);
    }

    const lower = route.toLowerCase();
    if (!byLc.has(lower)) byLc.set(lower, route);
  };
  // Seed the hand-listed PAGES routes as nodes before scanning any files.
  PAGES.forEach((p) => addNode(p.route, p.label, p.group));

  let skipped = 0; // files left out because of draft / unlisted / private flags
  let noSlug = 0; // blog posts whose route had to be derived from the file name

  for (const { dir, base, group } of DOC_SECTIONS) {
    const sectionRoot = join(ROOT, dir);
    for (const file of walk(sectionRoot)) {
      const { data, body } = parseFrontmatter(readFileSync(file, 'utf8'));
      if (isTrue(data.draft) || isTrue(data.unlisted) || isTrue(data.private)) { skipped++; continue; }

      // path.relative() yields platform separators; routes and fileRoute keys
      // are always "/"-separated, so normalise before anything else uses `rel`.
      const rel = relative(sectionRoot, file).split(sep).join('/').replace(/\.mdx?$/, '');

      if (basename(file).startsWith('_')) { // hub/index partial
        // Partials get a synthetic id and a non-navigable "index" node, and
        // are not added to fileRoute; their body is still scanned for links.
        const id = `${base}/${rel}`;
        addNode(id, partialLabel(rel), 'index', true);
        files.push({ base, rel, route: id, body });
        continue;
      }

      const route = docRoute(base, rel, data.slug);
      addNode(route, titleOf(data, body, basename(rel)), group);
      fileRoute.set(`${base}|${rel}`, route);
      const fi = folderIndex(rel);
      if (fi.isIndex) fileRoute.set(`${base}|${fi.dir}`, route); // link to the folder itself
      files.push({ base, rel, route, body });
    }
  }
  // Blog sections: routes come from the front-matter slug, or from the file
  // name with a leading YYYY-MM-DD- date prefix removed.
  for (const { dir, base, group } of BLOG_SECTIONS) {
    const sectionRoot = join(ROOT, dir);
    for (const file of walk(sectionRoot)) {
      const { data, body } = parseFrontmatter(readFileSync(file, 'utf8'));
      if (isTrue(data.draft) || isTrue(data.unlisted)) { skipped++; continue; }

      // path.relative() yields platform separators; routes and fileRoute keys
      // are always "/"-separated, so normalise before anything else uses `rel`.
      const rel = relative(sectionRoot, file).split(sep).join('/').replace(/\.mdx?$/, '');

      if (basename(file).startsWith('_')) { // _posts.md hub
        const id = `${base}/${rel}`;
        addNode(id, partialLabel(rel), 'index', true);
        files.push({ base, rel, route: id, body });
        continue;
      }

      let slug = data.slug;
      if (!slug) { slug = rel.replace(/^\d{4}-\d{2}-\d{2}-/, ''); noSlug++; }
      const route = tidy(`${base}/${slug}`);
      addNode(route, titleOf(data, body, slug), group);
      fileRoute.set(`${base}|${rel}`, route); // by filename (for _posts.md links)
      fileRoute.set(`${base}|${slug}`, route);
      files.push({ base, rel, route, body });
    }
  }
  // ---------- link resolution ----------

  // Map a candidate route onto a known node id: exact match first, then a
  // case-insensitive match via the lower-cased alias table; null otherwise
  // (also for a null/undefined candidate).
  const matchRoute = (r) => {
    if (r == null) return null;
    if (nodes.has(r)) return r;
    return byLc.get(r.toLowerCase()) || null;
  };
  /**
   * Resolve a link target found in a file to the id of a known node.
   *
   * @param {string} href - Raw link target from Markdown or an href attribute.
   * @param {string} base - Route prefix of the section containing the linking
   *   file, e.g. "/docs".
   * @param {string} rel - Section-relative path (no extension) of the linking
   *   file; relative links are resolved against its folder.
   * @returns {string|null} The node id, or null for external, mailto:,
   *   fragment-only, empty and unknown targets.
   */
  function resolveLink(href, base, rel) {
    if (!href) return null;
    let h = href.trim();

    // Absolute URLs only count when they point at this site; keep the path.
    if (/^https?:\/\//i.test(h)) {
      const m = h.match(/^https?:\/\/davidawindham\.com(\/.*)?$/i);
      if (!m) return null;
      h = m[1] || '/';
    }
    if (h.startsWith('mailto:') || h.startsWith('#')) return null;

    h = h.split('#')[0].split('?')[0]; // drop fragment and query
    if (!h) return null;

    // A leading /til segment is stripped before matching.
    // NOTE(review): presumably the site is deployed under /til — confirm.
    if (h.startsWith('/til/')) h = h.slice(4);
    else if (h === '/til') h = '/';
    h = h.replace(/\.mdx?$/i, '');

    if (h.startsWith('/')) { // absolute route or file path
      const clean = tidy(h);
      const exact = matchRoute(clean);
      if (exact) return exact;
      // .md link to a folder-index doc: first segment selects the section,
      // the rest is the section-relative file path. Using the tidied path
      // keeps a trailing slash from defeating the lookup.
      const seg = clean.replace(/^\//, '').split('/');
      return fileRoute.get(`/${seg[0]}|${seg.slice(1).join('/')}`) || null;
    }

    // Relative link: resolve "." and ".." against the site-level path of the
    // linking file's folder, so a link that climbs out of its own section
    // (../notes/x.md from a docs file) lands in the section it points at
    // instead of being clamped to the current section's root.
    const stack = [];
    for (const p of `${base}/${dirname(rel)}/${h}`.split('/')) {
      if (p === '' || p === '.') continue;
      if (p === '..') stack.pop();
      else stack.push(p);
    }
    const full = `/${stack.join('/')}`;

    const inSection = full === base || full.startsWith(`${base}/`);
    const key = inSection
      ? `${base}|${full.slice(base.length + 1)}`
      // Outside the current section: as in the absolute branch, the first
      // segment is taken to be the target section's route prefix.
      : `/${stack[0] || ''}|${stack.slice(1).join('/')}`;

    // File-path lookup takes precedence over a plain route match here.
    return fileRoute.get(key) || matchRoute(tidy(full));
  }
  // ---------- pass 2: edges ----------

  const seen = new Set(); // `${source}|${target}` pairs already emitted
  const links = []; // {source, target}
  const unresolved = new Map(); // DEBUG only: target (fragment removed) -> count
  const LINK_RE = /\]\(([^)\s]+)(?:\s+"[^"]*")?\)/g;
  const HREF_RE = /href=["']([^"']+)["']/g;

  for (const { base, rel, route, body } of files) {
    // Distinct raw targets of this file: Markdown links first, then href="…".
    const targets = new Set([
      ...Array.from(body.matchAll(LINK_RE), (hit) => hit[1]),
      ...Array.from(body.matchAll(HREF_RE), (hit) => hit[1]),
    ]);

    for (const t of targets) {
      const dest = resolveLink(t, base, rel);

      if (!dest) {
        // Diagnostics: tally targets that look internal (site-absolute, or a
        // non-URL path ending in .md/.mdx) but matched no node.
        const looksInternal = DEBUG
          && (t.startsWith('/') || (!/^https?:|^mailto:|^#/.test(t) && /\.mdx?($|#)/.test(t)));
        if (looksInternal) {
          const k = t.split('#')[0];
          unresolved.set(k, (unresolved.get(k) || 0) + 1);
        }
        continue;
      }

      if (dest === route) continue; // no self-loops

      const key = `${route}|${dest}`;
      if (seen.has(key)) continue; // one edge per ordered pair
      seen.add(key);
      links.push({ source: route, target: dest });
    }
  }
  // size nodes by total degree so hubs read as hubs
  const deg = new Map(); // node id -> number of edges touching it
  for (const { source, target } of links) {
    for (const id of [source, target]) deg.set(id, (deg.get(id) || 0) + 1);
  }

  // Every node gets val = 1 + degree, so isolated nodes still have size 1.
  const nodeList = Array.from(nodes.values(), (n) => ({ ...n, val: 1 + (deg.get(n.id) || 0) }));

  {
    // Write compact (un-indented) JSON followed by a newline.
    const outDir = join(ROOT, 'src', 'data');
    mkdirSync(outDir, { recursive: true });
    writeFileSync(join(outDir, 'graph.json'), `${JSON.stringify({ nodes: nodeList, links })}\n`);
  }
  // ---------- report ----------

  // Number of nodes in a given group.
  const g = (grp) => nodeList.reduce((count, n) => (n.group === grp ? count + 1 : count), 0);
  // Nodes that appear in no edge at all.
  const orphans = nodeList.filter(({ id }) => !deg.has(id));

  [
    `graph.json → ${nodeList.length} nodes, ${links.length} edges`,
    ` nodes: docs=${g('docs')} notes=${g('notes')} lists=${g('lists')} posts=${g('posts')} pages=${g('page')} index=${g('index')}`,
    ` post → page edges: ${links.filter((e) => e.source.startsWith('/posts/')).length}`,
    ` orphans (no links in or out): ${orphans.length}`,
    ` skipped drafts: ${skipped}; posts w/o slug: ${noSlug}`,
  ].forEach((line) => console.log(line));

  if (DEBUG) {
    // Show the 30 most frequent unresolved targets, most frequent first.
    console.log('\n unresolved internal-looking targets:');
    [...unresolved.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 30)
      .forEach(([t, c]) => console.log(` ${c}× ${t}`));
  }