build-graph.mjs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. #!/usr/bin/env node
  2. /**
  3. * build-graph.mjs — generate a {nodes, links} JSON of the site's internal link
  4. * graph for the on-site force-directed graph (src/pages/graph).
  5. *
  6. * Why this exists: md-graph (the VS Code extension) only resolves relative .md
  7. * links, so it misses this site's cross-section links (posts → docs/notes/lists),
  8. * which are written as absolute Docusaurus route URLs (/notes/house/helene).
  9. * Here we normalize BOTH link styles to canonical routes, so those edges appear.
  10. *
  11. * Routes mirror Docusaurus, including the folder-index convention: a doc named
  12. * `index`, `README`, OR the same as its parent folder (dogs/dogs.md) becomes the
  13. * folder's route (/notes/dogs) — not /notes/dogs/dogs.
  14. *
  15. * Underscore files (_posts.md, _computers.md, …) are Docusaurus "partials" with
  16. * no published route, but they're hand-maintained index/hub files that link to
  17. * many pages. We include them as non-navigable "index" nodes so those hub
  18. * connections show up (this is what md-graph does too).
  19. *
  20. * Output: src/data/graph.json → imported by the graph component.
  21. * Run: node scripts/build-graph.mjs (or `npm run graph`; DEBUG=1 for diagnostics)
  22. */
  23. import { readFileSync, writeFileSync, mkdirSync, readdirSync, statSync } from 'fs';
  24. import { join, relative, dirname, basename } from 'path';
  25. import { fileURLToPath } from 'url';
  26. const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');
  27. const DEBUG = process.env.DEBUG === '1';
  28. const DOC_SECTIONS = [
  29. { dir: 'docs', base: '/docs', group: 'docs' },
  30. { dir: 'notes', base: '/notes', group: 'notes' },
  31. { dir: 'lists', base: '/lists', group: 'lists' },
  32. ];
  33. const BLOG_SECTIONS = [{ dir: 'posts', base: '/posts', group: 'posts' }];
  34. const PAGES_DIR = 'src/pages'; // about.md, ai.mdx, map.md, …
  35. const MANUAL_NODES = [ // routes with no .md in src/pages
  36. { route: '/graph', label: 'Graph', group: 'page' }, // graph.jsx
  37. { route: '/posts', label: 'Posts (blog index)', group: 'page' },
  38. ];
  39. // ---------- helpers ----------
  40. function walk(dir) {
  41. const out = [];
  42. let entries;
  43. try { entries = readdirSync(dir); } catch { return out; }
  44. for (const name of entries) {
  45. if (name.startsWith('.') || name === 'node_modules') continue; // skip .obsidian etc.
  46. const full = join(dir, name);
  47. const st = statSync(full);
  48. if (st.isDirectory()) out.push(...walk(full));
  49. else if (/\.mdx?$/.test(name)) out.push(full); // include _partials (hubs)
  50. }
  51. return out;
  52. }
  53. function parseFrontmatter(raw) {
  54. if (!raw.startsWith('---')) return { data: {}, body: raw };
  55. const end = raw.indexOf('\n---', 3);
  56. if (end === -1) return { data: {}, body: raw };
  57. const data = {};
  58. for (const line of raw.slice(3, end).split('\n')) {
  59. const m = line.match(/^([A-Za-z0-9_-]+):\s*(.*)$/);
  60. if (m) data[m[1]] = m[2].trim().replace(/^["']|["']$/g, '');
  61. }
  62. return { data, body: raw.slice(end + 4) };
  63. }
  64. const isTrue = (v) => v === true || v === 'true';
  65. const tidy = (r) => (r.replace(/\/{2,}/g, '/').replace(/(.)\/+$/, '$1') || '/');
  66. function titleOf(data, body, fallback) {
  67. if (data.title) return data.title;
  68. const h1 = body.match(/^#\s+(.+)$/m);
  69. return h1 ? h1[1].trim() : fallback;
  70. }
  71. const partialLabel = (rel) => {
  72. const b = basename(rel).replace(/^_/, '');
  73. return `${b.charAt(0).toUpperCase()}${b.slice(1)} (index)`;
  74. };
  75. // Docusaurus folder-index: index | README | same-name-as-folder → folder route.
  76. function folderIndex(rel) {
  77. const parts = rel.split('/');
  78. const last = parts[parts.length - 1].toLowerCase();
  79. const parent = parts.length >= 2 ? parts[parts.length - 2].toLowerCase() : null;
  80. const isIndex = last === 'index' || last === 'readme' || (parent && last === parent);
  81. return { isIndex, dir: parts.slice(0, -1).join('/') };
  82. }
  83. function docRoute(base, rel, slug) {
  84. if (slug && slug.startsWith('/')) return tidy(base + slug);
  85. const fi = folderIndex(rel);
  86. if (slug) {
  87. const dir = fi.isIndex ? fi.dir : rel.split('/').slice(0, -1).join('/');
  88. return tidy(`${base}/${dir}/${slug}`);
  89. }
  90. if (fi.isIndex) return tidy(fi.dir ? `${base}/${fi.dir}` : base);
  91. return tidy(`${base}/${rel}`);
  92. }
  93. // ---------- pass 1: nodes + a section-relative path → route map ----------
  94. const nodes = new Map(); // route -> {id,label,group,partial?}
  95. const byLc = new Map(); // lower(route) -> route
  96. const fileRoute = new Map(); // `${base}|${relPathNoExt}` -> route (+ folder aliases)
  97. const files = []; // {base, rel, route, body}
  98. const addNode = (route, label, group, partial = false) => {
  99. if (!nodes.has(route)) nodes.set(route, partial ? { id: route, label, group, partial: true } : { id: route, label, group });
  100. if (!byLc.has(route.toLowerCase())) byLc.set(route.toLowerCase(), route);
  101. };
  102. MANUAL_NODES.forEach((p) => addNode(p.route, p.label, p.group));
  103. let skipped = 0, noSlug = 0;
  104. for (const { dir, base, group } of DOC_SECTIONS) {
  105. for (const file of walk(join(ROOT, dir))) {
  106. const { data, body } = parseFrontmatter(readFileSync(file, 'utf8'));
  107. if (isTrue(data.draft) || isTrue(data.unlisted) || isTrue(data.private)) { skipped++; continue; }
  108. const rel = relative(join(ROOT, dir), file).replace(/\.mdx?$/, '');
  109. if (basename(file).startsWith('_')) { // hub/index partial
  110. const id = `${base}/${rel}`;
  111. addNode(id, partialLabel(rel), 'index', true);
  112. files.push({ base, rel, route: id, body });
  113. continue;
  114. }
  115. const route = docRoute(base, rel, data.slug);
  116. addNode(route, titleOf(data, body, basename(rel)), group);
  117. fileRoute.set(`${base}|${rel}`, route);
  118. const fi = folderIndex(rel);
  119. if (fi.isIndex) fileRoute.set(`${base}|${fi.dir}`, route); // link to the folder itself
  120. files.push({ base, rel, route, body });
  121. }
  122. }
  123. for (const { dir, base, group } of BLOG_SECTIONS) {
  124. for (const file of walk(join(ROOT, dir))) {
  125. const { data, body } = parseFrontmatter(readFileSync(file, 'utf8'));
  126. if (isTrue(data.draft) || isTrue(data.unlisted)) { skipped++; continue; }
  127. const rel = relative(join(ROOT, dir), file).replace(/\.mdx?$/, '');
  128. if (basename(file).startsWith('_')) { // _posts.md hub
  129. const id = `${base}/${rel}`;
  130. addNode(id, partialLabel(rel), 'index', true);
  131. files.push({ base, rel, route: id, body });
  132. continue;
  133. }
  134. let slug = data.slug;
  135. if (!slug) { slug = rel.replace(/^\d{4}-\d{2}-\d{2}-/, ''); noSlug++; }
  136. const route = tidy(`${base}/${slug}`);
  137. addNode(route, titleOf(data, body, slug), group);
  138. fileRoute.set(`${base}|${rel}`, route); // by filename (for _posts.md links)
  139. fileRoute.set(`${base}|${slug}`, route);
  140. files.push({ base, rel, route, body });
  141. }
  142. }
  143. // standalone pages (src/pages/*.md|mdx) — parse their bodies so About/Map/AI/Home connect
  144. for (const file of walk(join(ROOT, PAGES_DIR))) {
  145. const name = basename(file).replace(/\.mdx?$/, '');
  146. if (name.startsWith('_')) continue;
  147. const { data, body } = parseFrontmatter(readFileSync(file, 'utf8'));
  148. const route = name === 'index' ? '/' : `/${name}`;
  149. addNode(route, titleOf(data, body, name), 'page');
  150. files.push({ base: '', rel: name, route, body }); // base '' → relative links resolve from site root
  151. }
  152. // ---------- link resolution ----------
  153. const matchRoute = (r) => (r == null ? null : nodes.has(r) ? r : byLc.get(r.toLowerCase()) || null);
  154. function resolveLink(href, base, rel) {
  155. if (!href) return null;
  156. let h = href.trim();
  157. if (/^https?:\/\//i.test(h)) {
  158. const m = h.match(/^https?:\/\/davidawindham\.com(\/.*)?$/i);
  159. if (!m) return null;
  160. h = m[1] || '/';
  161. }
  162. if (h.startsWith('mailto:') || h.startsWith('#')) return null;
  163. h = h.split('#')[0].split('?')[0];
  164. if (!h) return null;
  165. if (h.startsWith('/til/')) h = h.slice(4);
  166. else if (h === '/til') h = '/';
  167. h = h.replace(/\.mdx?$/i, '');
  168. if (h.startsWith('/')) { // absolute route or file path
  169. const exact = matchRoute(tidy(h));
  170. if (exact) return exact;
  171. const seg = h.replace(/^\//, '').split('/'); // .md link to a folder-index doc
  172. return fileRoute.get(`/${seg[0]}|${seg.slice(1).join('/')}`) || null;
  173. }
  174. const parts = (dirname(rel) + '/' + h).split('/'); // relative → resolve via file dir
  175. const st = [];
  176. for (const p of parts) { if (p === '' || p === '.') continue; if (p === '..') st.pop(); else st.push(p); }
  177. return fileRoute.get(`${base}|${st.join('/')}`) || matchRoute(tidy(base + '/' + st.join('/')));
  178. }
  179. // ---------- pass 2: edges ----------
  180. const seen = new Set();
  181. const links = [];
  182. const unresolved = new Map();
  183. const LINK_RE = /\]\(([^)\s]+)(?:\s+"[^"]*")?\)/g;
  184. const HREF_RE = /href=["']([^"']+)["']/g;
  185. const CLICK_RE = /^\s*click\s+\S+\s+"([^"]+)"/gm; // mermaid: click NODE "url" (map.md)
  186. for (const { base, rel, route, body } of files) {
  187. const targets = new Set();
  188. let m;
  189. while ((m = LINK_RE.exec(body))) targets.add(m[1]);
  190. while ((m = HREF_RE.exec(body))) targets.add(m[1]);
  191. while ((m = CLICK_RE.exec(body))) targets.add(m[1]);
  192. for (const t of targets) {
  193. const dest = resolveLink(t, base, rel);
  194. if (!dest) {
  195. if (DEBUG && (t.startsWith('/') || (!/^https?:|^mailto:|^#/.test(t) && /\.mdx?($|#)/.test(t)))) {
  196. const k = t.split('#')[0]; unresolved.set(k, (unresolved.get(k) || 0) + 1);
  197. }
  198. continue;
  199. }
  200. if (dest === route) continue;
  201. const key = `${route}|${dest}`;
  202. if (seen.has(key)) continue;
  203. seen.add(key);
  204. links.push({ source: route, target: dest });
  205. }
  206. }
  207. // size nodes by total degree so hubs read as hubs
  208. const deg = new Map();
  209. for (const e of links) { deg.set(e.source, (deg.get(e.source) || 0) + 1); deg.set(e.target, (deg.get(e.target) || 0) + 1); }
  210. const nodeList = [...nodes.values()].map((n) => ({ ...n, val: 1 + (deg.get(n.id) || 0) }));
  211. mkdirSync(join(ROOT, 'src', 'data'), { recursive: true });
  212. writeFileSync(join(ROOT, 'src', 'data', 'graph.json'), JSON.stringify({ nodes: nodeList, links }, null, 0) + '\n');
  213. // ---------- report ----------
  214. const g = (grp) => nodeList.filter((n) => n.group === grp).length;
  215. const orphans = nodeList.filter((n) => !deg.has(n.id));
  216. console.log(`graph.json → ${nodeList.length} nodes, ${links.length} edges`);
  217. console.log(` nodes: docs=${g('docs')} notes=${g('notes')} lists=${g('lists')} posts=${g('posts')} pages=${g('page')} index=${g('index')}`);
  218. console.log(` post → page edges: ${links.filter((e) => e.source.startsWith('/posts/')).length}`);
  219. console.log(` orphans (no links in or out): ${orphans.length}`);
  220. console.log(` skipped drafts: ${skipped}; posts w/o slug: ${noSlug}`);
  221. if (DEBUG) {
  222. const top = [...unresolved.entries()].sort((a, b) => b[1] - a[1]).slice(0, 30);
  223. console.log('\n unresolved internal-looking targets:');
  224. for (const [t, c] of top) console.log(` ${c}× ${t}`);
  225. }