1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/** @jsxImportSource https://esm.sh/react */
import cheerio from "https://esm.sh/cheerio@1.0.0-rc.12";
import dagre from "https://esm.sh/cytoscape-dagre@2.5.0";
import cytoscape from "https://esm.sh/cytoscape@3.23.0";
import React, { useEffect, useRef, useState } from "https://esm.sh/react";
import { createRoot } from "https://esm.sh/react-dom/client";
// Register the cytoscape-dagre extension with cytoscape once, at module load.
cytoscape.use(dagre);
// CSS selector whose text content is read as the page title when crawling.
const TITLE_SELECTOR = "#firstHeading > span";
/**
 * Decides whether an infobox row label denotes a parent or child relation.
 *
 * A label qualifies when it is exactly "Father", "Mother" or "Children",
 * or when it begins with "Parent" or "Child" (case-sensitive).
 *
 * @param label - Trimmed text of the infobox label cell.
 * @returns `true` when the row should be treated as a family-relation row.
 */
function labelPredicate(label) {
  const exactMatches = ["Father", "Mother", "Children"];
  if (exactMatches.includes(label)) {
    return true;
  }
  const prefixPatterns = [/^Parent/, /^Child/];
  return prefixPatterns.some((pattern) => pattern.test(label));
}
/**
 * Recursively crawls Wikipedia pages by following the links found in the
 * family-relation rows of each page's infobox, yielding one record per page.
 *
 * Each yielded tuple is `[url, title, familyLinks, generation]`:
 * - `url` is the page that was fetched, exactly as passed in;
 * - `title` is the trimmed text matched by `TITLE_SELECTOR`;
 * - `familyLinks` are the hrefs (as written in the page, minus any `#fragment`,
 *   de-duplicated) taken from rows whose label satisfies `labelPredicate`;
 * - `generation` is the depth at which the page was reached.
 *
 * A call with `generation >= 0` recurses with `generation + 1`; a call with
 * `generation < 0` recurses with `generation - 1`. Once the generation-0 walk
 * has finished, the same `url` is crawled again starting at generation -1 with
 * a fresh visited set, so pages can be fetched and yielded a second time with
 * negative generation numbers.
 *
 * NOTE(review): both passes follow parent *and* child links alike (the same
 * `labelPredicate` is used for each), so the sign of `generation` reflects
 * which pass reached the page rather than a verified relationship — confirm
 * this is intended before relying on it.
 *
 * @param url - Absolute URL of the page to crawl.
 * @param generation - Depth of this page relative to the starting page.
 * @param visited - URLs already crawled in the current pass; mutated in place.
 * @returns An async generator of `[url, title, familyLinks, generation]` tuples.
 * @throws Whatever `fetch` rejects with; network failures are not caught here.
 */
async function* crawlWikipedia(
  url: string,
  generation: number = 0,
  visited: Set<string> = new Set(),
): AsyncGenerator<[string, string, string[], number], void, unknown> {
  const isDescendant = generation < 0;
  if (Math.abs(generation) > 10) return; // Cap at 10 generations in either direction
  if (visited.has(url)) {
    return;
  }
  visited.add(url);

  const response = await fetch(url);
  const html = await response.text();
  const $ = cheerio.load(html);
  const title = $(TITLE_SELECTOR).text().trim();

  const familyLinks: string[] = [];
  $("table.infobox.vcard > tbody > tr").each((_, row) => {
    const $row = $(row);
    const label = $row.find("th.infobox-label").text().trim();
    if (!labelPredicate(label)) {
      return;
    }
    $row.find("td a:not(.extiw)").each((_, el) => {
      const href = $(el).attr("href");
      if (!href) {
        return;
      }
      // Fragments never identify a different page. Dropping them keeps
      // footnote anchors such as "#cite_note-1" from being crawled as if they
      // were relatives, and makes "/wiki/X" and "/wiki/X#Section" one link.
      const pageHref = href.split("#")[0];
      if (pageHref === "") {
        return; // Pure same-page anchor.
      }
      if (!familyLinks.includes(pageHref)) {
        familyLinks.push(pageHref);
      }
    });
  });

  yield [url, title, familyLinks, generation];

  const nextGeneration = isDescendant ? generation - 1 : generation + 1;
  for (const link of familyLinks) {
    let nextUrl: string;
    try {
      nextUrl = new URL(link, url).toString();
    } catch {
      // A malformed href on one page must not abort the whole crawl.
      continue;
    }
    yield* crawlWikipedia(nextUrl, nextGeneration, visited);
  }

  // Crawl descendants for the initial person
  if (generation === 0) {
    yield* crawlWikipedia(url, -1, new Set());
  }
}
function App() {
const [url, setUrl] = useState("");
const [nodes, setNodes] = useState([]);
const [edges, setEdges] = useState([]);
const cyRef = useRef(null);
const [isLoading, setIsLoading] = useState(false);
const [isCytoscapeDisabled, setIsCytoscapeDisabled] = useState(false);
const [treeDepth, setTreeDepth] = useState({ ancestors: 0, descendants: 0 });
const [maxGenerations, setMaxGenerations] = useState(20);
const handleSubmit = async (e, submittedUrl = null) => {
if (e) e.preventDefault();
setIsLoading(true);
const urlToSubmit = submittedUrl || url;
const encodedUrl = encodeURIComponent(urlToSubmit);
setNodes([]);
setIsCytoscapeDisabled(true);
setEdges([]);
setTreeDepth({ ancestors: 0, descendants: 0 });
const eventSource = new EventSource(`/stream?url=${encodedUrl}`);
const existingNodes = new Set();
eventSource.onmessage = (event) => {
const [nodeUrl, name, parentLinks, generation] = JSON.parse(event.data);
if (!existingNodes.has(nodeUrl)) {
setNodes(prevNodes => [...prevNodes, { data: { id: nodeUrl, label: name } }]);
existingNodes.add(nodeUrl);
}
if (treeDepth.ancestors + treeDepth.descendants >= maxGenerations) {
eventSource.close();
}
setTreeDepth(prevDepth => ({
ancestors: generation >= 0 ? Math.max(prevDepth.ancestors, generation) : prevDepth.ancestors,
descendants: generation < 0 ? Math.max(prevDepth.descendants, Math.abs(generation)) : prevDepth.descendants,