Back

Version 3

9/2/2024
/** @jsxImportSource https://esm.sh/react */
import cheerio from "https://esm.sh/cheerio@1.0.0-rc.12";
import dagre from "https://esm.sh/cytoscape-dagre@2.5.0";
import cytoscape from "https://esm.sh/cytoscape@3.23.0";
import React, { useEffect, useRef, useState } from "https://esm.sh/react";
import { createRoot } from "https://esm.sh/react-dom/client";

// Register the imported cytoscape-dagre extension with cytoscape (module-level side effect).
cytoscape.use(dagre);

// CSS selector whose text content crawlWikipedia reads (trimmed) as the page title.
const TITLE_SELECTOR = "#firstHeading > span";

/**
 * Tells whether an infobox row label denotes a parent relationship.
 *
 * A label qualifies when it is exactly "Father" or "Mother", or when it
 * begins with "Parent" (so "Parents" and "Parent(s)" both match). The
 * comparison is case-sensitive and expects the label to be already trimmed.
 *
 * @param label - Text of the infobox label cell.
 * @returns `true` if the label names a parent, `false` otherwise.
 */
function labelPredicate(label: string): boolean {
  return label === "Father" || label === "Mother" || /^Parent/.test(label);
}

async function* crawlWikipedia(
url: string,
generation: number = 0,
visited: Set<string> = new Set(),
): AsyncGenerator<[string, string, string[], number], void, unknown> {
if (visited.has(url)) {
return;
}
visited.add(url);

const response = await fetch(url);
const html = await response.text();
const $ = cheerio.load(html);

const title = $(TITLE_SELECTOR).text().trim();
const parentLinks: string[] = [];

$("table.infobox.vcard > tbody > tr").each((_, row) => {
const $row = $(row);
const label = $row.find("th.infobox-label").text().trim();

stevekrouse-infoboxcrawler.web.val.run
Updated: September 2, 2024