Public
HTTP (deprecated)
Val Town is a social website to write and deploy JavaScript.
Build APIs and schedule functions from your browser.
Readme

Fetch the contents of the Wikipedia "On this day in history" page. Defaults to HTML output, but specify ?format=json or ?format=text for other outputs, e.g. https://jamiedubs-wikipediatoday.web.val.run?format=text

#wikipedia

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { load } from "npm:cheerio";
import { format } from "npm:date-fns";
// Wikipedia titles its per-day articles "<Month>_<day>". As of July 20, 2024 the
// day-specific titles (e.g. "July_20" and "Selected_anniversaries/July_20") are used
// instead of the previous https://en.wikipedia.org/wiki/Wikipedia:On_this_day/Today URL.
// NOTE(review): both values are computed once when the module loads, using the
// runtime's local clock — confirm that matches how long an instance stays alive.
const monthAndDay = format(new Date(), "MMMM d");
// The date-fns pattern yields "<Month> <day>"; the article title needs an underscore.
const todayFormatted = monthAndDay.replace(" ", "_");
// Alternative source page, currently unused:
// const onThisDayUrl = `https://en.wikipedia.org/wiki/Wikipedia:Selected_anniversaries/${todayFormatted}`;
const onThisDayUrl = "https://en.wikipedia.org/wiki/" + todayFormatted;
console.log({ todayFormatted, onThisDayUrl });
// Doing some homework on other interesting pages...
//
// Homepage sections:
// https://wikipedia.org
// - Today's Featured Article (TFA): https://en.wikipedia.org/w/index.php?title=Wikipedia:TFA
// - Did you know...
// - In The News: https://en.wikipedia.org/wiki/Portal:Current_events
// - On This Day in History: https://en.wikipedia.org/wiki/Wikipedia:On_this_day/Today
//
// Special pages:
// https://en.wikipedia.org/wiki/Special:SpecialPages
// - Recent additions: https://en.wikipedia.org/wiki/Wikipedia:Recent_additions
// - Random article: https://en.wikipedia.org/wiki/Special:Random
// - Random article in category (e.g. Internet memes): https://en.wikipedia.org/wiki/Special:RandomInCategory?wpcategory=Internet+memes
// - Search: TODO
//
// More ideas...
// Content portals: https://en.wikipedia.org/wiki/Wikipedia:Contents/Portals
//
/**
 * Rough plain-text extraction for an arbitrary wiki page.
 *
 * Reads the text of the first `#mw-content-text` element, keeps only what comes
 * before the first ".mw-parser-output" marker, drops the first and last line of
 * that text, and trims the result. Each intermediate value is logged.
 *
 * @param $ - a loaded cheerio document (called as `$(selector)`)
 * @returns the trimmed text; an empty string when fewer than three lines remain
 */
const genericBodyParse = ($) => {
  const body = $("#mw-content-text").first().text();
  console.log("body", body);

  // split() always yields at least one string, so the leading segment is the
  // only one ever used. The marker is presumably inlined CSS text — not
  // confirmed from this file.
  const [leadingText] = body.split(".mw-parser-output");
  console.log("parsedBody1", leadingText);

  // Discard the first and last line, then strip surrounding whitespace.
  const lines = leadingText.split("\n");
  const parsedBody = lines.slice(1, -1).join("\n").trim();
  console.log("parsedBody2", parsedBody);

  return parsedBody;
};
/**
 * Downloads `url`, loads the HTML with cheerio and reduces it to plain text.
 *
 * The "on this day" page gets dedicated handling; every other URL goes through
 * `genericBodyParse`. The URL and the resulting text are logged before returning.
 *
 * @param url - address of the page to fetch
 * @returns the extracted text
 */
const fetchAndParsePage = async (url: string) => {
  const $ = load(await fetchText(url));

  const extractOnThisDay = () => {
    // pull all <ul>'s out and combine together
    // this is fetching all 3 major sections (Events, Births, Deaths) but I really only care about Events
    const allLists = $(".mw-content-ltr > ul").text();
    // FIXME this isn't quite working but is what I want...
    // parsedBody = $(".mw-heading").first().nextUntil(".mw-heading", "ul").text();
    // FIXME in the meantime, hack to truncate :-(
    const maxLength = 9_000;
    return allLists.slice(0, maxLength);
  };

  // rough default WP page parsing for anything else. pretty inconsistent
  const parsedBody = url === onThisDayUrl ? extractOnThisDay() : genericBodyParse($);

  console.log({ url, parsedBody });
  return parsedBody;
};
/** Output formats accepted by the `format` query parameter. */
const allowedFormats = ["html", "json", "text"];

/**
 * Escapes the characters that are significant in HTML so that scraped text can
 * be embedded in markup without being interpreted as tags, attributes or entities.
 */
const escapeHtml = (text: string): string =>
  text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");

/**
 * HTTP handler: fetches a page, extracts its text and returns it in the requested format.
 *
 * Query parameters:
 * - `url`: page to fetch; defaults to `onThisDayUrl`. Must be an absolute http(s) URL.
 * - `format`: one of `allowedFormats`; defaults to "html".
 *
 * @param req - the incoming request
 * @returns 400 for an unsupported `format` or `url`; otherwise the extracted text as
 *   JSON (`{ data: string[] }`, one entry per line), an HTML page wrapping the
 *   escaped text in `<pre>`, or plain text
 */
export const wikipediaToday = async (req: Request) => {
  const searchParams = new URL(req.url).searchParams;
  const url = searchParams.get("url") ?? onThisDayUrl;
  // Named `outputFormat` so it does not shadow the date-fns `format` import.
  const outputFormat = searchParams.get("format") ?? "html";
  if (!allowedFormats.includes(outputFormat)) {
    return new Response(
      `unsupported format '${outputFormat}'. allowed formats: ${allowedFormats.join(", ")}`,
      { status: 400 },
    );
  }

  // `url` is caller-controlled: reject anything that is not a well-formed http(s)
  // URL before handing it to the fetcher.
  // NOTE(review): the host is still unrestricted — consider an allow-list if this
  // endpoint should only ever read Wikipedia.
  let target: URL | undefined;
  try {
    target = new URL(url);
  } catch {
    target = undefined;
  }
  if (target === undefined || (target.protocol !== "http:" && target.protocol !== "https:")) {
    return new Response(
      `unsupported url '${url}'. only absolute http(s) URLs can be fetched`,
      { status: 400 },
    );
  }

  // Pass the original string through unchanged: the parser compares it to
  // `onThisDayUrl` to choose its extraction strategy.
  const data = await fetchAndParsePage(url);

  if (outputFormat === "json") {
    return Response.json({ data: data.split("\n") });
  } else if (outputFormat === "html") {
    // Escape the scraped text so markup-looking content cannot inject HTML or script.
    return new Response(
      `<!DOCTYPE html><html lang="en"><body><pre>${escapeHtml(data)}</pre></body></html>`,
      {
        // Explicit charset so non-ASCII text is decoded as UTF-8 by clients.
        headers: { "Content-Type": "text/html; charset=utf-8" },
      },
    );
  } else if (outputFormat === "text") {
    return new Response(data, {
      headers: { "Content-Type": "text/plain; charset=utf-8" },
    });
  } else {
    throw new Error("unreachable");
  }
};
jamiedubs-wikipediatoday.web.val.run
August 14, 2024