Public
HTTP (deprecated)
Val Town is a social website to write and deploy JavaScript.
Build APIs and schedule functions from your browser.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { html } from "https://esm.town/v/stevekrouse/html?v=5";
import { Readability } from "npm:@mozilla/readability";
// @ts-expect-error
import jsdom from "npm:jsdom";
// One unit of the structured article body; `parseDOM` returns an array of these.
type Block = {
// Lower-cased tag name of the source element. `parseDOM` rewrites the inline/list
// tags i, em, b, strong and li to "p" before emitting the block.
type: string;
// The element's textContent with backslash-escaped quotes/backslashes unescaped.
text?: string;
// NOTE(review): not populated by any code visible here — presumably a nested
// payload (e.g. list items) set by the list branch of `parseDOM`; confirm.
content?: any;
};
// JSON payload returned by the HTTP handler: the object produced by
// Readability's `parse()` with `url` added by the handler.
// NOTE(review): fields other than `content` and `url` are passed through from
// Readability untouched; their exact semantics are defined by that library.
type Article = {
title: string;
// Starts out as the HTML string produced by Readability; the handler replaces it
// with the Block array returned by `parseDOM` before responding.
content: string | Array<Block>;
textContent: string;
length: number;
excerpt: string;
byline: string;
dir: string;
siteName: string;
lang: string;
publishedTime: string;
// `href` of the URL the article was fetched from (set by the handler).
url: string;
};
/**
 * HTTP entry point: fetches an article, runs it through Readability and
 * responds with the result as JSON.
 *
 * The article to fetch is read from the request path, e.g.
 * `/https://example.com/post?id=1`. When the path does not hold an absolute
 * http(s) URL, a random entry from the Matter feed is used instead.
 *
 * @param req Incoming request; everything after the leading `/` (path plus
 *   query string) is interpreted as the article URL.
 * @returns A JSON `Article` whose `content` has been converted to blocks by
 *   `parseDOM`. On failure, a JSON `{ error, url? }` body with status 502
 *   (an upstream fetch failed) or 422 (no readable article could be extracted).
 */
export default async function(req: Request): Promise<Response> {
  let article_url: URL;
  try {
    article_url = articleUrlFromRequest(req);
  } catch {
    // Deliberate fallback: no usable URL in the path, so serve a random article.
    const feed_response = await fetch("https://api.getmatter.com/tools/api/rsw_entries/?format=json");
    if (!feed_response.ok) {
      return Response.json(
        { error: `Article feed responded with status ${feed_response.status}` },
        { status: 502 },
      );
    }
    const feed = await feed_response.json();
    const entries = Array.isArray(feed?.results) ? feed.results : [];
    if (entries.length === 0) {
      // Indexing an empty list would yield undefined and crash on `.url`.
      return Response.json({ error: "Article feed returned no entries" }, { status: 502 });
    }
    const random_entry = entries[Math.floor(Math.random() * entries.length)];
    article_url = new URL(random_entry.url);
  }

  const article_page = await fetch(article_url);
  if (!article_page.ok) {
    // Do not feed an upstream error page to Readability as if it were the article.
    return Response.json(
      { error: `Article responded with status ${article_page.status}`, url: article_url.href },
      { status: 502 },
    );
  }
  const article_body = await article_page.text();

  const JSDOM = jsdom.JSDOM;
  // Passing `url` gives the document a base URL so relative links can be resolved.
  const dom = new JSDOM(article_body, { url: article_url.href });
  const parsed = new Readability(dom.window.document).parse();
  if (parsed == null) {
    // Readability yields null when it cannot find article content on the page.
    return Response.json(
      { error: "No readable article content found", url: article_url.href },
      { status: 422 },
    );
  }
  const article: Article = { ...parsed, url: article_url.href };

  // Re-parse Readability's HTML output so it can be walked into structured blocks.
  const readability_dom = new JSDOM(article.content);
  const readability_dom_root = readability_dom.window.document.querySelector("#readability-page-1");
  if (readability_dom_root === null) {
    // Without the wrapper element there is nothing for parseDOM to walk.
    return Response.json(
      { error: "Readability output is missing its root element", url: article_url.href },
      { status: 422 },
    );
  }
  article.content = parseDOM(readability_dom_root);
  return Response.json(article);
}

/**
 * Extracts the absolute article URL embedded in the request path.
 *
 * The query string is kept because it belongs to the target URL
 * (`/https://site/a?id=3` must fetch `https://site/a?id=3`).
 *
 * @throws TypeError when the path is not an absolute URL or uses a protocol
 *   other than http/https.
 */
function articleUrlFromRequest(req: Request): URL {
  const { pathname, search } = new URL(req.url);
  const target = new URL((pathname + search).substring(1));
  if (target.protocol !== "http:" && target.protocol !== "https:") {
    throw new TypeError(`Unsupported protocol: ${target.protocol}`);
  }
  return target;
}
function parseDOM(element): Array<Block> {
let type: string = element.tagName.toLowerCase();
switch (type) {
case "h1":
case "h2":
case "h3":
case "h4":
case "h5":
case "h6":
case "p":
case "li":
case "i":
case "em":
case "b":
case "strong":
case "blockquote":
{
let text = element.textContent;
text = text.replace(/\\(["'\\])/g, `$1`); // remove escaped characters
const isRichText = /^(i|em|b|strong|li)$/;
if (isRichText.test(type)) type = "p";
let paragraphs = text.split("\n");
paragraphs = paragraphs.map((p) => p.trim());
if (text.length) {
return [{
type,
text,
}];
} else {
return [];
}
}
case "ul":
case "ol": {
let list_items = [];
for (let child of element.childNodes) {
if (child.nodeType === 1) { // Node.ELEMENT_NODE == 1
thesolarmonk-readability.web.val.run
September 9, 2024