// Loads the Hacker News front page, asks OpenAI (via a function-calling tool
// whose parameters are derived from a zod schema) to extract the top 5 stories,
// and emails the extracted list as JSON.
import { loadPageContent } from "https://esm.town/v/charlypoly/browserbaseUtils";
import { OpenAI } from "https://esm.town/v/std/openai?v=4";
import { z } from "npm:zod";
import { zodToJsonSchema } from "npm:zod-to-json-schema";
import { email } from "https://esm.town/v/std/email?v=12";
// Front page we want to scrape.
const HN_FRONT_PAGE_URL = "https://news.ycombinator.com/";
// Matches every inline <style>…</style> block, across line breaks, case-insensitively.
const STYLE_BLOCK_PATTERN = /<style[^>]*>[\s\S]*?<\/style>/gi;

// Load the page (with `textContent: false` — presumably this returns the HTML
// markup rather than extracted text; confirm against `loadPageContent`) and
// drop the inline stylesheets: they only add noise to the prompt sent to the model.
let html = (await loadPageContent(HN_FRONT_PAGE_URL, { textContent: false }))
  .replace(STYLE_BLOCK_PATTERN, "");
// Shape of the data to extract from the page, expressed as `zod` schemas.

// A single story entry.
const storySchema = z.object({
  title: z.string(),
  points: z.number(),
  by: z.string(),
  url: z.string(),
});

// The full extraction result: exactly five stories under the `top` key.
const schema = z.object({
  top: z
    .array(storySchema)
    .length(5)
    .describe("Top 5 stories on Hacker News"),
});
// The zod schema converted to JSON Schema, which is the format OpenAI tools
// use to describe their parameters.
const extractContentParameters = zodToJsonSchema(schema);

// An OpenAI tool (function) whose arguments must match our schema.
const extractContentTool: any = {
  type: "function",
  function: {
    name: "extract_content",
    description: "Extracts the content from the given webpage(s)",
    parameters: extractContentParameters,
  },
};
const openai = new OpenAI();
// We ask OpenAI to extract the content from the given web page.
// The model is required to call our `extract_content` tool and, by doing so,
// it produces the data needed to satisfy the schema of `extract_content`'s
// arguments.
const completion = await openai.chat.completions.create({
  model: "gpt-4-turbo",
  messages: [
    {
      role: "system",
      content: "Extract the content from the given webpage(s)",
    },
    // The page HTML is the whole user message.
    { role: "user", content: html },
  ],
  tools: [extractContentTool],
  // Force the tool call: with "auto" the model may reply with plain text
  // instead, leaving `tool_calls` undefined and nothing to parse afterwards.
  tool_choice: {
    type: "function",
    function: { name: extractContentTool.function.name },
  },
});
// We retrieve the first tool call generated by OpenAI. The response may
// contain no choices or no tool call, so fail with a clear message rather
// than an opaque "cannot read properties of undefined" TypeError.
const toolCall = completion.choices[0]?.message.tool_calls?.[0];
if (!toolCall) {
  throw new Error("OpenAI response did not include a tool call; nothing to extract");
}
// The tool call arguments are a serialized JSON string.
const result = toolCall.function.arguments;
// The arguments are deserialized and validated against the zod schema,
// yielding an object whose `top` property is the array of stories.
// `schema.parse` throws if the model's output does not match the schema.
const parsed = schema.parse(JSON.parse(result));
// Await the send so that a delivery failure surfaces as an error of this
// script instead of an unhandled promise rejection.
await email({
  subject: "Top 5 hacker news of the day",
  text: JSON.stringify(parsed.top, null, 2),
});