1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import puppeteer, { Page } from "https://deno.land/x/puppeteer@16.2.0/mod.ts";
/** Options accepted by `loadPageContent`. */
export interface LoadPageOptions {
  /**
   * When true, `loadPageContent` returns the page title and the text extracted
   * by Mozilla Readability instead of the page's HTML.
   */
  textContent: boolean;
}
/**
 * Loads `url` in a remote Browserbase browser session and returns its content.
 *
 * @param url - Address to navigate to.
 * @param options - When `options.textContent` is true, Mozilla Readability is
 *   imported inside the page (from the Skypack CDN) and the result is
 *   `"<title>\n<text content>"`; otherwise the HTML from `page.content()` is
 *   returned.
 * @returns The page HTML, or the extracted title and text.
 * @throws Error if `BROWSERBASE_API_KEY` is not set, or if Readability yields
 *   no article for the page. Errors raised by puppeteer propagate unchanged.
 */
export async function loadPageContent(
  url: string,
  options: LoadPageOptions = { textContent: false },
): Promise<string> {
  // Fail early with a clear message instead of connecting with "apiKey=undefined".
  const apiKey = Deno.env.get("BROWSERBASE_API_KEY");
  if (!apiKey) {
    throw new Error("BROWSERBASE_API_KEY environment variable is not set");
  }

  const browser = await puppeteer.connect({
    browserWSEndpoint: `wss://connect.browserbase.com?apiKey=${apiKey}`,
  });
  try {
    const pages = await browser.pages();
    // Reuse the session's first page; open one if the session reports none.
    const page = pages[0] ?? (await browser.newPage());
    try {
      await page.goto(url);

      if (!options.textContent) {
        return await page.content();
      }

      // The script is passed as a string so it is evaluated in the page, where
      // the dynamic import of Readability and the `document` global resolve.
      // Readability's parse() may return null when it cannot extract an article.
      const readable: { title?: string; textContent?: string } | null = await page.evaluate(`
import('https://cdn.skypack.dev/@mozilla/readability').then(readability => {
return new readability.Readability(document).parse()
})`);
      if (readable == null) {
        throw new Error(`Readability could not extract content from ${url}`);
      }
      // Default missing fields to "" so the literal text "undefined" is never emitted.
      return `${readable.title ?? ""}\n${readable.textContent ?? ""}`;
    } finally {
      await page.close();
    }
  } finally {
    // Always release the remote session, even when navigation or extraction fails.
    await browser.close();
  }
}
/** Options accepted by `screenshotPage`. */
export interface ScreenshotOptions {
  /** Forwarded as the `fullPage` option of puppeteer's `page.screenshot`. */
  fullPage: boolean;
}
/**
 * Loads `url` in a remote Browserbase browser session and takes a screenshot.
 *
 * @param url - Address to navigate to.
 * @param options - `options.fullPage` is passed through to `page.screenshot`;
 *   it is true when `options` is omitted.
 * @returns Whatever `page.screenshot` resolves to (the image data).
 * @throws Error if `BROWSERBASE_API_KEY` is not set. Errors raised by
 *   puppeteer propagate unchanged.
 */
export async function screenshotPage(url: string, options: ScreenshotOptions = { fullPage: true }) {
  // Fail early with a clear message instead of connecting with "apiKey=undefined".
  const apiKey = Deno.env.get("BROWSERBASE_API_KEY");
  if (!apiKey) {
    throw new Error("BROWSERBASE_API_KEY environment variable is not set");
  }

  const browser = await puppeteer.connect({
    browserWSEndpoint: `wss://connect.browserbase.com?apiKey=${apiKey}`,
  });
  try {
    const pages = await browser.pages();
    // Reuse the session's first page; open one if the session reports none.
    const page = pages[0] ?? (await browser.newPage());
    try {
      await page.goto(url);
      return await page.screenshot({ fullPage: options.fullPage });
    } finally {
      await page.close();
    }
  } finally {
    // Always release the remote session, even when navigation or capture fails.
    await browser.close();
  }
}