import puppeteer, { Page } from "https://deno.land/x/puppeteer@16.2.0/mod.ts";
/** Options accepted by `loadPageContent`. */
export interface LoadPageOptions {
/**
 * When true, `loadPageContent` returns the title and text content extracted
 * by Mozilla Readability (joined by a newline) instead of the page's HTML.
 */
textContent: boolean;
}
/**
 * Loads `url` in a remote browser reached through the Browserbase WebSocket
 * endpoint and returns the page's content as a string.
 *
 * @param url - Address to navigate to.
 * @param options - When `options.textContent` is true, the result is the
 *   Readability-extracted title and text joined by a newline; otherwise it is
 *   the HTML returned by `page.content()`.
 * @returns The page HTML, or `"<title>\n<text>"` in text-content mode.
 * @throws Error if `BROWSERBASE_API_KEY` is not set, or if Readability cannot
 *   extract an article from the page. Errors from Puppeteer (connection,
 *   navigation, evaluation) propagate unchanged.
 */
export async function loadPageContent(
  url: string,
  options: LoadPageOptions = { textContent: false },
): Promise<string> {
  // Fail fast with a clear message instead of connecting with "apiKey=undefined".
  const apiKey = Deno.env.get("BROWSERBASE_API_KEY");
  if (!apiKey) {
    throw new Error("BROWSERBASE_API_KEY environment variable is not set");
  }

  const browser = await puppeteer.connect({
    browserWSEndpoint: `wss://connect.browserbase.com?apiKey=${encodeURIComponent(apiKey)}`,
  });

  try {
    // Reuse the session's first page when there is one; otherwise open a page
    // rather than dereferencing `undefined`.
    const pages = await browser.pages();
    const page = pages[0] ?? (await browser.newPage());
    await page.goto(url);

    let content: string;
    if (options.textContent) {
      // The script runs inside the page: it imports Readability from a CDN and
      // parses the live document. `parse()` yields null when no article is found.
      const readable: { title?: string | null; textContent?: string | null } | null = await page.evaluate(`
import('https://cdn.skypack.dev/@mozilla/readability').then(readability => {
return new readability.Readability(document).parse()
})`);
      if (readable == null) {
        throw new Error(`Readability could not extract readable content from ${url}`);
      }
      // Missing fields become empty strings rather than the literal "undefined".
      content = `${readable.title ?? ""}\n${readable.textContent ?? ""}`;
    } else {
      content = await page.content();
    }

    await page.close();
    return content;
  } finally {
    // Always release the remote browser, even when navigation or extraction fails.
    await browser.close();
  }
}
/** Options accepted by `screenshotPage`. */
export interface ScreenshotOptions {
/** Forwarded as the `fullPage` option of Puppeteer's `page.screenshot`. */
fullPage: boolean;
}
/**
 * Loads `url` in a remote browser reached through the Browserbase WebSocket
 * endpoint and returns a screenshot of the page.
 *
 * @param url - Address to navigate to.
 * @param options - `options.fullPage` is forwarded to `page.screenshot`.
 * @returns Whatever `page.screenshot` resolves to for the given options.
 * @throws Error if `BROWSERBASE_API_KEY` is not set. Errors from Puppeteer
 *   (connection, navigation, capture) propagate unchanged.
 */
export async function screenshotPage(url: string, options: ScreenshotOptions = { fullPage: true }) {
  // Fail fast with a clear message instead of connecting with "apiKey=undefined".
  const apiKey = Deno.env.get("BROWSERBASE_API_KEY");
  if (!apiKey) {
    throw new Error("BROWSERBASE_API_KEY environment variable is not set");
  }

  const browser = await puppeteer.connect({
    browserWSEndpoint: `wss://connect.browserbase.com?apiKey=${encodeURIComponent(apiKey)}`,
  });

  try {
    // Reuse the session's first page when there is one; otherwise open a page
    // rather than dereferencing `undefined`.
    const pages = await browser.pages();
    const page = pages[0] ?? (await browser.newPage());
    await page.goto(url);
    const buffer = await page.screenshot({ fullPage: options.fullPage });
    await page.close();
    return buffer;
  } finally {
    // Always release the remote browser, even when navigation or capture fails.
    await browser.close();
  }
}