Val Town is a social website to write and deploy JavaScript.
Build APIs and schedule functions from your browser.
Readme

Use Browserbase and Puppeteer to monitor and scrape Reddit search results.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import { PuppeteerDeno } from "https://deno.land/x/puppeteer@16.2.0/src/deno/Puppeteer.ts";
import { convertRelativeDateToString } from "https://esm.town/v/sarahxc/convertRelativeDateToString";
/** One scraped search hit, as returned by `redditSearch`. */
interface ThreadResult {
/** Name of the site the thread came from; this file always sets it to "Reddit". */
source: string;
/** Link to the thread, read from the `href` of the post's title anchor. */
url: string;
/**
 * Publication date. In final results this is the string returned by
 * `convertRelativeDateToString`; in the intermediate `Partial<ThreadResult>`
 * records it is the raw, trimmed text of the post's `faceplate-timeago` element.
 */
date_published: string;
/** Trimmed text content of the post's title anchor. */
title: string;
}
/** Options accepted by `redditSearch`. */
interface RedditSearchOptions {
/** Search terms; they are URL-encoded and sent as the `q` parameter of the Reddit search URL. */
query: string;
/**
 * Browserbase API key. When omitted, `redditSearch` falls back to the
 * `BROWSERBASE_API_KEY` environment variable and throws if neither is set.
 */
apiKey?: string;
}
/**
 * Searches Reddit through a remote Browserbase browser session (proxy enabled)
 * and returns the posts found on the search results page.
 *
 * @param options.query - Search terms to look up.
 * @param options.apiKey - Browserbase API key; defaults to the
 *   `BROWSERBASE_API_KEY` environment variable.
 * @returns The scraped threads. If navigation, waiting for results, or
 *   scraping fails, the error is logged and an empty array is returned.
 * @throws Error when no API key is available. Failures while connecting to
 *   Browserbase are not caught here and propagate to the caller.
 */
export async function redditSearch({
  query,
  apiKey = Deno.env.get("BROWSERBASE_API_KEY"),
}: RedditSearchOptions): Promise<ThreadResult[]> {
  if (!apiKey) {
    throw new Error("BrowserBase API key is required");
  }

  const wsEndpoint = `wss://connect.browserbase.com?apiKey=${apiKey}&enableProxy=true`;
  const client = new PuppeteerDeno({ productName: "chrome" });
  const session = await client.connect({
    browserWSEndpoint: wsEndpoint,
    ignoreHTTPSErrors: true,
  });

  try {
    const tab = await session.newPage();
    await tab.goto(constructSearchUrl(query), { waitUntil: "networkidle0" });
    // Give the result cards up to 30 seconds to render; a timeout lands in the catch below.
    await tab.waitForSelector("div[data-testid=\"search-post-unit\"]", { timeout: 30000 });
    const scraped = await extractPostData(tab);
    const threads = await processPostData(scraped);
    return threads;
  } catch (error) {
    // Best effort: report the failure and hand back "no results" instead of throwing.
    console.error("Error occurred during Reddit search:", error);
    return [];
  } finally {
    // Always release the remote browser session, whether or not scraping succeeded.
    await session.close();
  }
}
/**
 * Builds the Reddit search URL for a query, limited to link posts from the
 * past week.
 *
 * @param query - Raw search terms.
 * @returns The search URL with the query percent-encoded and spaces written as "+".
 */
function constructSearchUrl(query: string): string {
  // Encode each space-separated word on its own, then join the words with "+".
  const terms = query
    .split(" ")
    .map((word) => encodeURIComponent(word))
    .join("+");
  return "https://www.reddit.com/search/?q=" + terms + "&type=link&t=week";
}
/**
 * Reads the title, link and relative timestamp of every post card on the
 * search results page.
 *
 * @param page - Page object exposing `evaluate`; the callback is handed to it
 *   and its result is returned as-is.
 * @returns One record per post card. Missing pieces are returned as empty
 *   strings rather than being omitted.
 */
async function extractPostData(page: any): Promise<Partial<ThreadResult>[]> {
  // Per the Puppeteer docs the callback executes in the page context, so it
  // must stay self-contained and cannot reference anything from this module.
  return page.evaluate(() => {
    const cards = document.querySelectorAll("div[data-testid=\"search-post-unit\"]");
    return Array.from(cards, (card) => {
      const link = card.querySelector("a[id^=\"search-post-title\"]");
      const age = card.querySelector("faceplate-timeago");
      const title = link?.textContent?.trim() || "";
      const url = link?.href || "";
      const date_published = age?.textContent?.trim() || "";
      return { source: "Reddit", title, url, date_published };
    });
  });
}
/**
 * Turns raw scraped records into complete `ThreadResult`s.
 *
 * Records lacking a title, URL or date are dropped. For the rest, the scraped
 * relative date is passed through `convertRelativeDateToString`, one record at
 * a time and in input order.
 *
 * @param postData - Records as produced by `extractPostData`.
 * @returns The complete records, in their original order, with `source` set to "Reddit".
 */
async function processPostData(postData: Partial<ThreadResult>[]): Promise<ThreadResult[]> {
  const threads: ThreadResult[] = [];
  for (const { title, url, date_published: relativeDate } of postData) {
    // Skip anything the scraper could not fully populate (empty strings included).
    if (!title || !url || !relativeDate) {
      continue;
    }
    const date_published = await convertRelativeDateToString({ relativeDate });
    threads.push({ source: "Reddit", title, url, date_published });
  }
  return threads;
}
sarahxc-redditsearch.web.val.run
September 4, 2024