// Public val · Version 28 · 1/31/2025
import { blob } from "https://esm.town/v/std/blob";
import { email } from "https://esm.town/v/std/email";
import { fetch } from "https://esm.town/v/std/fetch";
import ogs from "npm:open-graph-scraper";
// Patterns collected under the name `ignoreRegexes`. Each entry is an
// unanchored, flag-less regex, so it matches anywhere inside the tested string.
const ignoreRegexes: RegExp[] = [
  // thenewstack.io author pages (lowercase letters and hyphens in the slug)
  /https:\/\/thenewstack\.io\/author\/[a-z\-]*\//,

  // Anything containing "mailchi" (presumably Mailchimp links — confirm)
  /mailchi/,

  // Social / channel links
  /youtube.*sub_confirmation/,
  /linkedin.*company/,
  /hachyderm\.io/,
  /twitter\.com/,
  /facebook\.com/,
  /instagram\.com/,

  // Newsletter feedback and unsubscribe links
  /no-cache\/newsletter-feedback/,
  /unsubscribe/,
];
/**
 * Type of the `result` property on the value that an awaited `ogs(...)` call
 * resolves to. Derived from the imported `ogs` function's own typings instead
 * of being redeclared here.
 */
export type OgsResult = Awaited<ReturnType<typeof ogs>>["result"];
// Extract URLs from text using regex
async function extractUrls(text: string): Promise<OgsResult[]> {
const urlRegex = /(https?:\/\/[^\s]+)/g;
const urls = (text.match(urlRegex) || []).filter((url, index, self) => self.indexOf(url) === index // Remove duplicates
);
const followedUrls = await Promise.all(urls.map(async url => {
try {
const resp = await fetch(url, { redirect: "follow" });
const text = await resp.text();
console.log(text);
const { result, error, response } = await ogs({ html: text });
if (error) {
console.error("Failed to fetch " + url, response);
throw new Error("Failed to fetch " + url);
}
// Updated: February 1, 2025