import { jsx } from "https://deno.land/x/hono@v3.11.7/middleware.ts";
import { Hono } from "https://deno.land/x/hono@v3.11.7/mod.ts";
import { ai } from "https://esm.town/v/yawnxyz/ai";
import { getUrlMetadata } from "https://esm.town/v/yawnxyz/urlMetadata";
import { getHtmlMetadata } from "https://esm.town/v/yawnxyz/htmlMetadata";
import { getCitation } from "https://esm.town/v/yawnxyz/citation";
import { blobby } from "https://esm.town/v/yawnxyz/blobby";
import { transcribeAudio } from "https://esm.town/v/yawnxyz/stt";
import stringHash from 'npm:string-hash';
// Hono application instance for this module (route registration is not visible in this part of the file).
const app = new Hono();
// Matches a DOI-shaped token: "10." followed by 4-9 digits, a slash, and one or more
// suffix characters drawn from `- . _ ; ( ) / :` or word characters. The whole DOI is
// exposed as capture group 1; the match is case-insensitive and bounded by `\b` on both sides.
const DOI_REGEX = /\b(10\.\d{4,9}\/[-._;()\/:\w]+)\b/i;
/**
 * Options understood by `getJinaContent`. Every truthy option is forwarded to
 * the reader endpoint as the request header named in its comment.
 */
interface JinaContentOptions {
  /** Sends `X-With-Images-Summary: true`. */
  withImagesSummary?: boolean;
  /** Sends `X-With-Generated-Alt: true`. */
  withGeneratedAlt?: boolean;
  /** Sends `X-With-Links-Summary: true`. */
  withLinksSummary?: boolean;
  /** Sends `X-No-Cache: true`. */
  noCache?: boolean;
  /** Sent as `Accept`; `text/event-stream` makes the function return the raw body stream. */
  accept?: string;
  /** Sent as `X-Target-Selector`. */
  targetSelector?: string;
  /** Sent as `X-Timeout` (stringified). */
  timeout?: number | string;
  /** Sent as `X-Wait-For-Selector`. */
  waitForSelector?: string;
  /** Sent as `X-Return-Format`; with `screenshot` a non-JSON body is treated as an error. */
  returnFormat?: string;
}

/**
 * Fetches `url` through the `https://r.jina.ai/` reader endpoint.
 *
 * @param url  Target page URL, or a URL whose host already ends in `r.jina.ai`
 *             (in which case it is used as-is and not prefixed again).
 * @param opts Optional request tweaks; see `JinaContentOptions`.
 * @returns The response body stream when `opts.accept` is `text/event-stream`;
 *          otherwise the parsed JSON value if the body is valid JSON, or the
 *          raw text if it is not.
 * @throws Error with message `HTTP error! Status: <code>` on a non-2xx response.
 * @throws SyntaxError when `opts.returnFormat` is `screenshot` and the body is not JSON.
 * @throws Whatever `fetch` itself rejects with (logged, then rethrown).
 */
export const getJinaContent = async (url: string, opts: JinaContentOptions = {}) => {
  const baseUrl = 'https://r.jina.ai/';

  // Decide by *hostname*, not by substring: a target such as
  // `https://example.com/?ref=r.jina.ai` merely mentions the reader host and
  // must still be routed through it.
  let alreadyProxied = false;
  try {
    alreadyProxied = new URL(url).hostname.endsWith('r.jina.ai');
  } catch {
    // `url` is not an absolute URL on its own; treat it as a target to prefix.
  }
  const fullUrl = new URL(alreadyProxied ? url : baseUrl + url);

  // Only truthy options produce a header, in a fixed order so the log line is stable.
  const headers: Record<string, string> = {};
  if (opts.withImagesSummary) headers['X-With-Images-Summary'] = 'true';
  if (opts.withGeneratedAlt) headers['X-With-Generated-Alt'] = 'true';
  if (opts.withLinksSummary) headers['X-With-Links-Summary'] = 'true';
  if (opts.noCache) headers['X-No-Cache'] = 'true';
  if (opts.accept) headers['Accept'] = opts.accept;
  if (opts.targetSelector) headers['X-Target-Selector'] = opts.targetSelector;
  if (opts.timeout) headers['X-Timeout'] = opts.timeout.toString();
  if (opts.waitForSelector) headers['X-Wait-For-Selector'] = opts.waitForSelector;
  if (opts.returnFormat) headers['X-Return-Format'] = opts.returnFormat;

  console.log('[getJinaContent] Fetching:', fullUrl.toString(), headers);
  try {
    const response = await fetch(fullUrl.toString(), {
      method: 'GET',
      headers,
    });

    if (!response.ok) {
      // Release the unread body before bailing out so the connection is not left dangling.
      await response.body?.cancel().catch(() => {});
      throw new Error(`HTTP error! Status: ${response.status}`);
    }

    // Streaming callers get the raw body and consume it themselves.
    if (headers['Accept'] === 'text/event-stream') {
      return response.body;
    }

    const text = await response.text();
    try {
      return JSON.parse(text);
    } catch (parseError) {
      // Screenshot responses are expected to be JSON, so a parse failure is a
      // real error there; every other format falls back to the plain text.
      if (headers['X-Return-Format'] === 'screenshot') {
        throw parseError;
      }
      return text;
    }
  } catch (error) {
    console.error('Error fetching from Jina:', error);
    throw error;
  }
};
export const getFirecrawlContent = async (url, opts = {}) => {
const apiKey = Deno.env.get("FIRECRAWL_API_KEY");
if (!apiKey) {
throw new Error("API key not found. Please set FIRECRAWL_API_KEY in your environment.");
}
const body = {
url: url,
pageOptions: {
onlyMainContent: opts.onlyMainContent || true,
includeHtml: opts.includeHtml || false,
includeRawHtml: opts.includeRawHtml || false,
screenshot: opts.screenshot || false,
waitFor: opts.waitFor || 0,
removeTags: opts.removeTags || [],
onlyIncludeTags: opts.onlyIncludeTags || [],
replaceAllPathsWithAbsolutePaths: opts.replaceAllPathsWithAbsolutePaths || true,
parsePDF: opts.parsePDF || false
},
extractorOptions: {
mode: opts.mode,
extractionPrompt: opts.extractionPrompt || "",
extractionSchema: opts.extractionSchema || {}
},
timeout: opts.timeout || 30000
};
console.log('[getFirecrawlContent] Fetching:', url, body);
try {
const response = await fetch("https://api.firecrawl.dev/v0/scrape", {
method: 'POST',