Val Town is a social website to write and deploy JavaScript.
Build APIs and schedule functions from your browser.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/** @jsx jsx */
import { jsx } from "https://deno.land/x/hono@v3.11.7/middleware.ts";
import { Hono } from "https://deno.land/x/hono@v3.11.7/mod.ts";
import { ai } from "https://esm.town/v/yawnxyz/ai";
import { getUrlMetadata } from "https://esm.town/v/yawnxyz/urlMetadata";
import { getHtmlMetadata } from "https://esm.town/v/yawnxyz/htmlMetadata";
import { getCitation } from "https://esm.town/v/yawnxyz/citation";
import { blobby } from "https://esm.town/v/yawnxyz/blobby";
import { transcribeAudio } from "https://esm.town/v/yawnxyz/stt";
import stringHash from 'npm:string-hash';
// Hono application instance for this module.
const app = new Hono();
// DOI matcher; see
// https://www.crossref.org/blog/dois-and-matching-regular-expressions/
// Matches "10." followed by 4-9 digits, a "/", and a suffix made of word
// characters or any of - . _ ; ( ) / :, bounded by word boundaries.
// Case-insensitive and not global; capture group 1 holds the whole DOI.
const DOI_REGEX = /\b(10\.\d{4,9}\/[-._;()\/:\w]+)\b/i;
/**
 * Fetches a page through the Jina Reader proxy (`https://r.jina.ai/`).
 *
 * `url` is prefixed with the reader base URL unless it already points at the
 * `r.jina.ai` host. The final URL and the request headers are logged before
 * the request is sent.
 *
 * Each truthy option is forwarded as a request header:
 *   - `withImagesSummary` -> `X-With-Images-Summary: true`
 *   - `withGeneratedAlt`  -> `X-With-Generated-Alt: true`
 *   - `withLinksSummary`  -> `X-With-Links-Summary: true`
 *   - `noCache`           -> `X-No-Cache: true`
 *   - `accept`            -> `Accept`
 *   - `targetSelector`    -> `X-Target-Selector`
 *   - `timeout`           -> `X-Timeout` (stringified with `toString()`)
 *   - `waitForSelector`   -> `X-Wait-For-Selector`
 *   - `returnFormat`      -> `X-Return-Format`
 *
 * @param url - Target page URL, or an already-prefixed reader URL.
 * @param opts - Optional settings described above; `null`/`undefined` means none.
 * @returns `response.body` when `opts.accept` is `text/event-stream`;
 *   otherwise the parsed JSON when the body is valid JSON, else the raw text.
 * @throws Error when the response status is not OK, a `SyntaxError` when
 *   `opts.returnFormat` is `screenshot` and the body is not valid JSON, and
 *   any error raised by `fetch`. All of these are logged, then rethrown.
 */
export const getJinaContent = async (url, opts = {}) => {
  const baseUrl = 'https://r.jina.ai/';
  const readerHost = 'r.jina.ai';
  // The default parameter only covers `undefined`; tolerate an explicit null too.
  const options = opts ?? {};

  // Decide by parsed hostname rather than by substring: a target such as
  // "https://example.com/?ref=r.jina.ai" merely mentions the reader host and
  // must still be routed through the proxy.
  let alreadyProxied = false;
  try {
    alreadyProxied = new URL(url).hostname === readerHost;
  } catch {
    // Not an absolute URL (e.g. "example.com/page"); it still needs the prefix.
  }
  const fullUrl = new URL(alreadyProxied ? url : baseUrl + url);

  const headers = {
    ...(options.withImagesSummary && { 'X-With-Images-Summary': 'true' }),
    ...(options.withGeneratedAlt && { 'X-With-Generated-Alt': 'true' }),
    ...(options.withLinksSummary && { 'X-With-Links-Summary': 'true' }),
    ...(options.noCache && { 'X-No-Cache': 'true' }),
    ...(options.accept && { 'Accept': options.accept }),
    ...(options.targetSelector && { 'X-Target-Selector': options.targetSelector }),
    ...(options.timeout && { 'X-Timeout': options.timeout.toString() }),
    ...(options.waitForSelector && { 'X-Wait-For-Selector': options.waitForSelector }),
    ...(options.returnFormat && { 'X-Return-Format': options.returnFormat }),
  };
  console.log('[getJinaContent] Fetching:', fullUrl.toString(), headers);

  try {
    const response = await fetch(fullUrl.toString(), {
      method: 'GET',
      headers: headers,
    });
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }

    // Streaming callers get the raw body stream and consume it themselves.
    if (headers['Accept'] === 'text/event-stream') {
      return response.body;
    }

    const text = await response.text();
    try {
      return JSON.parse(text);
    } catch (parseError) {
      // A screenshot response that is not valid JSON is an error; every other
      // format falls back to the plain text body.
      if (headers['X-Return-Format'] === 'screenshot') {
        throw parseError;
      }
      return text;
    }
  } catch (error) {
    console.error('Error fetching from Jina:', error);
    throw error;
  }
};
export const getFirecrawlContent = async (url, opts = {}) => {
const apiKey = Deno.env.get("FIRECRAWL_API_KEY");
if (!apiKey) {
throw new Error("API key not found. Please set FIRECRAWL_API_KEY in your environment.");
}
const body = {
url: url,
pageOptions: {
onlyMainContent: opts.onlyMainContent || true,
includeHtml: opts.includeHtml || false,
includeRawHtml: opts.includeRawHtml || false,
screenshot: opts.screenshot || false,
waitFor: opts.waitFor || 0,
removeTags: opts.removeTags || [],
onlyIncludeTags: opts.onlyIncludeTags || [],
replaceAllPathsWithAbsolutePaths: opts.replaceAllPathsWithAbsolutePaths || true,
parsePDF: opts.parsePDF || false
},
extractorOptions: {
// mode: opts.mode || "llm-extraction",
mode: opts.mode,
extractionPrompt: opts.extractionPrompt || "",
extractionSchema: opts.extractionSchema || {}
},
timeout: opts.timeout || 30000
};
console.log('[getFirecrawlContent] Fetching:', url, body);
try {
const response = await fetch("https://api.firecrawl.dev/v0/scrape", {
method: 'POST',
yawnxyz-getcontentfromurl.web.val.run
August 24, 2024