1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/** @jsxImportSource https://esm.sh/react */
import React, { useState } from "https://esm.sh/react";
import { createRoot } from "https://esm.sh/react-dom/client";
// last stable version is v138
function App() {
const [url, setUrl] = useState("");
const [result, setResult] = useState(null);
const [loading, setLoading] = useState(false);
const handleSubmit = async (e) => {
e.preventDefault();
setLoading(true);
console.log(`Submitting URL for scraping: ${url}`);
try {
const response = await fetch("/scrape", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ url }),
});
console.log(`Received response for URL: ${url}. Status: ${response.status}`);
const data = await response.json();
setResult(data);
} catch (error) {
console.log(`Error occurred while scraping URL: ${url}. Error details: ${error.message}`);
setResult({ error: error.message });
}
setLoading(false);
};
return (
<div>
<h1>FanFic Scraper</h1>
<form onSubmit={handleSubmit}>
<input
type="url"
value={url}
onChange={(e) => setUrl(e.target.value)}
placeholder="Enter URL to scrape"
required
/>
<button type="submit" disabled={loading}>
{loading ? "Scraping..." : "Scrape"}
</button>
</form>
{result && (
<div>
<h2>Result:</h2>
<h3>{result.title}</h3>
<h3>Content:</h3>
<div dangerouslySetInnerHTML={{ __html: result.content }} />
<h3>Chapters:</h3>
<ul>
{result.articles && result.articles.map((article, index) => (
<li key={index}>
<p>Threadmark: {article.threadmark}</p>
<p>Date: {article.date}</p>
<p>Author: {article.author}</p>
<div dangerouslySetInnerHTML={{ __html: article.content }} />
</li>
))}
</ul>
{result.error && <p>Error: {result.error}</p>}
</div>
)}
</div>
);
}
function client() {
console.log("Initializing client-side React application");
createRoot(document.getElementById("root")).render(<App />);
}
// Start the React client only when a global `document` exists; where there is
// no DOM this module loads without attempting to render anything.
if (typeof document !== "undefined") client();
async function scrapePage(url) {
console.log(`Starting to scrape page: ${url}`);
const apiKey = Deno.env.get("ScrapingBeeAPIkey");
if (!apiKey) {
console.log("ScrapingBee API key not found in environment variables");
throw new Error("ScrapingBee API key not found in environment variables");
}
const extractRules = {
title: {
selector: "h1.p-title-value",
output: "text",
},
prefixTags: {
selector: "div.p-body-header a[href*='prefix_id']",
type: "list",
output: "text",
},
firstPost: {
selector: "article.message--post:not(.sticky-container)",
output: {
body: {