1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// Process-wide sequence number appended to every generated objectID. It lives
// outside the function so that two calls landing in the same millisecond
// cannot both produce `chunk_<timestamp>_0`.
let algoliaChunkSequence = 0;

/**
 * Splits a chapter's text into overlapping chunks and wraps each chunk in a
 * record object (objectID, chapter metadata, content, offsets, tags).
 *
 * A chunk is closed at the first word boundary reached after it has grown
 * beyond `maxChunkLength` characters, so chunks are slightly longer than that
 * limit. The next chunk starts roughly `overlapLength` characters before the
 * split, moved back to the nearest boundary, so neighbouring chunks share text.
 *
 * For every record, `chapterText.slice(startIndex, endIndex) === content`
 * (`endIndex` is exclusive). No trailing record is emitted when the text left
 * after the last split is only overlap that the previous record already holds.
 *
 * The resulting array is also printed to the console as pretty-printed JSON.
 *
 * @param {string} chapterTitle - Stored on each record and used for the `chapter:<title>` tag.
 * @param {number} chapterNumber - Stored on each record as-is.
 * @param {string} chapterText - Full chapter text to split.
 * @param {{maxChunkLength?: number, overlapLength?: number}} [options]
 *   Optional tuning. `maxChunkLength` defaults to 8240 and `overlapLength` to
 *   500 (the values that were previously hard-coded; presumably sized for an
 *   index record limit - confirm before changing).
 * @returns {Array<object>} Records in chunk order; empty for empty text.
 */
export default function prepareChapterForAlgolia(chapterTitle, chapterNumber, chapterText, options = {}) {
  const { maxChunkLength = 8240, overlapLength = 500 } = options;
  const letter = /[a-zA-Z]/;
  const wordChar = /\w/;

  /**
   * Reports whether a chunk may be cut at `index`, i.e. whether the position
   * between `text[index - 1]` and `text[index]` counts as a word boundary.
   */
  function isWordBoundary(text, index) {
    if (index === 0 || index === text.length) return true;
    const prevChar = text[index - 1];
    const currChar = text[index];
    if (currChar === " ") return true;
    // Keep hyphenated words together: no boundary right before a hyphen that follows a letter.
    if (currChar === "-" && letter.test(prevChar)) return false;
    // Keep contractions together: an apostrophe with a letter on both sides is
    // inside a word. The bounds check matters because RegExp#test would
    // coerce an out-of-range `undefined` to the string "undefined".
    if (
      currChar === "'" &&
      letter.test(prevChar) &&
      index + 1 < text.length &&
      letter.test(text[index + 1])
    ) {
      return false;
    }
    // Otherwise only a word-character -> non-word-character transition is a boundary.
    return wordChar.test(prevChar) && !wordChar.test(currChar);
  }

  /** Builds the record for `chapterText.slice(startIndex, endIndex)`. */
  function createAlgoliaRecord(startIndex, endIndex, chunkIndex) {
    return {
      objectID: `chunk_${Date.now()}_${algoliaChunkSequence++}`,
      chapterTitle,
      chapterNumber,
      content: chapterText.slice(startIndex, endIndex),
      startIndex,
      endIndex,
      chunkIndex,
      _tags: [`chapter:${chapterTitle}`],
    };
  }

  /** Walks the text once, emitting a record each time the current chunk is full. */
  function processChapter() {
    const algoliaRecords = [];
    let startIndex = 0;
    let chunkIndex = 0;
    // Exclusive end of the most recently emitted record.
    let emittedEnd = 0;

    for (let i = 0; i < chapterText.length; i++) {
      // The current chunk spans [startIndex, i + 1): it includes the character at i.
      const endIndex = i + 1;
      if (endIndex - startIndex > maxChunkLength && isWordBoundary(chapterText, i)) {
        algoliaRecords.push(createAlgoliaRecord(startIndex, endIndex, chunkIndex++));
        emittedEnd = endIndex;

        // Start the next chunk about `overlapLength` characters back, moved
        // left to the nearest boundary (which may be a space or punctuation).
        let overlapStart = Math.max(0, i - overlapLength);
        while (overlapStart > 0 && !isWordBoundary(chapterText, overlapStart)) {
          overlapStart--;
        }
        startIndex = overlapStart;
      }
    }

    // Emit the remainder only if it holds text the previous record did not cover.
    if (emittedEnd < chapterText.length) {
      algoliaRecords.push(createAlgoliaRecord(startIndex, chapterText.length, chunkIndex));
    }
    return algoliaRecords;
  }

  const records = processChapter();
  console.log(JSON.stringify(records, null, 2));
  return records;
}
// Sample chapter metadata. Presumably passed to prepareChapterForAlgolia together
// with the chapterText defined next - the call site is not visible here, so confirm.
const chapterTitle = "A Side Ws";
const chapterNumber = 1;
const chapterText = `"Okay, I think that did it." Everett drew back from under the console. "Try it now Vince."
Leaning back in his seat, Vincent pressed one of the controls before him and the monitor flickered on. Everett flinched at the sight of the winged blade on the screen, emblazoned with a single word.
Veda.
"The system appears to be in order," the voice said. Everett hadn't noticed how much it sounded like Newtype before. "My apologies for the error, though I note that if someone had bothered to ask I could have made the adjustments easily."
"Yeah," Vince drawled. "Sorry 'bout that."
"It's fine," StarGazer replied. Everett found that easier than 'Veda.' "I'll leave you to your business."
The monitor switched to black and then loaded the normal console display.
"Think she's still there?" Vince folded his hands behind his head. "Maybe she's a voyeur."
She. Right, StarGazer did identify as female.
Everett supposed that was fair but it really didn't make anything any less weird. Was that racist of him, or racist and sexist? He wasn't trying to be, it was just so weird.
StarGazer seemed so… Well, not normal, but normal enough. Any weird thing she did never made him think she was anything but a Case-53 like everyone thought she was.
But she wasn't. She was a machine with...what? Processors for a brain? That just felt so weird to consider. Was she really that smart or was she simply programmed to act smart? Maybe there wasn't much difference but it still sent him reeling.
Val Town is a social website to write and deploy JavaScript.
Build APIs and schedule functions from your browser.
Comments
Nobody has commented on this val yet: be the first!
September 9, 2024