1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { embed, embedMany } from "npm:ai";
import { openai } from "npm:@ai-sdk/openai";
import lunr from "https://cdn.skypack.dev/lunr";
// Step 1: Get Embeddings
/**
 * Requests an embedding for one piece of text from the
 * `text-embedding-3-small` model through the AI SDK's `embed` call.
 * Logs the input text and the resulting embedding.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<*>} The `embedding` field of the `embed` result
 *   (presumably an array of numbers — confirm against the AI SDK docs).
 */
async function getEmbedding(text) {
  console.log(`Getting embedding for: ${text}`);
  const model = openai.embedding('text-embedding-3-small');
  const response = await embed({ model, value: text });
  const vector = response.embedding;
  console.log(`Embedding: ${vector}`);
  return vector;
}
/**
 * Requests embeddings for several texts in one `embedMany` call, using the
 * `text-embedding-3-small` model. Logs the inputs and the returned embeddings.
 *
 * @param {string[]} texts - Texts to embed.
 * @returns {Promise<*>} The `embeddings` field of the `embedMany` result.
 *   Callers in this file index it by input position — presumably one
 *   embedding per text, in input order (confirm against the AI SDK docs).
 */
async function getEmbeddings(texts) {
  console.log(`Getting embeddings for texts: ${texts}`);
  const request = {
    model: openai.embedding('text-embedding-3-small'),
    values: texts,
  };
  const { embeddings: vectors } = await embedMany(request);
  console.log(`Embeddings: ${vectors}`);
  return vectors;
}
// Step 2: Store Embeddings with Documents
// Sample corpus. `id` is used as the lunr ref and as the lookup key when
// mapping lunr hits back to documents, so every id must be unique.
const documents = [
  { id: 1, content: 'cats dogs' },
  { id: 2, content: 'elephants giraffes lions tigers' },
  { id: 3, content: 'edam camembert cheddar' },
  { id: 4, content: '7878292929241' },
];
/**
 * Fetches embeddings for every document's `content` in a single
 * `getEmbeddings` call and stores each result on the matching document as
 * `doc.embedding` (matched by array position). Mutates `documents` in place.
 *
 * @returns {Promise<void>}
 */
async function prepareDocumentsWithEmbeddings() {
  const vectors = await getEmbeddings(documents.map(({ content }) => content));
  for (const [position, entry] of documents.entries()) {
    entry.embedding = vectors[position];
  }
}
// Top-level await: module evaluation pauses here until every document has
// its `embedding` attached, so the code below can rely on `doc.embedding`.
await prepareDocumentsWithEmbeddings();
// Step 3: Nearest Neighbor Search
/**
 * Computes the cosine similarity of two numeric vectors:
 * dot(a, b) / (|a| * |b|).
 *
 * The dot product iterates over `a`, so the vectors are assumed to have the
 * same length; this is not validated here.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} The similarity, or 0 when either vector has zero
 *   magnitude (including empty vectors), where the ratio is undefined.
 */
function cosineSimilarity(a, b) {
  const dotProduct = a.reduce((sum, val, idx) => sum + val * b[idx], 0);
  const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  const denominator = magnitudeA * magnitudeB;
  // Guard the 0/0 case: NaN would break the sort comparator and threshold
  // comparisons in callers.
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
/**
 * Ranks every entry in `documents` by cosine similarity between its
 * `embedding` and the given embedding, and returns the top `k` documents
 * (highest similarity first). The returned items are the original document
 * objects, not copies.
 *
 * @param {number[]} embedding - Query embedding to compare against.
 * @param {number} [k=1] - Maximum number of documents to return.
 * @returns {object[]} Up to `k` documents, most similar first.
 */
function findNearestNeighbors(embedding, k = 1) {
  const scored = [];
  for (const doc of documents) {
    scored.push({ doc, similarity: cosineSimilarity(doc.embedding, embedding) });
  }
  scored.sort((left, right) => right.similarity - left.similarity);
  return scored.slice(0, k).map(({ doc }) => doc);
}
// Step 4: Use Lunr.js for Full-Text Search
// Full-text index built once at module load: `id` is the document reference
// returned in search hits and `content` is the only indexed field.
const idx = lunr(function buildIndex() {
  this.ref('id');
  this.field('content');
  for (const doc of documents) {
    this.add(doc);
  }
});
// Combined Search Function
/**
 * Returns a shallow copy of a document without its `embedding` property,
 * so search results do not carry the embedding.
 *
 * @param {object} doc - Document that may have an `embedding` property.
 * @returns {object} Copy of `doc` minus `embedding`.
 */
function stripEmbedding(doc) {
  const { embedding, ...rest } = doc;
  return rest;
}

/**
 * Searches the documents for `query`, preferring semantic similarity and
 * falling back to lunr full-text search.
 *
 * The query is embedded and the single nearest document is looked up. If
 * its cosine similarity to the query is at least `similarityThreshold`,
 * that document is returned. Otherwise the lunr index is queried and every
 * hit is mapped back to its document by `id`.
 *
 * @param {string} query - Search text.
 * @param {number} [similarityThreshold=0.2] - Minimum cosine similarity for
 *   the nearest document to be accepted as the result.
 * @returns {Promise<object[]>} Matching documents without their `embedding`
 *   field; empty when nothing matches.
 */
async function search(query, similarityThreshold = 0.2) {
  console.log(`Searching for query: ${query}`);
  const queryEmbedding = await getEmbedding(query);
  const nearestDocs = findNearestNeighbors(queryEmbedding, 1); // top nearest document only
  const isSemanticMatch =
    nearestDocs.length > 0 &&
    cosineSimilarity(nearestDocs[0].embedding, queryEmbedding) >= similarityThreshold;

  if (isSemanticMatch) {
    const semanticResults = nearestDocs.map((doc) => stripEmbedding(doc));
    console.log('Cosine similarity results:', semanticResults);
    return semanticResults;
  }

  const keywordResults = idx.search(query).flatMap(({ ref }) => {
    // lunr refs are strings, so compare against the stringified id.
    const match = documents.find((doc) => String(doc.id) === ref);
    // Skip hits whose ref no longer maps to a document instead of throwing
    // a TypeError while destructuring `undefined`.
    return match === undefined ? [] : [stripEmbedding(match)];
  });
  console.log('Lunr search results:', keywordResults);
  return keywordResults;
}