import { embed, embedMany } from "npm:ai";
import { openai } from "npm:@ai-sdk/openai";
import lunr from "https://cdn.skypack.dev/lunr";
/**
 * Requests an embedding for a single piece of text from the
 * `text-embedding-3-small` OpenAI model through the AI SDK's `embed` call.
 *
 * Logs both the input text and the resulting embedding to the console.
 *
 * @param {string} text - The text to embed.
 * @returns {Promise<*>} The `embedding` field of the SDK response (treated
 *   elsewhere in this file as an array of numbers).
 */
async function getEmbedding(text) {
  console.log(`Getting embedding for: ${text}`);

  const model = openai.embedding('text-embedding-3-small');
  const response = await embed({ model, value: text });
  const vector = response.embedding;

  console.log(`Embedding: ${vector}`);
  return vector;
}
/**
 * Requests embeddings for several texts in one `embedMany` call against the
 * `text-embedding-3-small` OpenAI model.
 *
 * Logs both the input texts and the resulting embeddings to the console.
 *
 * @param {string[]} texts - The texts to embed.
 * @returns {Promise<*>} The `embeddings` field of the SDK response; callers in
 *   this file index it by the position of the corresponding input text.
 */
async function getEmbeddings(texts) {
  console.log(`Getting embeddings for texts: ${texts}`);

  const model = openai.embedding('text-embedding-3-small');
  const response = await embedMany({ model, values: texts });
  const vectors = response.embeddings;

  console.log(`Embeddings: ${vectors}`);
  return vectors;
}
// In-memory corpus searched by this module. Each entry later gains an
// `embedding` property, and `id` is used as the lunr document ref, so ids must
// be unique: a duplicate id makes a lunr hit resolve to the wrong entry when it
// is looked up by id.
const documents = [
  { id: 1, content: 'cats dogs' },
  { id: 2, content: 'elephants giraffes lions tigers' },
  { id: 3, content: 'edam camembert cheddar' },
  { id: 4, content: '7878292929241' },
];
/**
 * Embeds the `content` of every entry in `documents` with a single
 * `getEmbeddings` call and stores each result on the matching entry as
 * `doc.embedding` (matched by array position).
 *
 * Mutates the module-level `documents` array entries in place.
 *
 * @returns {Promise<void>}
 */
async function prepareDocumentsWithEmbeddings() {
  const vectors = await getEmbeddings(documents.map(({ content }) => content));

  for (const [position, doc] of documents.entries()) {
    doc.embedding = vectors[position];
  }
}
// Top-level await: module evaluation pauses here until every entry in
// `documents` has had its `embedding` property assigned.
await prepareDocumentsWithEmbeddings();
/**
 * Computes the cosine similarity of two numeric vectors:
 * dot(a, b) / (|a| * |b|).
 *
 * The vectors are expected to have the same length; this is not checked.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} The cosine similarity, or 0 when either vector has zero
 *   magnitude (including empty vectors), where the similarity is undefined.
 */
function cosineSimilarity(a, b) {
  const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
  const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  const denominator = magnitudeA * magnitudeB;

  // Without this guard a zero-magnitude vector yields 0 / 0 = NaN, which
  // breaks numeric sort comparators and threshold comparisons in callers.
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
/**
 * Returns the `k` entries of `documents` whose stored embeddings score highest
 * against `embedding` under `cosineSimilarity`, best match first.
 *
 * The `documents` array itself is not reordered; the returned array holds the
 * original document objects (not copies).
 *
 * @param {number[]} embedding - Query vector to compare against each document.
 * @param {number} [k=1] - Maximum number of documents to return.
 * @returns {object[]} Up to `k` documents in descending similarity order.
 */
function findNearestNeighbors(embedding, k = 1) {
  const scored = [];
  for (const doc of documents) {
    scored.push({ doc, similarity: cosineSimilarity(doc.embedding, embedding) });
  }

  // Highest similarity first.
  scored.sort((left, right) => right.similarity - left.similarity);

  return scored.slice(0, k).map(({ doc }) => doc);
}
// Full-text index over `documents`, used by `search` as the keyword fallback.
// Inside the callback `this` is the object lunr supplies for configuring the
// index: documents are keyed by `id` and only the `content` field is declared.
const idx = lunr(function () {
  const builder = this;

  builder.ref('id');
  builder.field('content');

  for (const doc of documents) {
    builder.add(doc);
  }
});
/**
 * Searches `documents` for `query`, preferring semantic similarity and falling
 * back to the lunr keyword index.
 *
 * The query is embedded and the single nearest document is fetched. If that
 * document's cosine similarity to the query reaches `similarityThreshold`, it
 * is returned; otherwise the results of `idx.search(query)` are returned
 * instead. In both cases the `embedding` property is omitted from the returned
 * copies, and the chosen result set is logged to the console.
 *
 * @param {string} query - Search text.
 * @param {number} [similarityThreshold=0.2] - Minimum cosine similarity for
 *   the semantic match to be accepted.
 * @returns {Promise<object[]>} Matching documents without their embeddings.
 */
async function search(query, similarityThreshold = 0.2) {
  // Shallow copy of a document with the `embedding` property left out.
  const withoutEmbedding = ({ embedding, ...publicFields }) => publicFields;

  console.log(`Searching for query: ${query}`);

  const queryVector = await getEmbedding(query);
  const candidates = findNearestNeighbors(queryVector, 1);
  const isSemanticHit =
    candidates.length > 0 &&
    cosineSimilarity(candidates[0].embedding, queryVector) >= similarityThreshold;

  if (!isSemanticHit) {
    // Keyword fallback: map each lunr hit back to its document via the ref.
    const lexicalMatches = idx.search(query).map((hit) => {
      const matchedDoc = documents.find((doc) => doc.id.toString() === hit.ref);
      return withoutEmbedding(matchedDoc);
    });
    console.log('Lunr search results:', lexicalMatches);
    return lexicalMatches;
  }

  const semanticMatches = candidates.map((doc) => withoutEmbedding(doc));
  console.log('Cosine similarity results:', semanticMatches);
  return semanticMatches;
}