Readme

In-memory semantic search; load it up with Val Town KV.

This is a "dumb" version of vector search for prototyping RAG responses and UIs. It does both regular keyword search (with Lunr) and vector search (OpenAI embeddings + cosine similarity).

Usage:

import { semanticSearch } from "https://esm.town/v/yawnxyz/semanticSearch";

const documents = [
  { id: 1, content: 'cats dogs' },
  { id: 2, content: 'elephants giraffes lions tigers' },
  { id: 3, content: 'edam camembert cheddar' }
];

async function runExample() {
  // Add documents to the semantic search instance
  await semanticSearch.addDocuments({ documents });

  const results = await semanticSearch.search({ query: 'animals', similarityThreshold: 0, maxResults: 3 });
  console.log('Top 3 search results for "animals":');
  console.log(results);
}

runExample();
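
The in-memory index disappears whenever the val restarts. Since addDocuments skips re-embedding any document that already carries an embedding field, one way to "load it up" is to persist the embedded documents and pass them back in on startup. A minimal sketch, assuming Val Town's std/blob helpers and a hypothetical "docs-with-embeddings" key:

import { semanticSearch } from "https://esm.town/v/yawnxyz/semanticSearch";
import { blob } from "https://esm.town/v/std/blob";

// Hypothetical storage key; assumed to return undefined if nothing has been saved yet.
const saved = await blob.getJSON("docs-with-embeddings");

if (saved) {
  // Documents that already carry an `embedding` field are added without new OpenAI calls.
  await semanticSearch.addDocuments({ documents: saved });
} else {
  const documents = [
    { id: 1, content: 'cats dogs' },
    { id: 2, content: 'elephants giraffes lions tigers' },
    { id: 3, content: 'edam camembert cheddar' }
  ];
  await semanticSearch.addDocuments({ documents });
  // addDocuments attaches embeddings to the docs, so save them for next time.
  await blob.setJSON("docs-with-embeddings", semanticSearch.documents);
}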
Source:

import { Hono } from "npm:hono@3";
import { cors } from "npm:hono/cors";
import { embed, embedMany } from "npm:ai";
import { openai } from "npm:@ai-sdk/openai";
import lunr from "https://cdn.skypack.dev/lunr";

const app = new Hono();
app.use('*', cors({
  origin: '*',
  allowMethods: ['GET', 'POST'],
  allowHeaders: ['Content-Type'],
}));

openai.apiKey = Deno.env.get("OPENAI_API_KEY");

class SemanticSearch {
  constructor() {
    this.documents = [];
  }

  // Embed any documents that don't already have an embedding, store everything,
  // and (re)build the Lunr index over the requested fields.
  async addDocuments({ documents, fields = 'content', modelName = 'text-embedding-3-large' }) {
    const documentsWithoutEmbeddings = documents?.filter(doc => !doc.embedding) || [];
    const documentsWithEmbeddings = documents?.filter(doc => doc.embedding) || [];

    if (documentsWithoutEmbeddings.length > 0) {
      const contents = documentsWithoutEmbeddings.map(doc => doc.content);
      const embeddings = await this.getEmbeddings(contents, modelName);
      documentsWithoutEmbeddings.forEach((doc, index) => {
        doc.embedding = embeddings[index];
        this.documents.push(doc);
      });
    }

    documentsWithEmbeddings.forEach(doc => {
      this.documents.push(doc);
    });

    const fieldList = fields.split(',').map(field => field.trim());
    this.idx = lunr(function () {
      this.ref('id');
      fieldList.forEach(field => {
        this.field(field);
      });
      documents.forEach(doc => {
        this.add(doc);
      });
    });
  }

  // Single embedding via the AI SDK.
  async getEmbedding(text, modelName) {
    const { embedding } = await embed({
      model: openai.embedding(modelName),
      value: text,
    });
    return embedding;
  }

  // Batch embeddings via the AI SDK.
  async getEmbeddings(texts, modelName) {
    const { embeddings } = await embedMany({
      model: openai.embedding(modelName),
      values: texts,
    });
    return embeddings;
  }

  // Cosine similarity between two vectors.
  cosineSimilarity(a, b) {
    const dotProduct = a.reduce((sum, val, idx) => sum + val * b[idx], 0);
    const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
    const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
    return dotProduct / (magnitudeA * magnitudeB);
  }

  // Top-k documents by cosine similarity to the given embedding.
  findNearestNeighbors(embedding, k = 1) {
    const neighbors = this.documents
      .map(doc => ({ doc, similarity: this.cosineSimilarity(doc.embedding, embedding) }))
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, k)
      .map(item => item.doc);
    return neighbors;
  }

  // Min-max normalize Lunr scores into [0, 1].
  normalizeScores(results) {
    const maxScore = Math.max(...results.map(r => r.score));
    const minScore = Math.min(...results.map(r => r.score));
    return results.map(result => ({
      ...result,
      normalizedScore: (result.score - minScore) / (maxScore - minScore)
    }));
  }

  async search({ query, fields = 'content', modelName = 'text-embedding-3-large', similarityThreshold = 0.7, maxResults = 5 }) {
    const queryEmbedding = await this.getEmbedding(query, modelName);
    const nearestDocs = this.findNearestNeighbors(queryEmbedding, maxResults);
    // Minimal completion (assumed, not necessarily the original logic):
    // keep the nearest neighbors whose similarity clears the threshold.
    return nearestDocs.filter(doc =>
      this.cosineSimilarity(doc.embedding, queryEmbedding) >= similarityThreshold
    );
  }
}

// Assumed export: a shared instance, matching the import in the usage example.
export const semanticSearch = new SemanticSearch();
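
The val also imports Hono and enables CORS, but the route handlers fall outside the portion shown above. A minimal sketch of what a search endpoint could look like on Val Town, assuming a hypothetical POST /search route and request shape:

// Hypothetical route; the val's actual endpoints are not shown above.
app.post('/search', async (c) => {
  const { query, maxResults = 5, similarityThreshold = 0.7 } = await c.req.json();
  const results = await semanticSearch.search({ query, maxResults, similarityThreshold });
  return c.json(results);
});

// HTTP vals export the fetch handler.
export default app.fetch;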