Back

Version 32

5/29/2024
import { decode as base64Decode, encode as base64Encode } from "https://deno.land/std@0.166.0/encoding/base64.ts";
import { Client } from "https://deno.land/x/postgres/mod.ts";
import { sqlToJSON } from "https://esm.town/v/nbbaier/sqliteExportHelpers?v=22";
import { db as allValsDb } from "https://esm.town/v/sqlite/db?v=9";
import { blob } from "https://esm.town/v/std/blob";
import OpenAI from "npm:openai";
import { truncateMessage } from "npm:openai-tokens";

// Number of components per embedding vector. Within this part of the file it is
// only referenced by the commented-out table definition below.
// NOTE(review): presumably matches the output size of the embedding model used
// further down — confirm.
const dimensions = 1536;

// Postgres client for the embeddings store. The connection string is read from
// the NEON_URL_VALSEMBEDDINGS environment variable.
// NOTE(review): Deno.env.get yields undefined when the variable is unset and
// nothing here checks for that — confirm how Client behaves in that case.
const client = new Client(Deno.env.get("NEON_URL_VALSEMBEDDINGS"));
await client.connect();
// Table-creation statement for vals_embeddings, left commented out
// (presumably it has already been run once — confirm before re-enabling):
// const result = await client
// .queryObject`CREATE TABLE IF NOT EXISTS vals_embeddings (id TEXT PRIMARY KEY, embedding VECTOR(${dimensions}))`;
// console.log(result);

// Load author, name and version for every val whose code LENGTH exceeds 10,
// ordered by name. The result is cast to `any`, so nothing downstream is
// type-checked against the row shape.
// NOTE(review): sqlToJSON presumably converts the result set into an array of
// row objects (the code below iterates it and reads these three columns) —
// verify against the helper.
const allVals = await sqlToJSON(
await allValsDb.execute("SELECT author_username, name, version FROM vals WHERE LENGTH(code) > 10 ORDER BY name"),
) as any;

// Every id currently stored in vals_embeddings, collected into a Set so the
// loop below can cheaply skip vals that already have a row.
const existingEmbeddingsIds = new Set(
(await client.queryObject`SELECT id FROM vals_embeddings`).rows.map(row => row.id),
);

/**
 * Builds the identifier under which a val is tracked: its author username,
 * name and version, joined by `!!`.
 *
 * @param val Object exposing `author_username`, `name` and `version`.
 * @returns The three fields interpolated as `author!!name!!version`.
 */
function idForVal(val: any): string {
  const { author_username: author, name: valName, version: valVersion } = val;
  return `${author}!!${valName}!!${valVersion}`;
}

// Vals that still need an embedding, grouped into batches; starts out as a
// single empty batch. The element type is spelled out because a bare `[[]]`
// is inferred as `never[][]` under strict null checks, which makes every
// `push` onto a batch a type error. `any` matches the untyped rows in allVals.
const newValsBatches: any[][] = [[]];
// The batch that the loop below is currently appending to.
let currentBatch = newValsBatches[0];
for (const val of allVals) {
const id = idForVal(val);
if (!existingEmbeddingsIds.has(id)) {
currentBatch.push(val);
}
if (currentBatch.length >= 100) {
Updated: June 17, 2024