// Version 26 (5/29/2024)
import { decode as base64Decode, encode as base64Encode } from "https://deno.land/std@0.166.0/encoding/base64.ts";
import { sqlToJSON } from "https://esm.town/v/nbbaier/sqliteExportHelpers?v=22";
import { db as allValsDb } from "https://esm.town/v/sqlite/db?v=9";
import { blob } from "https://esm.town/v/std/blob";
import OpenAI from "npm:openai";
import { truncateMessage } from "npm:openai-tokens";

// Query the author_username, name and version columns of every val whose code is
// longer than 10 characters, ordered by name, then convert the result with
// sqlToJSON. The result is left as `any` because no row type is declared here.
const allValsQuery = "SELECT author_username, name, version FROM vals WHERE LENGTH(code) > 10 ORDER BY name";
const allValsResult = await allValsDb.execute(allValsQuery);
const allVals = await sqlToJSON(allValsResult) as any;

// Persisted embeddings metadata is not loaded right now (the blob read below is
// commented out), so the metadata map starts out empty on every run.
// const allValsBlobEmbeddingsMeta = (await blob.getJSON("allValsBlobEmbeddingsMeta")) ?? {};
const allValsBlobEmbeddingsMeta = {};

// Keys of the metadata map, collected into a Set for membership checks.
const existingEmbeddingsIds = new Set<string>();
for (const existingId of Object.keys(allValsBlobEmbeddingsMeta)) {
  existingEmbeddingsIds.add(existingId);
}

/**
 * Builds the identifier string for a val by joining its author username, name
 * and version with "!!" separators.
 *
 * @param val - Object exposing `author_username`, `name` and `version`.
 * @returns The string `<author_username>!!<name>!!<version>`.
 */
function idForVal(val: any): string {
  const { author_username: author, name, version } = val;
  return `${author}!!${name}!!${version}`;
}

// Maximum number of vals collected into a single batch.
const NEW_VALS_BATCH_SIZE = 1000;

// Vals whose id is not in existingEmbeddingsIds, grouped into batches of at most
// NEW_VALS_BATCH_SIZE entries. The list always holds at least one batch, which
// stays empty when there is nothing new. The element type is spelled out because
// a bare `[[]]` is inferred as `never[][]` under strict checking, which rejects
// the push below.
const newValsBatches: any[][] = [[]];
let currentBatch = newValsBatches[0];
for (const val of allVals) {
  if (existingEmbeddingsIds.has(idForVal(val))) {
    continue;
  }
  // Open a new batch only when there is another val to put into it, so a total
  // that is an exact multiple of the batch size leaves no empty trailing batch.
  if (currentBatch.length >= NEW_VALS_BATCH_SIZE) {
    currentBatch = [];
    newValsBatches.push(currentBatch);
  }
  currentBatch.push(val);
}

// Next batch data index to use: one past the largest batchDataIndex recorded in
// the metadata, or 0 when the metadata is empty. Computed as a running maximum
// instead of spreading every value into a single Math.max call, because a very
// large argument list can exceed the engine's argument limit and throw a
// RangeError.
let nextDataIndex = 0;
for (const item of Object.values(allValsBlobEmbeddingsMeta) as any[]) {
  nextDataIndex = Math.max(nextDataIndex, item.batchDataIndex + 1);
}
// Updated: June 17, 2024