stevekrouse-subaudio.web.val.run
Readme

sub.audio – generate subtitles and chapters for any audio URL.

Speech-to-text and chapter summaries powered by Substrate

🪩 To fork, sign up for Substrate to get your own API key and $50 free credits
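
For example, pass the audio file's location in the url query parameter (the audio address below is just a placeholder):

https://stevekrouse-subaudio.web.val.run/?url=https://example.com/episode.mp3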

import { ComputeJSON, sb, Substrate, TranscribeSpeech } from "npm:substrate";
import { extractValInfo } from "https://esm.town/v/pomdtr/extractValInfo?v=29";
export const listChapters = `Please provide a list of sections for the following transcript.
Such a list might be used to define a table of contents for a podcast or video.
Section titles should be short, rarely more than 5 words.
If it's a short transcript, you might only have 1 or 2 sections and if it's a long transcript, limit it to 16.
These sections should be the main topics or themes of the transcript.
They should be chronologically ordered.
They should be roughly evenly spaced throughout the transcript.
Do not neglect the latter end of the transcript.
Do not include minutiae.`;
export const timestampPrompt = `You will be provided with a list of sections as well as a transcript.
The sections will be used as a table of contents for that transcript.
Based on the content of the transcript, please provide the approximate timestamp in seconds where each section begins.
Lines are prefixed with the timestamp in seconds surrounded by square brackets. e.g.
[59.73] Hello I'm going to talk about some topic.
Your job is to analyze the semantics of the text and provide the timestamp (in seconds) where each section begins.
The first timestamp should be 0.`;
async function processAudio(audio_uri) {
  const substrate = new Substrate({ apiKey: Deno.env.get("SUBSTRATE_API_KEY") });
  // Cache node results for a week so repeated requests for the same audio are cheap.
  const opts = { cache_age: 60 * 60 * 24 * 7 };
  // Transcribe the audio, requesting segment timestamps and alignment.
  const transcribe = new TranscribeSpeech(
    { audio_uri, segment: true, align: true },
    opts,
  );
  const chaptersSchema = {
    type: "object",
    properties: {
      chapters: {
        type: "array",
        items: { type: "string" },
        minItems: 1,
        maxItems: 16,
      },
    },
  };
  // Generate up to 16 chapter titles from the transcript text.
  const chapters = new ComputeJSON(
    {
      prompt: sb.concat(
        listChapters,
        "\n\nTRANSCRIPT:\n\n",
        transcribe.future.text,
      ),
      json_schema: chaptersSchema,
      model: "Mixtral8x7BInstruct",
    },
    opts,
  );
  const timestampedSchema = {
    type: "object",
    properties: {
      chapters: {
        type: "array",
        items: {
          type: "object",
          properties: {
            section: { type: "string" },
            start: { type: "number" },
          },
          required: ["section", "start"],
        },
      },
    },
  };
  // Estimate the start time (in seconds) of each chapter within the transcript.
  const timestamps = new ComputeJSON(
    {
      prompt: sb.concat(
        timestampPrompt,
        "SECTIONS: ",
        sb.jq<"string">(chapters.future.json_object, ".chapters | @json"),
        "\n\nTRANSCRIPT:\n\n",
        transcribe.future.text,
      ),
      json_schema: timestampedSchema,
      model: "Mixtral8x7BInstruct",
    },
    opts,
  );
  // Run the whole graph in one call; chapters and timestamps consume upstream futures.
  const res = await substrate.run(transcribe, chapters, timestamps);
  return {
    transcript: res.get(transcribe),
    timestampedChapters: res.get(timestamps).json_object?.chapters,
  };
}
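// Rough usage sketch (hypothetical values): given a publicly reachable audio URL
// and SUBSTRATE_API_KEY set in the environment, a call would look like:
//   const { transcript, timestampedChapters } = await processAudio("https://example.com/episode.mp3");
// Per the schema above, timestampedChapters should resemble:
//   [{ section: "Introduction", start: 0 }, { section: "Wrap-up", start: 1342.5 }]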
// Render site
export default async function(req: Request): Promise<Response> {
  // The audio file location is passed as the "url" query parameter.
  const url = new URL(req.url);
  const audio_uri = url.searchParams.get("url");
  let html = `
<!doctype html>
<html lang="en">
<head>