import Mux from "npm:@mux/mux-node";
import { AssemblyAI } from "npm:assemblyai";
import OpenAI from "npm:openai";
import { zodResponseFormat } from "npm:openai/helpers/zod";
import z from "npm:zod";
import { parseSync, type NodeCue } from "npm:subtitle";
const mux = new Mux({
  tokenId: Deno.env.get("MUX_TOKEN_ID"),
  tokenSecret: Deno.env.get("MUX_TOKEN_SECRET"),
});
const assemblyai = new AssemblyAI({
  apiKey: Deno.env.get("ASSEMBLY_AI_KEY"),
});
const openai = new OpenAI({
  apiKey: Deno.env.get("OPENAI_API_KEY"),
});
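// Not part of the original flow: a minimal fail-fast guard, assuming these
// four environment variables are the only credentials the script needs.
// Missing keys surface here with a clear message instead of as opaque API errors later.
for (const key of ["MUX_TOKEN_ID", "MUX_TOKEN_SECRET", "ASSEMBLY_AI_KEY", "OPENAI_API_KEY"]) {
  if (!Deno.env.get(key)) {
    throw new Error(`Missing required environment variable: ${key}`);
  }
}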
// Fetch a Mux asset and validate that it has everything this script needs:
// a ready status, a text track, a public playback ID, and an audio-only
// static rendition we can hand to AssemblyAI.
async function getMuxAsset(assetId: string) {
  if (!assetId) throw new Error("A Mux asset ID is required.");

  let asset;
  try {
    asset = await mux.video.assets.retrieve(assetId);
    console.log(asset);
  } catch (err) {
    throw new Error("Error retrieving Mux asset. Are you sure that's a valid asset ID?", { cause: err });
  }

  if (asset.status !== "ready") {
    throw new Error("Asset is not ready");
  }
  if (!asset.tracks?.find((track) => track.type === "text")) {
    throw new Error("Asset does not have a text track");
  }
  if (!asset.playback_ids?.find((playbackId) => playbackId.policy === "public")) {
    throw new Error("This script currently requires a public playback ID");
  }
  if (asset.static_renditions?.status !== "ready") {
    throw new Error("Asset's static renditions are not ready");
  }
  if (!asset.static_renditions.files?.some((file) => file.ext === "m4a")) {
    throw new Error("Asset does not have an audio-only static rendition");
  }

  return asset;
}
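// Usage sketch (the asset ID below is a placeholder, not a real one):
//   const asset = await getMuxAsset("YOUR_ASSET_ID");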
// Transform ["Matt", "Nick"] into { "A": "Matt", "B": "Nick" } so that speaker
// names line up with the letter labels AssemblyAI assigns during diarization
function createSpeakersObject(speakersArray: string[]) {
  if (speakersArray.length > 26) {
    throw new Error("We only support 26 speakers for now");
  }
  const speakersObject: { [key: string]: string } = {};
  speakersArray.forEach((s, i) => {
    // 65 is the char code for "A", so index 0 maps to "A", 1 to "B", and so on
    speakersObject[String.fromCharCode(i + 65)] = s;
  });
  return speakersObject;
}
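// AssemblyAI's diarized utterances carry letter labels like "A" and "B", so
// speakers[utterance.speaker] resolves the human-readable name. For example:
//   const speakers = createSpeakersObject(["Matt", "Nick"]); // { A: "Matt", B: "Nick" }
//   speakers["A"]; // "Matt"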
type MuxCuePoint = { startTime: number; endTime?: number; value: string };

// Build cue points from the asset's existing transcript: download the text
// track from Mux as WebVTT and convert each cue into a cue point
async function getCuePointsFromMux(asset: Mux.Video.Assets.Asset): Promise<MuxCuePoint[]> {
  // getMuxAsset has already validated that these exist
  const transcriptTrack = asset.tracks!.find((track) => track.type === "text")!;
  const playbackId = asset.playback_ids!.find((playbackId) => playbackId.policy === "public")!;
  const TRACK_ID = transcriptTrack.id;
  const PLAYBACK_ID = playbackId.id;

  const vttUrl = `https://stream.mux.com/${PLAYBACK_ID}/text/${TRACK_ID}.vtt`;
  const vttResponse = await fetch(vttUrl);
  const vttText = await vttResponse.text();
  const vttNodes = parseSync(vttText);

  // parseSync reports timestamps in milliseconds; cue points use seconds
  const cuePoints = vttNodes
    .filter((node): node is NodeCue => node.type === "cue")
    .map((node) => ({
      startTime: node.data.start / 1000,
      endTime: node.data.end / 1000,
      value: node.data.text,
    }));
  return cuePoints;
}
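// Each resulting cue point looks like this (illustrative values):
//   { startTime: 0.5, endTime: 3.2, value: "Welcome to the show." }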
type AssemblyAICuePoint = { startTime: number; endTime?: number; value: { speaker: string; text: string } };

// Transcribe the asset's audio-only static rendition with AssemblyAI, with
// speaker diarization enabled so each utterance is attributed to a speaker
async function getCuePointsFromAssemblyAI(
  asset: Mux.Video.Assets.Asset,
  speakers: { [key: string]: string },
): Promise<AssemblyAICuePoint[]> {
  // getMuxAsset has already validated that a public playback ID exists
  const playbackId = asset.playback_ids!.find((playbackId) => playbackId.policy === "public")!;
  const PLAYBACK_ID = playbackId.id;
  const aaiTranscript = await assemblyai.transcripts.transcribe({
    audio_url: `https://stream.mux.com/${PLAYBACK_ID}/audio.m4a`,
    speaker_labels: true,
    speakers_expected: Object.keys(speakers).length,
  });