-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.ts
247 lines (204 loc) · 7.52 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import { exec, execFile } from "child_process";
import { createReadStream, existsSync, readFileSync, writeFileSync } from "fs";
import path from "path";
import { basename, extname } from "path";
import { promisify } from "util";
import Anthropic from "@anthropic-ai/sdk";
import { FileState, GoogleAIFileManager } from "@google/generative-ai/server";
import { GoogleGenerativeAI } from "@google/generative-ai";
import Groq from "groq-sdk";
// Selects the transcription backend. "GROQ" transcribes via Whisper and then
// summarizes with Anthropic; "GEMINI" transcribes and summarizes in one pass
// (see transcribeWithGemini).
const PLATFORM: "GROQ" | "GEMINI" = "GROQ";
// =================== TRANSCRIPTION ===================
// Groq client; reads GROQ_API_KEY from the environment (undefined if unset).
const groq = new Groq({
apiKey: process.env.GROQ_API_KEY,
});
/**
 * Transcribes an audio file with Groq's Whisper endpoint and writes the
 * plain-text transcript to `outputPath`.
 *
 * @param videoPath - path to the local audio file (resolved before use)
 * @param outputPath - file the transcript text is written to
 * @throws if the input file does not exist
 */
async function transcribeWithGroq(
  videoPath: string,
  outputPath: string
): Promise<void> {
  console.log("Transcribing audio...", videoPath, " to ", outputPath);
  const resolvedInput = path.resolve(videoPath);
  if (!existsSync(resolvedInput)) {
    throw new Error(`File not found: ${resolvedInput}`);
  }
  // Model trade-offs (word error rate, per the original notes):
  //   distil-whisper-large-v3-en  ~13% WER (English only, fastest)
  //   whisper-large-v3            ~10% WER
  //   whisper-large-v3-turbo      ~12% WER
  const transcription = await groq.audio.transcriptions.create({
    file: createReadStream(resolvedInput),
    model: "distil-whisper-large-v3-en",
    response_format: "json",
    language: "en", // Don't forget to change when testing other languages
  });
  console.log("Transcription completed successfully");
  writeFileSync(outputPath, transcription.text);
  console.log("Transcription saved to:", outputPath);
}
/**
 * Uploads the audio file to the Gemini Files API, polls until processing
 * completes, then writes a transcript to `outputPath` and a summary to
 * `summaryPath`. Requires GEMINI_API_KEY in the environment.
 *
 * @param videoPath - path to the local audio file (resolved before use)
 * @param outputPath - file the transcript text is written to
 * @param summaryPath - file the summary text is written to
 * @throws if the file is missing locally or Gemini reports FAILED processing
 */
async function transcribeWithGemini(
  videoPath: string,
  outputPath: string,
  summaryPath: string
): Promise<void> {
  // Uncomment to delete all previously uploaded files
  // const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);
  // const files = (await fileManager.listFiles()).files;
  // for await (const file of files) {
  //   await fileManager.deleteFile(file.name);
  // }
  // console.log(await fileManager.listFiles());
  // return;
  console.log("Transcribing audio...", videoPath, " to ", outputPath);
  const absoluteVideoPath = path.resolve(videoPath);
  if (!existsSync(absoluteVideoPath)) {
    throw new Error(`File not found: ${absoluteVideoPath}`);
  }
  const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);
  // Fix: upload the resolved path — the original passed the raw `videoPath`,
  // which could diverge from the path that was just existence-checked.
  const uploadResult = await fileManager.uploadFile(absoluteVideoPath, {
    mimeType: "audio/mp3",
    displayName: "Audio sample",
  });
  // Poll every 10s until the Files API finishes processing the upload.
  let file = await fileManager.getFile(uploadResult.file.name);
  while (file.state === FileState.PROCESSING) {
    process.stdout.write(".");
    await new Promise((resolve) => setTimeout(resolve, 10_000));
    file = await fileManager.getFile(uploadResult.file.name);
  }
  if (file.state === FileState.FAILED) {
    throw new Error("Audio processing failed.");
  }
  console.log(
    `Uploaded file ${uploadResult.file.displayName} as: ${uploadResult.file.uri}`
  );
  const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
  const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash-002" });
  const transcription = await model.generateContent([
    "Generate a transcript of the speech.",
    {
      fileData: {
        fileUri: uploadResult.file.uri,
        mimeType: uploadResult.file.mimeType,
      },
    },
  ]);
  console.log("Transcription completed successfully");
  writeFileSync(outputPath, transcription.response.text());
  console.log("Transcription saved to:", outputPath);
  const summary = await model.generateContent([
    `Please provide detailed comprehensive summary for the audio.`,
    {
      fileData: {
        fileUri: uploadResult.file.uri,
        mimeType: uploadResult.file.mimeType,
      },
    },
  ]);
  console.log("Summary completed successfully");
  writeFileSync(summaryPath, summary.response.text());
  console.log("Summary saved to:", summaryPath);
}
// =================== SUMMARIZATION ===================
/**
 * Map-reduce summarization: summarizes ~100k-character chunks of `text`
 * individually with Claude, then asks for a final summary over the
 * concatenated chunk summaries, and writes the result to `outputPath`.
 *
 * @param text - full transcript text to summarize
 * @param outputPath - file the final summary is written to
 */
async function summarizeTranscriptionAndSave(
  text: string,
  outputPath: string
): Promise<void> {
  const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
  console.log("Summarizing text...");
  // Split the text into chunks of approximately 100,000 characters
  const chunkSize = 100000;
  const chunkCount = Math.ceil(text.length / chunkSize);
  const chunks = Array.from({ length: chunkCount }, (_, idx) =>
    text.slice(idx * chunkSize, (idx + 1) * chunkSize)
  );
  let fullSummary = "";
  for (const chunk of chunks) {
    console.log("Summarizing chunk...");
    const response = await anthropic.messages.create({
      model: "claude-3-sonnet-20240229",
      max_tokens: 1000,
      messages: [
        {
          role: "user",
          content: `Please provide a concise summary of the following text:\n\n${chunk}`,
        },
      ],
    });
    const firstBlock = response.content[0];
    fullSummary += firstBlock.type === "text" ? firstBlock.text + "\n\n" : "\n\n";
  }
  console.log("Final summarization...");
  const finalResponse = await anthropic.messages.create({
    model: "claude-3-sonnet-20240229",
    max_tokens: 2000,
    messages: [
      {
        role: "user",
        content: `Please provide a final, comprehensive summary of the following text, which consists of summaries of larger chunks:\n\n${fullSummary}`,
      },
    ],
  });
  const finalBlock = finalResponse.content[0];
  writeFileSync(outputPath, finalBlock.type === "text" ? finalBlock.text : "");
  console.log(`Summary generated: ${outputPath}`);
}
// =================== AUDIO DOWNLOAD ===================
const execAsync = promisify(exec);
/**
 * Replaces literal `\uXXXX` escape sequences in `str` with the UTF-16 code
 * unit they encode (case-insensitive hex digits). Other text is unchanged.
 */
function decodeUnicode(str: string): string {
  const escapePattern = /\\u[\dA-F]{4}/gi;
  return str.replace(escapePattern, (sequence) => {
    // Strip the leading "\u" and parse the 4 hex digits.
    const codeUnit = parseInt(sequence.slice(2), 16);
    return String.fromCharCode(codeUnit);
  });
}
/**
 * Downloads the audio track of a video URL as an mp3 via yt-dlp and returns
 * the absolute path to the resulting file. Skips the download when a file
 * with the derived name already exists.
 *
 * Security: the URL comes from the CLI (untrusted). The original built shell
 * command strings with the URL interpolated, which allowed `$(...)`/backtick
 * injection. This version uses execFile with an argv array, so no shell is
 * involved and the URL is passed as a literal argument.
 *
 * @param url - video URL understood by yt-dlp
 * @returns absolute path of the downloaded (or pre-existing) mp3 file
 * @throws if yt-dlp fails or the expected output file is not produced
 */
export async function downloadVideo(url: string): Promise<string> {
  const execFileAsync = promisify(execFile);
  console.log("Getting audio info...");
  const { stdout: infoOutput } = await execFileAsync("yt-dlp", ["-j", url]);
  const audioInfo = JSON.parse(infoOutput);
  const decodedTitle = decodeUnicode(audioInfo.title); // Allow non-latin characters
  const audioTitle = decodedTitle
    .replace(/[<>:"/\\|?*\x00-\x1F]/g, "") // Remove characters invalid for filenames
    .trim()
    .replace(/\s+/g, "_");
  console.log(`Audio title: ${audioTitle}`);
  const outputTemplate = `${audioTitle}.%(ext)s`;
  const expectedFilePath = path.resolve(`${audioTitle}.mp3`);
  if (existsSync(expectedFilePath)) {
    console.log(`Audio file already exists: ${expectedFilePath}`);
    return expectedFilePath;
  }
  console.log("Downloading audio...");
  const { stdout: downloadOutput } = await execFileAsync("yt-dlp", [
    "-f",
    "bestaudio",
    "-x",
    "--audio-format",
    "mp3",
    "--audio-quality",
    "0",
    "-o",
    outputTemplate,
    url,
  ]);
  console.log(downloadOutput);
  if (!existsSync(expectedFilePath)) {
    throw new Error(`Downloaded file not found: ${expectedFilePath}`);
  }
  console.log(`Audio downloaded: ${expectedFilePath}`);
  return expectedFilePath;
}
// =================== MAIN ===================
/**
 * CLI entry point: downloads the audio for the URL given as the first
 * argument, then transcribes and summarizes it with the backend selected by
 * PLATFORM. Output files are named after the downloaded file's basename.
 */
async function main() {
  const videoUrl = Bun.argv[2];
  if (!videoUrl) {
    console.error("Please provide a YouTube video URL as an argument.");
    process.exit(1);
  }
  const videoPath = await downloadVideo(videoUrl);
  const videoTitle = basename(videoPath, extname(videoPath));
  const transcriptionPath = `${videoTitle}.md`;
  const summaryPath = `summary_${videoTitle}.md`;
  if (PLATFORM === "GROQ") {
    await transcribeWithGroq(videoPath, transcriptionPath);
    const transcriptionText = readFileSync(transcriptionPath, "utf-8");
    await summarizeTranscriptionAndSave(transcriptionText, summaryPath);
  }
  if (PLATFORM === "GEMINI") {
    // Gemini handles both transcription and summarization in one call.
    await transcribeWithGemini(videoPath, transcriptionPath, summaryPath);
  }
  // Cleanup intentionally disabled; re-enable both lines together. (The
  // original logged "Video file deleted" unconditionally even though the
  // unlink was commented out — a misleading log.)
  // unlinkSync(videoPath);
  // console.log("Video file deleted");
}
// Surface rejections explicitly instead of leaving a floating promise.
main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});