import { spawn } from "child_process";
import { existsSync, mkdirSync, readdirSync, statSync, unlinkSync } from "fs";
import { extname, join } from "path";

/** Options accepted by {@link downloadMostWatchedSegment}. */
export interface DownloadOptions {
  /** Video URL handed to yt-dlp. */
  url: string;
  /** Directory that receives the temporary download and the final clip. */
  outputDir: string;
  /** Maximum number of clusters to list. */
  topN: number;
  /**
   * Minimum intensity a heatmap segment needs to be considered.
   * It is logged as `threshold * 100` percent, so presumably a 0..1 fraction.
   */
  threshold: number;
}

/**
 * One heatmap entry as it appears in the yt-dlp JSON. Several alternative
 * field names are accepted for each quantity; see the accessors below for
 * the lookup order.
 */
interface RawHeatmapSegment {
  start_seconds?: number;
  start_time?: number;
  end_seconds?: number;
  end_time?: number;
  intensity?: number;
  heat?: number;
  value?: number;
}

/** A cluster of neighbouring high-intensity segments. */
interface ProcessedSegment {
  /** Start of the earliest member segment. */
  start: number;
  /** Largest end among the member segments. */
  end: number;
  /** Intensity of the cluster's first (earliest-starting) member. */
  intensity: number;
  /** Sum of the intensities of all member segments. */
  totalIntensity: number;
  /** Number of member segments. */
  segmentCount: number;
}

/** The subset of the yt-dlp metadata this module uses. */
interface VideoInfo {
  title: string;
  duration: number;
  heatmap?: RawHeatmapSegment[];
}

/** A heatmap segment normalised to a single set of field names. */
interface RawSegment {
  start: number;
  end: number;
  intensity: number;
}

/**
 * Runs an external command to completion.
 *
 * stdin is never connected, so a tool that tries to prompt cannot wait on
 * input forever. stdout is either captured or discarded outright; it is
 * never left as an unread pipe, because a child that fills an unread pipe
 * blocks.
 *
 * @param command Executable name, also used as the label in error messages.
 * @param args Arguments passed verbatim (no shell is involved).
 * @param captureStdout When true the collected stdout is the resolved value;
 *   when false stdout is ignored and the promise resolves with "".
 * @returns The captured stdout text.
 * @throws Error (as a rejection) when the command cannot be started or
 *   exits with a non-zero/absent exit code; stderr is included in the latter.
 */
function runTool(
  command: string,
  args: string[],
  captureStdout: boolean
): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const child = spawn(command, args, {
      stdio: ["ignore", captureStdout ? "pipe" : "ignore", "pipe"],
    });

    let stdout = "";
    let stderr = "";

    child.stdout?.on("data", (chunk) => {
      stdout += chunk.toString();
    });
    child.stderr?.on("data", (chunk) => {
      stderr += chunk.toString();
    });

    child.on("error", (err) => {
      reject(new Error(`Failed to run ${command}: ${err.message}`));
    });
    child.on("close", (code) => {
      if (code !== 0) {
        reject(new Error(`${command} failed: ${stderr}`));
        return;
      }
      resolve(stdout);
    });
  });
}

/**
 * Fetches title, duration and heatmap for a video via `yt-dlp --dump-json`.
 *
 * @param url Video URL. It is placed after `--` so a value starting with
 *   `-` is not parsed as an option.
 * @returns Metadata with defaults applied: title "video" and duration 0 when
 *   missing or of the wrong type; heatmap undefined unless it is an array.
 * @throws Error when yt-dlp fails or its output is not a JSON object.
 */
async function getVideoInfo(url: string): Promise<VideoInfo> {
  const stdout = await runTool(
    "yt-dlp",
    ["--dump-json", "--no-download", "--", url],
    true
  );

  let parsed: unknown;
  try {
    parsed = JSON.parse(stdout);
  } catch (parseError) {
    throw new Error(`Failed to parse video info: ${parseError}`);
  }
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error("Failed to parse video info: expected a JSON object");
  }

  const record = parsed as Record<string, unknown>;
  const title =
    typeof record.title === "string" && record.title !== ""
      ? record.title
      : "video";
  const duration =
    typeof record.duration === "number" && Number.isFinite(record.duration)
      ? record.duration
      : 0;
  // Drop non-object entries so the field accessors never dereference null.
  const heatmap = Array.isArray(record.heatmap)
    ? record.heatmap.filter(
        (entry): entry is RawHeatmapSegment =>
          typeof entry === "object" && entry !== null
      )
    : undefined;

  return { title, duration, heatmap };
}

/**
 * Downloads the whole video with yt-dlp, passing only `-o` and the URL.
 *
 * @param url Video URL (placed after `--`).
 * @param outputPath yt-dlp output template, e.g. ending in `.%(ext)s`.
 * @returns `outputPath` exactly as given. When it is a template this is NOT
 *   the name of the file on disk; callers must locate the real file.
 * @throws Error when yt-dlp cannot be started or exits unsuccessfully.
 */
async function downloadFullVideo(
  url: string,
  outputPath: string
): Promise<string> {
  // stdout (progress output) is discarded rather than piped and left unread.
  await runTool("yt-dlp", ["-o", outputPath, "--", url], false);
  return outputPath;
}

/**
 * Copies the `[startTime, endTime)` range of a media file into a new file
 * with ffmpeg using stream copy (`-c copy`, no re-encoding).
 *
 * An existing `outputPath` is overwritten (`-y`).
 *
 * @param inputPath Source media file.
 * @param outputPath Destination file.
 * @param startTime Range start in seconds.
 * @param endTime Range end in seconds; must be greater than `startTime`.
 * @throws Error when the range is empty/invalid or ffmpeg fails.
 */
async function extractSegment(
  inputPath: string,
  outputPath: string,
  startTime: number,
  endTime: number
): Promise<void> {
  const duration = endTime - startTime;
  if (!Number.isFinite(startTime) || !(duration > 0)) {
    throw new Error(
      `Invalid segment range: ${startTime} - ${endTime}`
    );
  }

  await runTool(
    "ffmpeg",
    [
      "-nostdin",
      "-y",
      "-ss", startTime.toString(),
      "-i", inputPath,
      "-t", duration.toString(),
      "-c", "copy",
      outputPath,
    ],
    false
  );
}

/**
 * Reduces a title to ASCII letters, digits, `-` and `_`: every other
 * non-whitespace character is removed, whitespace runs become `_`, and the
 * result is capped at 100 characters.
 *
 * @returns The sanitised name, or "video" when nothing survives (for
 *   example a title written entirely in a non-Latin script).
 */
function sanitizeFilename(filename: string): string {
  const cleaned = filename
    .replace(/[^a-zA-Z0-9\s\-_]/g, "")
    .replace(/\s+/g, "_")
    .substring(0, 100);
  return cleaned === "" ? "video" : cleaned;
}

/**
 * Formats seconds as `M:SS` (minutes are not folded into hours).
 * Negative or non-finite input yields "0:00".
 */
function formatTime(seconds: number): string {
  if (!Number.isFinite(seconds) || seconds < 0) {
    return "0:00";
  }
  const mins = Math.floor(seconds / 60);
  const secs = Math.floor(seconds % 60);
  return `${mins}:${secs.toString().padStart(2, "0")}`;
}

/** Start time: `start_seconds`, else `start_time`, else 0. */
function getStartTime(segment: RawHeatmapSegment): number {
  return segment.start_seconds ?? segment.start_time ?? 0;
}

/** End time: `end_seconds`, else `end_time`, else 0. */
function getEndTime(segment: RawHeatmapSegment): number {
  return segment.end_seconds ?? segment.end_time ?? 0;
}

/** Intensity: `intensity`, else `heat`, else `value`, else 0. */
function getIntensity(segment: RawHeatmapSegment): number {
  return segment.intensity ?? segment.heat ?? segment.value ?? 0;
}

/**
 * Groups segments whose intensity is at least `threshold` into clusters.
 *
 * Segments are processed in start order; one joins the current cluster when
 * it overlaps it or starts no more than `maxGapSeconds` after the cluster's
 * end, otherwise it opens a new cluster. The input array and its elements
 * are left untouched.
 *
 * @param segments Normalised heatmap segments.
 * @param threshold Minimum intensity (inclusive) for a segment to count.
 * @param maxGapSeconds Largest gap bridged when merging; defaults to 10.
 * @returns Clusters ordered by `totalIntensity`, highest first; clusters
 *   with equal totals keep their chronological order.
 */
function clusterHighIntensitySegments(
  segments: RawSegment[],
  threshold: number,
  maxGapSeconds = 10
): ProcessedSegment[] {
  // filter() returns a fresh array, so sorting it leaves `segments` alone.
  const hot = segments
    .filter((seg) => seg.intensity >= threshold)
    .sort((a, b) => a.start - b.start);

  const clusters: ProcessedSegment[] = [];
  let current: ProcessedSegment | undefined;

  for (const seg of hot) {
    // Start-sorted input means a negative gap is an overlap, however large,
    // and must merge just like a small positive gap.
    if (current !== undefined && seg.start - current.end <= maxGapSeconds) {
      current.end = Math.max(current.end, seg.end);
      current.totalIntensity += seg.intensity;
      current.segmentCount += 1;
      continue;
    }

    current = {
      start: seg.start,
      end: seg.end,
      intensity: seg.intensity,
      totalIntensity: seg.intensity,
      segmentCount: 1,
    };
    clusters.push(current);
  }

  clusters.sort((a, b) => b.totalIntensity - a.totalIntensity);
  return clusters;
}

/**
 * Normalises raw heatmap entries, discards malformed ones (non-finite
 * values, negative start, end not after start) and returns the strongest
 * clusters.
 *
 * @param segments Raw heatmap entries.
 * @param topN Maximum number of clusters to return; a non-positive or NaN
 *   value yields an empty list.
 * @param threshold Minimum intensity for a segment to be clustered.
 */
function getTopSegments(
  segments: RawHeatmapSegment[],
  topN: number,
  threshold: number
): ProcessedSegment[] {
  if (!(topN > 0)) {
    return [];
  }

  const validSegments = segments
    .map((seg) => ({
      start: getStartTime(seg),
      end: getEndTime(seg),
      intensity: getIntensity(seg),
    }))
    .filter(
      (seg) =>
        Number.isFinite(seg.start) &&
        Number.isFinite(seg.end) &&
        Number.isFinite(seg.intensity) &&
        seg.start >= 0 &&
        seg.end > seg.start
    );

  return clusterHighIntensitySegments(validSegments, threshold).slice(
    0,
    Math.floor(topN)
  );
}

/**
 * Finds the finished download among the entries of `directory` (not
 * recursive) whose name starts with `prefix`.
 *
 * Names ending in `.part` or `.ytdl` are skipped; presumably these are
 * yt-dlp's in-progress artefacts (verify against the yt-dlp version in use).
 * The largest remaining regular file wins, on the assumption that any other
 * files sharing the prefix are smaller than the video itself.
 *
 * @returns The path of the chosen file, or undefined when nothing matches.
 */
function findDownloadedFile(
  directory: string,
  prefix: string
): string | undefined {
  let best: { path: string; size: number } | undefined;

  for (const name of readdirSync(directory)) {
    if (
      !name.startsWith(prefix) ||
      name.endsWith(".part") ||
      name.endsWith(".ytdl")
    ) {
      continue;
    }
    const candidate = join(directory, name);
    const stats = statSync(candidate);
    if (!stats.isFile()) {
      continue;
    }
    if (best === undefined || stats.size > best.size) {
      best = { path: candidate, size: stats.size };
    }
  }

  return best?.path;
}

/**
 * Lists the most-watched regions of a video and saves the top one as a clip.
 *
 * Steps: fetch metadata, cluster the heatmap, print up to `topN` clusters,
 * download the full video into `outputDir`, cut the strongest cluster out
 * with ffmpeg, then delete the full download. Returns early, after logging,
 * when the video has no heatmap or no segment reaches the threshold.
 *
 * @param options See {@link DownloadOptions}.
 * @throws Error when yt-dlp or ffmpeg fails, or the download cannot be found.
 */
export async function downloadMostWatchedSegment(
  options: DownloadOptions
): Promise<void> {
  const { url, outputDir, topN, threshold } = options;

  if (!existsSync(outputDir)) {
    mkdirSync(outputDir, { recursive: true });
  }

  console.log("Fetching video information from YouTube...");
  const info = await getVideoInfo(url);
  const safeTitle = sanitizeFilename(info.title);

  console.log(`Video: ${info.title}`);
  console.log(`Duration: ${formatTime(info.duration)}`);

  if (!info.heatmap || info.heatmap.length === 0) {
    console.log("\nNo heatmap data available.");
    return;
  }

  console.log(`\nHeatmap data found: ${info.heatmap.length} segments`);
  console.log(`Intensity threshold: ${(threshold * 100).toFixed(0)}%`);
  console.log(`\nTop ${topN} high-intensity regions:\n`);

  const topSegments = getTopSegments(info.heatmap, topN, threshold);
  const topSegment = topSegments[0];
  if (topSegment === undefined) {
    console.log("No high-intensity regions found.");
    return;
  }

  topSegments.forEach((seg, index) => {
    const duration = seg.end - seg.start;
    console.log(
      `${index + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Total Intensity: ${seg.totalIntensity.toFixed(2)}`
    );
  });
  console.log("");

  // Step 1: download the full video. yt-dlp fills in the extension, so the
  // real file name is only known after the download.
  const tempPrefix = `${safeTitle}_full_temp.`;
  console.log(`Downloading full video (using your yt-dlp config)...`);
  await downloadFullVideo(url, join(outputDir, `${tempPrefix}%(ext)s`));

  const actualFullPath = findDownloadedFile(outputDir, tempPrefix);
  if (actualFullPath === undefined) {
    throw new Error("Could not find downloaded video file");
  }
  console.log(`Downloaded to: ${actualFullPath}`);

  // Step 2: cut the top cluster out, keeping the download's extension.
  const outputPath = join(
    outputDir,
    `${safeTitle}_segment${extname(actualFullPath)}`
  );
  console.log(
    `Extracting segment: ${formatTime(topSegment.start)} - ${formatTime(topSegment.end)}`
  );
  try {
    await extractSegment(
      actualFullPath,
      outputPath,
      topSegment.start,
      topSegment.end
    );
  } finally {
    // Remove the full download whether or not extraction succeeded.
    try {
      unlinkSync(actualFullPath);
    } catch {
      // Best-effort cleanup: a leftover temp file must not mask the result.
    }
  }

  console.log(`\nSaved to: ${outputPath}`);
  console.log("Done!");
}