From 4191c194a2fe3c64c45c9e189c46fee9bc6cb974 Mon Sep 17 00:00:00 2001 From: Kilo Code Cloud Date: Wed, 14 Jan 2026 20:25:06 +0000 Subject: [PATCH] feat: download full video then extract segment with ffmpeg --- src/cli/args.ts | 8 --- src/cli/downloader.ts | 138 +++++++++++++++++++++++++----------------- src/cli/index.ts | 4 +- 3 files changed, 84 insertions(+), 66 deletions(-) diff --git a/src/cli/args.ts b/src/cli/args.ts index 40518ce..73dcc3a 100644 --- a/src/cli/args.ts +++ b/src/cli/args.ts @@ -1,7 +1,6 @@ export interface CliArgs { url?: string; output: string; - format: string; topN: number; threshold: number; } @@ -9,7 +8,6 @@ export interface CliArgs { export function parseArgs(): CliArgs { const args: CliArgs = { output: "./downloads", - format: "best", topN: 5, threshold: 0.5, }; @@ -23,9 +21,6 @@ export function parseArgs(): CliArgs { if (arg === "-o" || arg === "--output") { args.output = nextArg || "./downloads"; i++; - } else if (arg === "-f" || arg === "--format") { - args.format = nextArg || "best"; - i++; } else if (arg === "-n" || arg === "--top") { args.topN = parseInt(nextArg || "5", 10); i++; @@ -42,10 +37,8 @@ Arguments: Options: -o, --output Output directory (default: ./downloads) - -f, --format Video format (default: best) -n, --top Number of top regions (default: 5) -t, --threshold <0-1> Intensity threshold (default: 0.5) - Lower = more segments, Higher = stricter -h, --help Show this help message Examples: @@ -54,7 +47,6 @@ Examples: `); process.exit(0); } else if (!arg.startsWith("-") && !arg.includes("bun")) { - // This is likely the URL if (!arg.startsWith("bun") && !arg.includes("node")) { args.url = arg; } diff --git a/src/cli/downloader.ts b/src/cli/downloader.ts index 449400c..96fdfb6 100644 --- a/src/cli/downloader.ts +++ b/src/cli/downloader.ts @@ -1,13 +1,12 @@ import { spawn } from "child_process"; -import { writeFileSync, mkdirSync, existsSync } from "fs"; +import { unlinkSync, existsSync, mkdirSync } from "fs"; import { join } from "path"; export interface DownloadOptions { url: string; outputDir: string; - format: string; topN: number; - intensityThreshold: number; + threshold: number; } interface RawHeatmapSegment { @@ -39,8 +38,6 @@ async function getVideoInfo(url: string): Promise { const ytDlp = spawn("yt-dlp", [ "--dump-json", "--no-download", - "--compat-option", - "no-youtube-channel-redirect", url, ]); @@ -80,19 +77,10 @@ async function getVideoInfo(url: string): Promise { }); } -async function downloadSegment( - url: string, - outputPath: string, - startTime: number, - endTime: number, - format: string -): Promise { - const section = `*${startTime.toFixed(3)}-${endTime.toFixed(3)}`; - +async function downloadFullVideo(url: string, outputPath: string): Promise { return new Promise((resolve, reject) => { + // Use minimal options - let yt-dlp use its config file const ytDlp = spawn("yt-dlp", [ - "-f", format, - "--download-sections", section, "-o", outputPath, url, ]); @@ -108,7 +96,7 @@ async function downloadSegment( reject(new Error(`yt-dlp failed: ${stderr}`)); return; } - resolve(); + resolve(outputPath); }); ytDlp.on("error", (err) => { @@ -117,6 +105,43 @@ async function downloadSegment( }); } +async function extractSegment( + inputPath: string, + outputPath: string, + startTime: number, + endTime: number +): Promise { + const duration = endTime - startTime; + + return new Promise((resolve, reject) => { + const ffmpeg = spawn("ffmpeg", [ + "-ss", startTime.toString(), + "-i", inputPath, + "-t", duration.toString(), + "-c", "copy", + outputPath, + ]); + + let stderr = ""; + + ffmpeg.stderr.on("data", (data) => { + stderr += data.toString(); + }); + + ffmpeg.on("close", (code) => { + if (code !== 0) { + reject(new Error(`ffmpeg failed: ${stderr}`)); + return; + } + resolve(); + }); + + ffmpeg.on("error", (err) => { + reject(new Error(`Failed to run ffmpeg: ${err.message}`)); + }); + }); +} + function sanitizeFilename(filename: string): string { return filename .replace(/[^a-zA-Z0-9\s\-_]/g, "") @@ -176,7 +201,6 @@ function clusterHighIntensitySegments( } // Check if this segment is adjacent or overlapping with current cluster - // Adjacent means: seg.start is within a small gap of currentCluster.end const gap = seg.start - currentCluster.end; const maxGap = 10; // Allow up to 10 second gap @@ -231,7 +255,6 @@ function getTopSegments( topN: number, threshold: number ): ProcessedSegment[] { - // Convert to processed format and filter valid segments const validSegments = segments .map(seg => ({ start: getStartTime(seg), @@ -250,21 +273,17 @@ function getTopSegments( return []; } - // Cluster high-intensity segments const clusters = clusterHighIntensitySegments(validSegments, threshold); - return clusters.slice(0, topN); } export async function downloadMostWatchedSegment(options: DownloadOptions): Promise { - const { url, outputDir, format, topN, intensityThreshold } = options; + const { url, outputDir, topN, threshold } = options; - // Create output directory if it doesn't exist if (!existsSync(outputDir)) { mkdirSync(outputDir, { recursive: true }); } - // Get video info with heatmap data from YouTube console.log("Fetching video information from YouTube..."); const info = await getVideoInfo(url); const safeTitle = sanitizeFilename(info.title); @@ -272,31 +291,22 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom console.log(`Video: ${info.title}`); console.log(`Duration: ${formatTime(info.duration)}`); - // Check for heatmap data if (!info.heatmap || info.heatmap.length === 0) { - console.log("\nNo heatmap data available for this video."); - console.log("Downloading full video instead..."); - - const outputPath = join(outputDir, `${safeTitle}.%(ext)s`); - await downloadSegment(url, outputPath, 0, info.duration, format); + console.log("\nNo heatmap data available."); return; } console.log(`\nHeatmap data found: ${info.heatmap.length} segments`); - console.log(`Intensity threshold: ${(intensityThreshold * 100).toFixed(0)}%`); - console.log(`\nTop ${topN} high-intensity regions (clustered segments):\n`); + console.log(`Intensity threshold: ${(threshold * 100).toFixed(0)}%`); + console.log(`\nTop ${topN} high-intensity regions:\n`); - // Get top clustered segments - const topSegments = getTopSegments(info.heatmap, topN, intensityThreshold); + const topSegments = getTopSegments(info.heatmap, topN, threshold); if (topSegments.length === 0) { - console.log("No high-intensity regions found. Downloading full video..."); - const outputPath = join(outputDir, `${safeTitle}.%(ext)s`); - await downloadSegment(url, outputPath, 0, info.duration, format); + console.log("No high-intensity regions found."); return; } - // Output the top segments for (let i = 0; i < topSegments.length; i++) { const seg = topSegments[i]; const duration = seg.end - seg.start; @@ -307,26 +317,44 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom // Download the top segment const topSegment = topSegments[0]; - const outputPath = join(outputDir, `${safeTitle}_most_watched.%(ext)s`); - console.log(`Downloading segment: ${formatTime(topSegment.start)} - ${formatTime(topSegment.end)}`); - await downloadSegment(url, outputPath, topSegment.start, topSegment.end, format); + // Step 1: Download full video (uses your yt-dlp config) + const fullVideoPath = join(outputDir, `${safeTitle}_full_temp.%(ext)s`); + console.log(`Downloading full video (using your yt-dlp config)...`); + + const downloadedPath = await downloadFullVideo(url, fullVideoPath); - // Save segment info - const segmentInfoPath = join(outputDir, `${safeTitle}_top_segments.txt`); - let segmentInfo = `# ${info.title}\n\n`; - segmentInfo += `Top ${topN} high-intensity regions (intensity >= ${(intensityThreshold * 100).toFixed(0)}%):\n\n`; + // Find the actual file (yt-dlp may have changed extension) + // The downloaded path should already be correct, but let's handle the pattern + const tempFiles = await new Promise((resolve) => { + const glob = spawn("find", [outputDir, "-name", `${safeTitle}_full_temp.*`, "-type", "f"]); + let output = ""; + glob.stdout.on("data", (data) => { output += data.toString(); }); + glob.on("close", () => { + resolve(output.split("\n").filter(f => f.length > 0)); + }); + }); - for (let i = 0; i < topSegments.length; i++) { - const seg = topSegments[i]; - const duration = seg.end - seg.start; - segmentInfo += `${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)}\n`; - segmentInfo += ` Duration: ${formatTime(duration)}\n`; - segmentInfo += ` Total Intensity: ${seg.totalIntensity.toFixed(2)}\n`; - segmentInfo += ` Segments: ${seg.segmentCount}\n\n`; + if (tempFiles.length === 0) { + throw new Error("Could not find downloaded video file"); } - writeFileSync(segmentInfoPath, segmentInfo); - console.log(`\nSegment info saved to: ${segmentInfoPath}`); - console.log("Download complete!"); + const actualFullPath = tempFiles[0]; + console.log(`Downloaded to: ${actualFullPath}`); + + // Step 2: Extract segment with ffmpeg + const outputPath = join(outputDir, `${safeTitle}_segment.${actualFullPath.split(".").pop()}`); + console.log(`Extracting segment: ${formatTime(topSegment.start)} - ${formatTime(topSegment.end)}`); + + await extractSegment(actualFullPath, outputPath, topSegment.start, topSegment.end); + + // Clean up temp file + try { + unlinkSync(actualFullPath); + } catch { + // Ignore cleanup errors + } + + console.log(`\nSaved to: ${outputPath}`); + console.log("Done!"); } diff --git a/src/cli/index.ts b/src/cli/index.ts index 0e0de70..a0a7832 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -11,7 +11,6 @@ async function main() { console.log("Usage: yt-segments [options]"); console.log("Options:"); console.log(" -o, --output Output directory (default: ./downloads)"); - console.log(" -f, --format Video format (default: best)"); console.log(" -n, --top Number of top regions (default: 5)"); console.log(" -t, --threshold <0-1> Intensity threshold (default: 0.5)"); console.log(" -h, --help Show help"); @@ -27,9 +26,8 @@ async function main() { await downloadMostWatchedSegment({ url: args.url, outputDir: args.output, - format: args.format, topN: args.topN, - intensityThreshold: args.threshold, + threshold: args.threshold, }); } catch (error) { console.error("Error:", error instanceof Error ? error.message : error);