feat: download full video then extract segment with ffmpeg

This commit is contained in:
Kilo Code Cloud
2026-01-14 20:25:06 +00:00
parent 52dcbece0a
commit 4191c194a2
3 changed files with 84 additions and 66 deletions

View File

@@ -1,7 +1,6 @@
export interface CliArgs { export interface CliArgs {
url?: string; url?: string;
output: string; output: string;
format: string;
topN: number; topN: number;
threshold: number; threshold: number;
} }
@@ -9,7 +8,6 @@ export interface CliArgs {
export function parseArgs(): CliArgs { export function parseArgs(): CliArgs {
const args: CliArgs = { const args: CliArgs = {
output: "./downloads", output: "./downloads",
format: "best",
topN: 5, topN: 5,
threshold: 0.5, threshold: 0.5,
}; };
@@ -23,9 +21,6 @@ export function parseArgs(): CliArgs {
if (arg === "-o" || arg === "--output") { if (arg === "-o" || arg === "--output") {
args.output = nextArg || "./downloads"; args.output = nextArg || "./downloads";
i++; i++;
} else if (arg === "-f" || arg === "--format") {
args.format = nextArg || "best";
i++;
} else if (arg === "-n" || arg === "--top") { } else if (arg === "-n" || arg === "--top") {
args.topN = parseInt(nextArg || "5", 10); args.topN = parseInt(nextArg || "5", 10);
i++; i++;
@@ -42,10 +37,8 @@ Arguments:
Options: Options:
-o, --output <dir> Output directory (default: ./downloads) -o, --output <dir> Output directory (default: ./downloads)
-f, --format <fmt> Video format (default: best)
-n, --top <num> Number of top regions (default: 5) -n, --top <num> Number of top regions (default: 5)
-t, --threshold <0-1> Intensity threshold (default: 0.5) -t, --threshold <0-1> Intensity threshold (default: 0.5)
Lower = more segments, Higher = stricter
-h, --help Show this help message -h, --help Show this help message
Examples: Examples:
@@ -54,7 +47,6 @@ Examples:
`); `);
process.exit(0); process.exit(0);
} else if (!arg.startsWith("-") && !arg.includes("bun")) { } else if (!arg.startsWith("-") && !arg.includes("bun")) {
// This is likely the URL
if (!arg.startsWith("bun") && !arg.includes("node")) { if (!arg.startsWith("bun") && !arg.includes("node")) {
args.url = arg; args.url = arg;
} }

View File

@@ -1,13 +1,12 @@
import { spawn } from "child_process"; import { spawn } from "child_process";
import { writeFileSync, mkdirSync, existsSync } from "fs"; import { unlinkSync, existsSync, mkdirSync } from "fs";
import { join } from "path"; import { join } from "path";
export interface DownloadOptions { export interface DownloadOptions {
url: string; url: string;
outputDir: string; outputDir: string;
format: string;
topN: number; topN: number;
intensityThreshold: number; threshold: number;
} }
interface RawHeatmapSegment { interface RawHeatmapSegment {
@@ -39,8 +38,6 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
const ytDlp = spawn("yt-dlp", [ const ytDlp = spawn("yt-dlp", [
"--dump-json", "--dump-json",
"--no-download", "--no-download",
"--compat-option",
"no-youtube-channel-redirect",
url, url,
]); ]);
@@ -80,19 +77,10 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
}); });
} }
async function downloadSegment( async function downloadFullVideo(url: string, outputPath: string): Promise<string> {
url: string,
outputPath: string,
startTime: number,
endTime: number,
format: string
): Promise<void> {
const section = `*${startTime.toFixed(3)}-${endTime.toFixed(3)}`;
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
// Use minimal options - let yt-dlp use its config file
const ytDlp = spawn("yt-dlp", [ const ytDlp = spawn("yt-dlp", [
"-f", format,
"--download-sections", section,
"-o", outputPath, "-o", outputPath,
url, url,
]); ]);
@@ -108,7 +96,7 @@ async function downloadSegment(
reject(new Error(`yt-dlp failed: ${stderr}`)); reject(new Error(`yt-dlp failed: ${stderr}`));
return; return;
} }
resolve(); resolve(outputPath);
}); });
ytDlp.on("error", (err) => { ytDlp.on("error", (err) => {
@@ -117,6 +105,43 @@ async function downloadSegment(
}); });
} }
/**
 * Cuts the time range between `startTime` and `endTime` out of `inputPath`
 * and writes it to `outputPath` by running `ffmpeg` with stream copy
 * (`-c copy`, i.e. no re-encoding).
 *
 * @param inputPath  Path of the media file to read.
 * @param outputPath Path of the file to create; an existing file is overwritten.
 * @param startTime  Segment start, in seconds from the beginning of the input.
 * @param endTime    Segment end, in seconds; must be greater than `startTime`.
 * @returns A promise that resolves once ffmpeg exits with code 0.
 * @throws Rejects with an `Error` when the range is invalid, when ffmpeg
 *         cannot be started, or when it exits with a non-zero code (the
 *         collected stderr output is included in the message).
 */
async function extractSegment(
  inputPath: string,
  outputPath: string,
  startTime: number,
  endTime: number
): Promise<void> {
  const duration = endTime - startTime;

  // Fail fast on an empty, reversed or non-numeric range instead of handing
  // ffmpeg a meaningless `-t` value.
  if (
    !Number.isFinite(startTime) ||
    !Number.isFinite(endTime) ||
    startTime < 0 ||
    duration <= 0
  ) {
    throw new Error(`Invalid segment range: ${startTime} - ${endTime}`);
  }

  return new Promise((resolve, reject) => {
    const ffmpeg = spawn("ffmpeg", [
      // The child's stdin is a pipe nobody writes to. Without these two flags
      // ffmpeg would block forever on its "Overwrite? [y/N]" prompt whenever
      // the output file already exists (e.g. on a re-run).
      "-nostdin",
      "-y",
      // `-ss` before `-i` seeks in the input rather than decoding up to it.
      "-ss", startTime.toString(),
      "-i", inputPath,
      "-t", duration.toString(),
      "-c", "copy",
      outputPath,
    ]);

    // ffmpeg logs to stderr; keep it so a failure can be reported verbatim.
    let stderr = "";

    ffmpeg.stderr.on("data", (data) => {
      stderr += data.toString();
    });

    ffmpeg.on("close", (code) => {
      if (code !== 0) {
        reject(new Error(`ffmpeg failed: ${stderr}`));
        return;
      }
      resolve();
    });

    // Emitted when the process could not be spawned (e.g. ffmpeg not installed).
    ffmpeg.on("error", (err) => {
      reject(new Error(`Failed to run ffmpeg: ${err.message}`));
    });
  });
}
function sanitizeFilename(filename: string): string { function sanitizeFilename(filename: string): string {
return filename return filename
.replace(/[^a-zA-Z0-9\s\-_]/g, "") .replace(/[^a-zA-Z0-9\s\-_]/g, "")
@@ -176,7 +201,6 @@ function clusterHighIntensitySegments(
} }
// Check if this segment is adjacent or overlapping with current cluster // Check if this segment is adjacent or overlapping with current cluster
// Adjacent means: seg.start is within a small gap of currentCluster.end
const gap = seg.start - currentCluster.end; const gap = seg.start - currentCluster.end;
const maxGap = 10; // Allow up to 10 second gap const maxGap = 10; // Allow up to 10 second gap
@@ -231,7 +255,6 @@ function getTopSegments(
topN: number, topN: number,
threshold: number threshold: number
): ProcessedSegment[] { ): ProcessedSegment[] {
// Convert to processed format and filter valid segments
const validSegments = segments const validSegments = segments
.map(seg => ({ .map(seg => ({
start: getStartTime(seg), start: getStartTime(seg),
@@ -250,21 +273,17 @@ function getTopSegments(
return []; return [];
} }
// Cluster high-intensity segments
const clusters = clusterHighIntensitySegments(validSegments, threshold); const clusters = clusterHighIntensitySegments(validSegments, threshold);
return clusters.slice(0, topN); return clusters.slice(0, topN);
} }
export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> { export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> {
const { url, outputDir, format, topN, intensityThreshold } = options; const { url, outputDir, topN, threshold } = options;
// Create output directory if it doesn't exist
if (!existsSync(outputDir)) { if (!existsSync(outputDir)) {
mkdirSync(outputDir, { recursive: true }); mkdirSync(outputDir, { recursive: true });
} }
// Get video info with heatmap data from YouTube
console.log("Fetching video information from YouTube..."); console.log("Fetching video information from YouTube...");
const info = await getVideoInfo(url); const info = await getVideoInfo(url);
const safeTitle = sanitizeFilename(info.title); const safeTitle = sanitizeFilename(info.title);
@@ -272,31 +291,22 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
console.log(`Video: ${info.title}`); console.log(`Video: ${info.title}`);
console.log(`Duration: ${formatTime(info.duration)}`); console.log(`Duration: ${formatTime(info.duration)}`);
// Check for heatmap data
if (!info.heatmap || info.heatmap.length === 0) { if (!info.heatmap || info.heatmap.length === 0) {
console.log("\nNo heatmap data available for this video."); console.log("\nNo heatmap data available.");
console.log("Downloading full video instead...");
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
await downloadSegment(url, outputPath, 0, info.duration, format);
return; return;
} }
console.log(`\nHeatmap data found: ${info.heatmap.length} segments`); console.log(`\nHeatmap data found: ${info.heatmap.length} segments`);
console.log(`Intensity threshold: ${(intensityThreshold * 100).toFixed(0)}%`); console.log(`Intensity threshold: ${(threshold * 100).toFixed(0)}%`);
console.log(`\nTop ${topN} high-intensity regions (clustered segments):\n`); console.log(`\nTop ${topN} high-intensity regions:\n`);
// Get top clustered segments const topSegments = getTopSegments(info.heatmap, topN, threshold);
const topSegments = getTopSegments(info.heatmap, topN, intensityThreshold);
if (topSegments.length === 0) { if (topSegments.length === 0) {
console.log("No high-intensity regions found. Downloading full video..."); console.log("No high-intensity regions found.");
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
await downloadSegment(url, outputPath, 0, info.duration, format);
return; return;
} }
// Output the top segments
for (let i = 0; i < topSegments.length; i++) { for (let i = 0; i < topSegments.length; i++) {
const seg = topSegments[i]; const seg = topSegments[i];
const duration = seg.end - seg.start; const duration = seg.end - seg.start;
@@ -307,26 +317,44 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
// Download the top segment // Download the top segment
const topSegment = topSegments[0]; const topSegment = topSegments[0];
const outputPath = join(outputDir, `${safeTitle}_most_watched.%(ext)s`);
console.log(`Downloading segment: ${formatTime(topSegment.start)} - ${formatTime(topSegment.end)}`); // Step 1: Download full video (uses your yt-dlp config)
await downloadSegment(url, outputPath, topSegment.start, topSegment.end, format); const fullVideoPath = join(outputDir, `${safeTitle}_full_temp.%(ext)s`);
console.log(`Downloading full video (using your yt-dlp config)...`);
const downloadedPath = await downloadFullVideo(url, fullVideoPath);
// Save segment info // Find the actual file (yt-dlp may have changed extension)
const segmentInfoPath = join(outputDir, `${safeTitle}_top_segments.txt`); // The downloaded path should already be correct, but let's handle the pattern
let segmentInfo = `# ${info.title}\n\n`; const tempFiles = await new Promise<string[]>((resolve) => {
segmentInfo += `Top ${topN} high-intensity regions (intensity >= ${(intensityThreshold * 100).toFixed(0)}%):\n\n`; const glob = spawn("find", [outputDir, "-name", `${safeTitle}_full_temp.*`, "-type", "f"]);
let output = "";
glob.stdout.on("data", (data) => { output += data.toString(); });
glob.on("close", () => {
resolve(output.split("\n").filter(f => f.length > 0));
});
});
for (let i = 0; i < topSegments.length; i++) { if (tempFiles.length === 0) {
const seg = topSegments[i]; throw new Error("Could not find downloaded video file");
const duration = seg.end - seg.start;
segmentInfo += `${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)}\n`;
segmentInfo += ` Duration: ${formatTime(duration)}\n`;
segmentInfo += ` Total Intensity: ${seg.totalIntensity.toFixed(2)}\n`;
segmentInfo += ` Segments: ${seg.segmentCount}\n\n`;
} }
writeFileSync(segmentInfoPath, segmentInfo); const actualFullPath = tempFiles[0];
console.log(`\nSegment info saved to: ${segmentInfoPath}`); console.log(`Downloaded to: ${actualFullPath}`);
console.log("Download complete!");
// Step 2: Extract segment with ffmpeg
const outputPath = join(outputDir, `${safeTitle}_segment.${actualFullPath.split(".").pop()}`);
console.log(`Extracting segment: ${formatTime(topSegment.start)} - ${formatTime(topSegment.end)}`);
await extractSegment(actualFullPath, outputPath, topSegment.start, topSegment.end);
// Clean up temp file
try {
unlinkSync(actualFullPath);
} catch {
// Ignore cleanup errors
}
console.log(`\nSaved to: ${outputPath}`);
console.log("Done!");
} }

View File

@@ -11,7 +11,6 @@ async function main() {
console.log("Usage: yt-segments <url> [options]"); console.log("Usage: yt-segments <url> [options]");
console.log("Options:"); console.log("Options:");
console.log(" -o, --output <dir> Output directory (default: ./downloads)"); console.log(" -o, --output <dir> Output directory (default: ./downloads)");
console.log(" -f, --format <fmt> Video format (default: best)");
console.log(" -n, --top <num> Number of top regions (default: 5)"); console.log(" -n, --top <num> Number of top regions (default: 5)");
console.log(" -t, --threshold <0-1> Intensity threshold (default: 0.5)"); console.log(" -t, --threshold <0-1> Intensity threshold (default: 0.5)");
console.log(" -h, --help Show help"); console.log(" -h, --help Show help");
@@ -27,9 +26,8 @@ async function main() {
await downloadMostWatchedSegment({ await downloadMostWatchedSegment({
url: args.url, url: args.url,
outputDir: args.output, outputDir: args.output,
format: args.format,
topN: args.topN, topN: args.topN,
intensityThreshold: args.threshold, threshold: args.threshold,
}); });
} catch (error) { } catch (error) {
console.error("Error:", error instanceof Error ? error.message : error); console.error("Error:", error instanceof Error ? error.message : error);