Compare commits

...

10 Commits

Author SHA1 Message Date
Kilo Code Cloud
3b5ed95f06 fix: remove unused variable 2026-01-14 20:31:02 +00:00
Kilo Code Cloud
1c449c3f0c fix: extract video file and output as mkv 2026-01-14 20:30:26 +00:00
Kilo Code Cloud
f8e903cb13 fix: remove unused variable 2026-01-14 20:25:33 +00:00
Kilo Code Cloud
4191c194a2 feat: download full video then extract segment with ffmpeg 2026-01-14 20:25:06 +00:00
Kilo Code Cloud
52dcbece0a feat: cluster adjacent high-intensity segments 2026-01-14 20:15:06 +00:00
Kilo Code Cloud
49bc42f4c9 fix: use raw intensity to find visual heatmap bumps 2026-01-14 20:11:43 +00:00
Kilo Code Cloud
9ea4d4ab33 feat: output top 10 segment candidates with timestamps and duration 2026-01-14 20:05:28 +00:00
Kilo Code Cloud
3c263a78c6 feat: find segment with highest integral jump in heatmap 2026-01-14 20:01:48 +00:00
Kilo Code Cloud
44415040d5 feat: use integral (primitive) to find most watched segment 2026-01-14 19:56:13 +00:00
Kilo Code Cloud
8f4344e763 feat: use raw intensity values to find most watched segment 2026-01-14 19:54:54 +00:00
3 changed files with 219 additions and 128 deletions

View File

@@ -1,15 +1,15 @@
/**
 * Parsed command-line options produced by `parseArgs`.
 *
 * NOTE(review): this listing comes from a rendered diff, so fields from before
 * and after the change may both appear here — confirm against the checked-out file.
 */
export interface CliArgs {
// YouTube video URL taken from the first positional argument; left unset when none is given.
url?: string;
// Output directory (`-o` / `--output`); `parseArgs` defaults it to "./downloads".
output: string;
// Video format (`-f` / `--format`); `parseArgs` defaults it to "best".
format: string;
// Peak detection threshold; `parseArgs` defaults it to 0.3.
peakThreshold: number;
// Number of top regions to report (`-n` / `--top`); `parseArgs` defaults it to 5.
topN: number;
// Intensity threshold (`-t` / `--threshold`); `parseArgs` defaults it to 0.5.
threshold: number;
}
export function parseArgs(): CliArgs {
const args: CliArgs = {
output: "./downloads",
format: "best",
peakThreshold: 0.3,
topN: 5,
threshold: 0.5,
};
const rawArgs = Bun.argv;
@@ -21,14 +21,14 @@ export function parseArgs(): CliArgs {
if (arg === "-o" || arg === "--output") {
args.output = nextArg || "./downloads";
i++;
} else if (arg === "-f" || arg === "--format") {
args.format = nextArg || "best";
} else if (arg === "-n" || arg === "--top") {
args.topN = parseInt(nextArg || "5", 10);
i++;
} else if (arg === "-t" || arg === "--threshold") {
args.peakThreshold = parseFloat(nextArg || "0.3");
args.threshold = parseFloat(nextArg || "0.5");
i++;
} else if (arg === "-h" || arg === "--help") {
console.log(`YouTube Peak Segment Downloader
console.log(`YouTube Most Watched Segments Downloader
Usage: yt-segments <url> [options]
@@ -36,20 +36,17 @@ Arguments:
<url> YouTube video URL (required)
Options:
-o, --output <dir> Output directory (default: ./downloads)
-f, --format <fmt> Video format (default: best)
-t, --threshold <n> Peak detection threshold 0.1-1.0 (default: 0.3)
Lower = more segments detected as peaks
-h, --help Show this help message
-o, --output <dir> Output directory (default: ./downloads)
-n, --top <num> Number of top regions (default: 5)
-t, --threshold <0-1> Intensity threshold (default: 0.5)
-h, --help Show this help message
Examples:
yt-segments "https://www.youtube.com/watch?v=abc123"
yt-segments "https://youtu.be/abc123" -o ./videos -f mp4
yt-segments "https://www.youtube.com/watch?v=abc123" -t 0.5
yt-segments "https://youtu.be/abc123" -o ./videos -t 0.6
`);
process.exit(0);
} else if (!arg.startsWith("-") && !arg.includes("bun")) {
// This is likely the URL
if (!arg.startsWith("bun") && !arg.includes("node")) {
args.url = arg;
}

View File

@@ -1,12 +1,12 @@
import { spawn } from "child_process";
import { writeFileSync, mkdirSync, existsSync } from "fs";
import { unlinkSync, existsSync, mkdirSync } from "fs";
import { join } from "path";
/**
 * Options accepted by `downloadMostWatchedSegment`.
 *
 * NOTE(review): this listing comes from a rendered diff, so fields from before
 * and after the change may both appear here — confirm against the checked-out file.
 */
export interface DownloadOptions {
// Video URL handed to yt-dlp.
url: string;
// Directory that receives the output files; created recursively if it does not exist.
outputDir: string;
// Format selector that the older `downloadSegment` passed to yt-dlp via `-f`.
format: string;
// Threshold that the older `findPeakSegments` compared each peak score against.
peakThreshold: number;
// Maximum number of clusters kept by `getTopSegments`.
topN: number;
// Minimum intensity a segment needs to take part in clustering.
threshold: number;
}
interface RawHeatmapSegment {
@@ -23,7 +23,8 @@ interface ProcessedSegment {
start: number;
end: number;
intensity: number;
peakScore: number;
totalIntensity: number;
segmentCount: number;
}
interface VideoInfo {
@@ -37,8 +38,6 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
const ytDlp = spawn("yt-dlp", [
"--dump-json",
"--no-download",
"--compat-option",
"no-youtube-channel-redirect",
url,
]);
@@ -78,20 +77,14 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
});
}
async function downloadSegment(
url: string,
outputPath: string,
startTime: number,
endTime: number,
format: string
): Promise<void> {
const section = `*${startTime.toFixed(3)}-${endTime.toFixed(3)}`;
async function downloadFullVideo(url: string, outputDir: string, safeTitle: string): Promise<string> {
// Download to a temp pattern, yt-dlp will fill in the filename
const tempPattern = join(outputDir, `${safeTitle}_temp_%(id)s.%(ext)s`);
return new Promise((resolve, reject) => {
// Use minimal options - let yt-dlp use its config file
const ytDlp = spawn("yt-dlp", [
"-f", format,
"--download-sections", section,
"-o", outputPath,
"-o", tempPattern,
url,
]);
@@ -106,7 +99,7 @@ async function downloadSegment(
reject(new Error(`yt-dlp failed: ${stderr}`));
return;
}
resolve();
resolve(tempPattern);
});
ytDlp.on("error", (err) => {
@@ -115,6 +108,69 @@ async function downloadSegment(
});
}
/**
 * Locates the temporary video file written for `safeTitle` inside `outputDir`.
 *
 * Runs `find <outputDir> -name "<safeTitle>_temp_*" -type f` and returns the
 * first listed path whose extension is a known video container, so that
 * side files such as subtitles are skipped.
 *
 * @param outputDir - Directory to search (searched recursively by `find`).
 * @param safeTitle - Sanitized title used as the temp-file name prefix.
 * @returns The path of the first matching video file, or `null` if none matched.
 * @throws Error if the `find` executable cannot be started.
 */
async function findVideoFile(outputDir: string, safeTitle: string): Promise<string | null> {
  // Video extensions to look for (not subtitles)
  const videoExtensions = ["mp4", "mkv", "webm", "mov", "avi", "m4v"];

  return new Promise((resolve, reject) => {
    const finder = spawn("find", [outputDir, "-name", `${safeTitle}_temp_*`, "-type", "f"]);

    let output = "";
    finder.stdout.on("data", (data) => {
      output += data.toString();
    });

    // Without this listener a failed spawn (e.g. `find` missing from PATH) surfaces as an
    // unhandled 'error' event and takes the whole process down. Reject instead, in the
    // same style as the yt-dlp and ffmpeg helpers.
    finder.on("error", (err) => {
      reject(new Error(`Failed to run find: ${err.message}`));
    });

    finder.on("close", () => {
      const files = output.split("\n").filter((line) => line.length > 0);

      // The text after the last dot decides whether a candidate is a video file.
      const videoFile = files.find((file) => {
        const ext = file.split(".").pop()?.toLowerCase();
        return ext !== undefined && videoExtensions.includes(ext);
      });

      // No-op if the promise was already rejected by the "error" handler.
      resolve(videoFile ?? null);
    });
  });
}
/**
 * Cuts the range `[startTime, endTime]` out of `inputPath` into `outputPath`
 * using ffmpeg stream copy (no re-encoding).
 *
 * @param inputPath - Path of the source video file.
 * @param outputPath - Path of the file to write; an existing file is overwritten.
 * @param startTime - Start offset passed to ffmpeg's `-ss`.
 * @param endTime - End offset; `endTime - startTime` is passed to ffmpeg's `-t`.
 * @throws Error if the range is non-finite, negative or empty, if ffmpeg cannot
 *   be started, or if ffmpeg exits with a non-zero code (stderr is included).
 */
async function extractSegment(
  inputPath: string,
  outputPath: string,
  startTime: number,
  endTime: number
): Promise<void> {
  const duration = endTime - startTime;

  // Fail fast on a nonsensical range instead of handing NaN or a negative value to ffmpeg.
  if (!Number.isFinite(startTime) || startTime < 0 || !Number.isFinite(duration) || duration <= 0) {
    throw new Error(`Invalid segment range: ${startTime} - ${endTime}`);
  }

  return new Promise((resolve, reject) => {
    const ffmpeg = spawn("ffmpeg", [
      // ffmpeg's stdin is an open pipe nobody writes to, so any interactive prompt
      // (such as "File exists. Overwrite?") would block forever. Disable stdin
      // interaction and overwrite the output so that re-runs succeed.
      "-nostdin",
      "-y",
      // -ss placed before -i seeks on the input.
      "-ss", startTime.toString(),
      "-i", inputPath,
      "-t", duration.toString(),
      "-c", "copy",
      outputPath,
    ]);

    let stderr = "";
    ffmpeg.stderr.on("data", (data) => {
      stderr += data.toString();
    });

    ffmpeg.on("close", (code) => {
      if (code !== 0) {
        reject(new Error(`ffmpeg failed: ${stderr}`));
        return;
      }
      resolve();
    });

    ffmpeg.on("error", (err) => {
      reject(new Error(`Failed to run ffmpeg: ${err.message}`));
    });
  });
}
function sanitizeFilename(filename: string): string {
return filename
.replace(/[^a-zA-Z0-9\s\-_]/g, "")
@@ -143,68 +199,111 @@ function getIntensity(segment: RawHeatmapSegment): number {
return segment.intensity ?? segment.heat ?? segment.value ?? 0;
}
function findPeakSegments(
segments: RawHeatmapSegment[],
threshold: number = 0.3
/**
 * Normalized heatmap entry: `getTopSegments` builds these from raw heatmap
 * segments and `clusterHighIntensitySegments` consumes them.
 */
interface RawSegment {
// Start offset of the segment — presumably seconds; confirm against getStartTime.
start: number;
// End offset of the segment, same unit as `start`.
end: number;
// Heat value that is compared against the clustering threshold.
intensity: number;
}
/**
 * Groups segments whose intensity is at or above `threshold` into clusters of
 * nearby segments and returns the clusters ordered by summed intensity,
 * highest first.
 *
 * NOTE(review): this listing comes from a rendered diff; a few lines of the
 * removed `findPeakSegments` body appear to be interleaved below — confirm
 * against the checked-out file.
 *
 * @param segments - Normalized heatmap segments.
 * @param threshold - Minimum intensity for a segment to take part in clustering.
 * @returns Clusters sorted by `totalIntensity` descending; empty when no
 *   segment reaches the threshold.
 */
function clusterHighIntensitySegments(
segments: RawSegment[],
threshold: number
): ProcessedSegment[] {
// NOTE(review): the next line looks like a leftover from the removed function — confirm.
if (segments.length < 3) {
const highIntensity = segments.filter(seg => seg.intensity >= threshold);
if (highIntensity.length === 0) {
return [];
}
// NOTE(review): the next two lines look like leftovers from the removed function — confirm.
// Convert to processed format
const processed = segments
// Sort by start so that each segment is only compared with the cluster preceding it.
highIntensity.sort((a, b) => a.start - b.start);
const clusters: ProcessedSegment[] = [];
let currentCluster: RawSegment | null = null;
for (const seg of highIntensity) {
if (!currentCluster) {
// NOTE(review): the cluster aliases the input object, so extending `end` below
// also mutates the caller's segment.
currentCluster = seg;
continue;
}
const gap = seg.start - currentCluster.end;
const maxGap = 10;
// Merge when the next segment starts at most `maxGap` after the cluster's end and
// at most 1 before it; anything else closes the cluster and starts a new one.
if (gap <= maxGap && gap >= -1) {
currentCluster.end = Math.max(currentCluster.end, seg.end);
} else {
// Sum the intensity of every high-intensity segment lying fully inside the cluster.
// NOTE(review): this rescans the whole list for each cluster.
const totalIntensity = highIntensity
.filter(s => s.start >= currentCluster!.start && s.end <= currentCluster!.end)
.reduce((sum, s) => sum + s.intensity, 0);
clusters.push({
start: currentCluster.start,
end: currentCluster.end,
// NOTE(review): this is the intensity of the cluster's first segment, not a maximum or mean.
intensity: currentCluster.intensity,
totalIntensity,
segmentCount: highIntensity.filter(s =>
s.start >= currentCluster!.start && s.end <= currentCluster!.end
).length,
});
currentCluster = seg;
}
}
// Flush the cluster that was still open when the loop ended.
if (currentCluster) {
const totalIntensity = highIntensity
.filter(s => s.start >= currentCluster.start && s.end <= currentCluster.end)
.reduce((sum, s) => sum + s.intensity, 0);
clusters.push({
start: currentCluster.start,
end: currentCluster.end,
intensity: currentCluster.intensity,
totalIntensity,
segmentCount: highIntensity.filter(s =>
s.start >= currentCluster.start && s.end <= currentCluster.end
).length,
});
}
// Highest summed intensity first.
clusters.sort((a, b) => b.totalIntensity - a.totalIntensity);
return clusters;
}
/**
 * Normalizes raw heatmap entries through `getStartTime`, `getEndTime` and
 * `getIntensity`, drops entries with non-finite fields, a negative start or a
 * non-positive length, clusters the rest with `clusterHighIntensitySegments`
 * and returns at most `topN` clusters.
 *
 * NOTE(review): this listing comes from a rendered diff; much of the removed
 * `findPeakSegments` body (the peak-score calculation) appears to be
 * interleaved below — confirm against the checked-out file.
 *
 * @param segments - Raw heatmap segments.
 * @param topN - Maximum number of clusters to return.
 * @param threshold - Minimum intensity forwarded to the clustering step.
 * @returns The first `topN` clusters in the order produced by the clustering step.
 */
function getTopSegments(
segments: RawHeatmapSegment[],
topN: number,
threshold: number
): ProcessedSegment[] {
const validSegments = segments
.map(seg => ({
start: getStartTime(seg),
end: getEndTime(seg),
intensity: getIntensity(seg),
peakScore: 0,
}))
.filter(seg =>
Number.isFinite(seg.start) &&
Number.isFinite(seg.end) &&
Number.isFinite(seg.intensity)
Number.isFinite(seg.intensity) &&
seg.start >= 0 &&
seg.end > seg.start
);
if (processed.length < 3) {
if (validSegments.length === 0) {
return [];
}
// NOTE(review): from here to `return peaks;` the lines look like the removed
// peak-score implementation — confirm.
// Calculate peak score for each segment
// A peak is where intensity is significantly higher than neighbors
const scored = processed.map((seg, i) => {
const prevIntensity = i > 0 ? processed[i - 1].intensity : seg.intensity;
const nextIntensity = i < processed.length - 1 ? processed[i + 1].intensity : seg.intensity;
// Peak score = how much higher this segment is compared to average of neighbors
const avgNeighborIntensity = (prevIntensity + nextIntensity) / 2;
const peakScore = avgNeighborIntensity > 0
? (seg.intensity - avgNeighborIntensity) / avgNeighborIntensity
: 0;
return {
...seg,
peakScore,
};
});
// Filter segments that are true peaks (higher than neighbors)
const peaks = scored.filter(seg => seg.peakScore > threshold);
// Sort by peak score (highest peaks first)
peaks.sort((a, b) => b.peakScore - a.peakScore);
return peaks;
// Clusters arrive sorted by total intensity, so slicing keeps the strongest ones.
const clusters = clusterHighIntensitySegments(validSegments, threshold);
return clusters.slice(0, topN);
}
export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> {
const { url, outputDir, format, peakThreshold } = options;
const { url, outputDir, topN, threshold } = options;
// Create output directory if it doesn't exist
if (!existsSync(outputDir)) {
mkdirSync(outputDir, { recursive: true });
}
// Get video info with heatmap data from YouTube
console.log("Fetching video information from YouTube...");
const info = await getVideoInfo(url);
const safeTitle = sanitizeFilename(info.title);
@@ -212,63 +311,59 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
console.log(`Video: ${info.title}`);
console.log(`Duration: ${formatTime(info.duration)}`);
// Check for heatmap data
if (!info.heatmap || info.heatmap.length === 0) {
console.log("\nNo heatmap data available for this video.");
console.log("Downloading full video instead...");
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
await downloadSegment(url, outputPath, 0, info.duration, format);
console.log("\nNo heatmap data available.");
return;
}
console.log(`\nHeatmap data found: ${info.heatmap.length} segments`);
console.log(`Intensity threshold: ${(threshold * 100).toFixed(0)}%`);
console.log(`\nTop ${topN} high-intensity regions:\n`);
// Find peak segments (segments that stand out from their neighbors)
const peakSegments = findPeakSegments(info.heatmap, peakThreshold);
const topSegments = getTopSegments(info.heatmap, topN, threshold);
if (peakSegments.length === 0) {
console.log("No significant peak segments found.");
console.log("Downloading full video...");
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
await downloadSegment(url, outputPath, 0, info.duration, format);
if (topSegments.length === 0) {
console.log("No high-intensity regions found.");
return;
}
// Get the top peak segment
const topPeak = peakSegments[0];
console.log(`\nTop peak segment:`);
console.log(` Time: ${formatTime(topPeak.start)} - ${formatTime(topPeak.end)}`);
console.log(` Duration: ${formatTime(topPeak.end - topPeak.start)}`);
console.log(` Peak Score: ${(topPeak.peakScore * 100).toFixed(1)}%`);
console.log(` Base Intensity: ${(topPeak.intensity * 100).toFixed(1)}%`);
// Download the peak segment
const outputPath = join(outputDir, `${safeTitle}_peak.%(ext)s`);
console.log(`\nDownloading peak segment...`);
await downloadSegment(url, outputPath, topPeak.start, topPeak.end, format);
// Save segment info
const segmentInfoPath = join(outputDir, `${safeTitle}_peak_info.txt`);
let segmentInfo = `# ${info.title}\n\n`;
segmentInfo += `Peak segment (stands out from surrounding content):\n`;
segmentInfo += ` Start: ${formatTime(topPeak.start)} (${topPeak.start.toFixed(1)}s)\n`;
segmentInfo += ` End: ${formatTime(topPeak.end)} (${topPeak.end.toFixed(1)}s)\n`;
segmentInfo += ` Duration: ${formatTime(topPeak.end - topPeak.start)}\n`;
segmentInfo += ` Peak Score: ${(topPeak.peakScore * 100).toFixed(1)}%\n`;
segmentInfo += ` Intensity: ${(topPeak.intensity * 100).toFixed(1)}%\n\n`;
if (peakSegments.length > 1) {
segmentInfo += `Other peaks:\n`;
for (let i = 1; i < Math.min(peakSegments.length, 5); i++) {
const seg = peakSegments[i];
segmentInfo += ` ${formatTime(seg.start)} - ${formatTime(seg.end)} (score: ${(seg.peakScore * 100).toFixed(1)}%)\n`;
}
for (let i = 0; i < topSegments.length; i++) {
const seg = topSegments[i];
const duration = seg.end - seg.start;
console.log(`${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Total Intensity: ${seg.totalIntensity.toFixed(2)}`);
}
writeFileSync(segmentInfoPath, segmentInfo);
console.log(`\nSegment info saved to: ${segmentInfoPath}`);
console.log("Download complete!");
console.log("");
const topSegment = topSegments[0];
// Step 1: Download full video (uses your yt-dlp config)
console.log(`Downloading full video (using your yt-dlp config)...`);
await downloadFullVideo(url, outputDir, safeTitle);
// Find video file (not subtitle)
const videoPath = await findVideoFile(outputDir, safeTitle);
if (!videoPath) {
throw new Error("No video file found. Your yt-dlp config may be downloading subtitles instead.");
}
console.log(`Downloaded video: ${videoPath}`);
// Step 2: Extract segment with ffmpeg to mkv
const outputPath = join(outputDir, `${safeTitle}_segment.mkv`);
console.log(`Extracting segment: ${formatTime(topSegment.start)} - ${formatTime(topSegment.end)}`);
await extractSegment(videoPath, outputPath, topSegment.start, topSegment.end);
// Clean up temp files
try {
unlinkSync(videoPath);
} catch {
// Ignore cleanup errors
}
console.log(`\nSaved to: ${outputPath}`);
console.log("Done!");
}

View File

@@ -10,25 +10,24 @@ async function main() {
console.error("Error: YouTube URL is required");
console.log("Usage: yt-segments <url> [options]");
console.log("Options:");
console.log(" -o, --output <dir> Output directory (default: ./downloads)");
console.log(" -f, --format <fmt> Video format (default: best)");
console.log(" -t, --threshold <n> Peak detection threshold (default: 0.3)");
console.log(" -h, --help Show help");
console.log(" -o, --output <dir> Output directory (default: ./downloads)");
console.log(" -n, --top <num> Number of top regions (default: 5)");
console.log(" -t, --threshold <0-1> Intensity threshold (default: 0.5)");
console.log(" -h, --help Show help");
process.exit(1);
}
console.log(`Downloading peak segment from: ${args.url}`);
console.log(`Analyzing video: ${args.url}`);
console.log(`Output directory: ${args.output}`);
console.log(`Format: ${args.format}`);
console.log(`Peak threshold: ${args.peakThreshold}`);
console.log("");
console.log(`Intensity threshold: ${(args.threshold * 100).toFixed(0)}%`);
console.log(`Top ${args.topN} regions\n`);
try {
await downloadMostWatchedSegment({
url: args.url,
outputDir: args.output,
format: args.format,
peakThreshold: args.peakThreshold,
topN: args.topN,
threshold: args.threshold,
});
} catch (error) {
console.error("Error:", error instanceof Error ? error.message : error);