feat: cluster adjacent high-intensity segments

This commit is contained in:
Kilo Code Cloud
2026-01-14 20:15:06 +00:00
parent 49bc42f4c9
commit 52dcbece0a
3 changed files with 123 additions and 25 deletions

View File

@@ -3,13 +3,15 @@ export interface CliArgs {
output: string;
format: string;
topN: number;
threshold: number;
}
export function parseArgs(): CliArgs {
const args: CliArgs = {
output: "./downloads",
format: "best",
topN: 10,
topN: 5,
threshold: 0.5,
};
const rawArgs = Bun.argv;
@@ -25,7 +27,10 @@ export function parseArgs(): CliArgs {
args.format = nextArg || "best";
i++;
} else if (arg === "-n" || arg === "--top") {
args.topN = parseInt(nextArg || "10", 10);
args.topN = parseInt(nextArg || "5", 10);
i++;
} else if (arg === "-t" || arg === "--threshold") {
args.threshold = parseFloat(nextArg || "0.5");
i++;
} else if (arg === "-h" || arg === "--help") {
console.log(`YouTube Most Watched Segments Downloader
@@ -38,12 +43,14 @@ Arguments:
Options:
-o, --output <dir> Output directory (default: ./downloads)
-f, --format <fmt> Video format (default: best)
-n, --top <num> Number of top segments to show (default: 10)
-n, --top <num> Number of top regions (default: 5)
-t, --threshold <0-1> Intensity threshold (default: 0.5)
Lower = more segments, Higher = stricter
-h, --help Show this help message
Examples:
yt-segments "https://www.youtube.com/watch?v=abc123"
yt-segments "https://youtu.be/abc123" -o ./videos -n 5
yt-segments "https://youtu.be/abc123" -o ./videos -t 0.6
`);
process.exit(0);
} else if (!arg.startsWith("-") && !arg.includes("bun")) {

View File

@@ -7,6 +7,7 @@ export interface DownloadOptions {
outputDir: string;
format: string;
topN: number;
intensityThreshold: number;
}
interface RawHeatmapSegment {
@@ -23,6 +24,8 @@ interface ProcessedSegment {
start: number;
end: number;
intensity: number;
totalIntensity: number;
segmentCount: number;
}
interface VideoInfo {
@@ -142,9 +145,91 @@ function getIntensity(segment: RawHeatmapSegment): number {
return segment.intensity ?? segment.heat ?? segment.value ?? 0;
}
function getTopSegmentsByIntensity(
// Minimal segment shape consumed by the clustering step: a time span plus
// the intensity value that is compared against the threshold.
interface RawSegment {
// Start of the span. Presumably seconds from the start of the video (the
// clustering code compares gaps between spans against a 10-second limit).
start: number;
// End of the span, in the same unit as `start`.
end: number;
// Intensity of the span; spans below the clustering threshold are dropped.
intensity: number;
}
/**
 * Groups high-intensity segments into contiguous regions.
 *
 * Segments whose intensity is below `threshold` are discarded. The remaining
 * segments are ordered by start time and merged into one region whenever the
 * next segment overlaps the current region or starts no more than `maxGap`
 * after the region's current end.
 *
 * The input array and its segment objects are never modified.
 *
 * @param segments - Candidate segments, in any order.
 * @param threshold - Minimum intensity (inclusive) a segment needs to be kept.
 * @param maxGap - Largest gap between two segments that still merges them,
 *   in the same unit as `start`/`end` (10 by default, i.e. ten seconds when
 *   the times are in seconds).
 * @returns Regions sorted by `totalIntensity`, highest first; regions with
 *   equal totals stay in chronological order. For each region, `intensity`
 *   is the peak intensity of its members, `totalIntensity` is their sum and
 *   `segmentCount` is how many segments were merged. Returns an empty array
 *   when no segment reaches the threshold.
 */
function clusterHighIntensitySegments(
  segments: RawSegment[],
  threshold: number,
  maxGap = 10
): ProcessedSegment[] {
  // `filter` returns a fresh array, so sorting it leaves the caller's array untouched.
  const highIntensity = segments
    .filter((seg) => seg.intensity >= threshold)
    .sort((a, b) => a.start - b.start);

  const clusters: ProcessedSegment[] = [];
  let current: ProcessedSegment | null = null;

  for (const seg of highIntensity) {
    // Segments are ordered by start, so a zero or negative gap means `seg`
    // begins inside the current region; any such overlap is merged, as is
    // any forward gap of at most `maxGap`.
    if (current !== null && seg.start - current.end <= maxGap) {
      current.end = Math.max(current.end, seg.end);
      current.intensity = Math.max(current.intensity, seg.intensity);
      current.totalIntensity += seg.intensity;
      current.segmentCount += 1;
      continue;
    }

    // Start a new region from a copy so the input segment is not mutated
    // when the region later grows.
    current = {
      start: seg.start,
      end: seg.end,
      intensity: seg.intensity,
      totalIntensity: seg.intensity,
      segmentCount: 1,
    };
    clusters.push(current);
  }

  // Highest accumulated intensity first.
  clusters.sort((a, b) => b.totalIntensity - a.totalIntensity);
  return clusters;
}
function getTopSegments(
segments: RawHeatmapSegment[],
topN: number
topN: number,
threshold: number
): ProcessedSegment[] {
// Convert to processed format and filter valid segments
const validSegments = segments
@@ -165,13 +250,14 @@ function getTopSegmentsByIntensity(
return [];
}
// Sort by raw intensity (highest first) - this matches visual "bumps" in heatmap
validSegments.sort((a, b) => b.intensity - a.intensity);
return validSegments.slice(0, topN);
// Cluster high-intensity segments
const clusters = clusterHighIntensitySegments(validSegments, threshold);
return clusters.slice(0, topN);
}
export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> {
const { url, outputDir, format, topN } = options;
const { url, outputDir, format, topN, intensityThreshold } = options;
// Create output directory if it doesn't exist
if (!existsSync(outputDir)) {
@@ -197,13 +283,14 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
}
console.log(`\nHeatmap data found: ${info.heatmap.length} segments`);
console.log(`\nTop ${topN} segments by intensity (visual heatmap bumps):\n`);
console.log(`Intensity threshold: ${(intensityThreshold * 100).toFixed(0)}%`);
console.log(`\nTop ${topN} high-intensity regions (clustered segments):\n`);
// Get top segments by raw intensity
const topSegments = getTopSegmentsByIntensity(info.heatmap, topN);
// Get top clustered segments
const topSegments = getTopSegments(info.heatmap, topN, intensityThreshold);
if (topSegments.length === 0) {
console.log("No valid segments found. Downloading full video...");
console.log("No high-intensity regions found. Downloading full video...");
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
await downloadSegment(url, outputPath, 0, info.duration, format);
return;
@@ -213,7 +300,7 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
for (let i = 0; i < topSegments.length; i++) {
const seg = topSegments[i];
const duration = seg.end - seg.start;
console.log(`${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Intensity: ${(seg.intensity * 100).toFixed(1)}%`);
console.log(`${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Total Intensity: ${seg.totalIntensity.toFixed(2)}`);
}
console.log("");
@@ -228,14 +315,15 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
// Save segment info
const segmentInfoPath = join(outputDir, `${safeTitle}_top_segments.txt`);
let segmentInfo = `# ${info.title}\n\n`;
segmentInfo += `Top ${topN} segments by intensity (highest re-watch rate):\n\n`;
segmentInfo += `Top ${topN} high-intensity regions (intensity >= ${(intensityThreshold * 100).toFixed(0)}%):\n\n`;
for (let i = 0; i < topSegments.length; i++) {
const seg = topSegments[i];
const duration = seg.end - seg.start;
segmentInfo += `${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)}\n`;
segmentInfo += ` Duration: ${formatTime(duration)}\n`;
segmentInfo += ` Intensity: ${(seg.intensity * 100).toFixed(1)}%\n\n`;
segmentInfo += ` Total Intensity: ${seg.totalIntensity.toFixed(2)}\n`;
segmentInfo += ` Segments: ${seg.segmentCount}\n\n`;
}
writeFileSync(segmentInfoPath, segmentInfo);

View File

@@ -12,14 +12,16 @@ async function main() {
console.log("Options:");
console.log(" -o, --output <dir> Output directory (default: ./downloads)");
console.log(" -f, --format <fmt> Video format (default: best)");
console.log(" -n, --top <num> Number of top segments (default: 10)");
console.log(" -n, --top <num> Number of top regions (default: 5)");
console.log(" -t, --threshold <0-1> Intensity threshold (default: 0.5)");
console.log(" -h, --help Show help");
process.exit(1);
}
console.log(`Analyzing video: ${args.url}`);
console.log(`Output directory: ${args.output}`);
console.log(`Top ${args.topN} segments by integral jump\n`);
console.log(`Intensity threshold: ${(args.threshold * 100).toFixed(0)}%`);
console.log(`Top ${args.topN} regions\n`);
try {
await downloadMostWatchedSegment({
@@ -27,6 +29,7 @@ async function main() {
outputDir: args.output,
format: args.format,
topN: args.topN,
intensityThreshold: args.threshold,
});
} catch (error) {
console.error("Error:", error instanceof Error ? error.message : error);