feat: cluster adjacent high-intensity segments
This commit is contained in:
@@ -3,13 +3,15 @@ export interface CliArgs {
|
|||||||
output: string;
|
output: string;
|
||||||
format: string;
|
format: string;
|
||||||
topN: number;
|
topN: number;
|
||||||
|
threshold: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function parseArgs(): CliArgs {
|
export function parseArgs(): CliArgs {
|
||||||
const args: CliArgs = {
|
const args: CliArgs = {
|
||||||
output: "./downloads",
|
output: "./downloads",
|
||||||
format: "best",
|
format: "best",
|
||||||
topN: 10,
|
topN: 5,
|
||||||
|
threshold: 0.5,
|
||||||
};
|
};
|
||||||
|
|
||||||
const rawArgs = Bun.argv;
|
const rawArgs = Bun.argv;
|
||||||
@@ -25,7 +27,10 @@ export function parseArgs(): CliArgs {
|
|||||||
args.format = nextArg || "best";
|
args.format = nextArg || "best";
|
||||||
i++;
|
i++;
|
||||||
} else if (arg === "-n" || arg === "--top") {
|
} else if (arg === "-n" || arg === "--top") {
|
||||||
args.topN = parseInt(nextArg || "10", 10);
|
args.topN = parseInt(nextArg || "5", 10);
|
||||||
|
i++;
|
||||||
|
} else if (arg === "-t" || arg === "--threshold") {
|
||||||
|
args.threshold = parseFloat(nextArg || "0.5");
|
||||||
i++;
|
i++;
|
||||||
} else if (arg === "-h" || arg === "--help") {
|
} else if (arg === "-h" || arg === "--help") {
|
||||||
console.log(`YouTube Most Watched Segments Downloader
|
console.log(`YouTube Most Watched Segments Downloader
|
||||||
@@ -38,12 +43,14 @@ Arguments:
|
|||||||
Options:
|
Options:
|
||||||
-o, --output <dir> Output directory (default: ./downloads)
|
-o, --output <dir> Output directory (default: ./downloads)
|
||||||
-f, --format <fmt> Video format (default: best)
|
-f, --format <fmt> Video format (default: best)
|
||||||
-n, --top <num> Number of top segments to show (default: 10)
|
-n, --top <num> Number of top regions (default: 5)
|
||||||
|
-t, --threshold <0-1> Intensity threshold (default: 0.5)
|
||||||
|
Lower = more segments, Higher = stricter
|
||||||
-h, --help Show this help message
|
-h, --help Show this help message
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
yt-segments "https://www.youtube.com/watch?v=abc123"
|
yt-segments "https://www.youtube.com/watch?v=abc123"
|
||||||
yt-segments "https://youtu.be/abc123" -o ./videos -n 5
|
yt-segments "https://youtu.be/abc123" -o ./videos -t 0.6
|
||||||
`);
|
`);
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
} else if (!arg.startsWith("-") && !arg.includes("bun")) {
|
} else if (!arg.startsWith("-") && !arg.includes("bun")) {
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ export interface DownloadOptions {
|
|||||||
outputDir: string;
|
outputDir: string;
|
||||||
format: string;
|
format: string;
|
||||||
topN: number;
|
topN: number;
|
||||||
|
intensityThreshold: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RawHeatmapSegment {
|
interface RawHeatmapSegment {
|
||||||
@@ -23,6 +24,8 @@ interface ProcessedSegment {
|
|||||||
start: number;
|
start: number;
|
||||||
end: number;
|
end: number;
|
||||||
intensity: number;
|
intensity: number;
|
||||||
|
totalIntensity: number;
|
||||||
|
segmentCount: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface VideoInfo {
|
interface VideoInfo {
|
||||||
@@ -142,9 +145,91 @@ function getIntensity(segment: RawHeatmapSegment): number {
|
|||||||
return segment.intensity ?? segment.heat ?? segment.value ?? 0;
|
return segment.intensity ?? segment.heat ?? segment.value ?? 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getTopSegmentsByIntensity(
|
/** A time-ranged heatmap sample: start/end offsets plus its intensity value. */
interface RawSegment {
  start: number;
  end: number;
  intensity: number;
}

/**
 * Groups segments whose intensity is at or above `threshold` into contiguous
 * regions and ranks those regions by their summed intensity.
 *
 * Segments are ordered by start time; a segment joins the running region when
 * it starts no later than `maxGap` after the region's current end. Because the
 * list is sorted, this also covers segments that overlap the region or lie
 * entirely inside it, so no segment is ever split off into (or counted in) a
 * second, overlapping region.
 *
 * The input array and its elements are never modified.
 *
 * @param segments  Candidate segments, in any order.
 * @param threshold Minimum intensity (inclusive) for a segment to be kept.
 * @param maxGap    Largest gap, in the same unit as `start`/`end` (seconds),
 *                  still treated as adjacent. Defaults to 10.
 * @returns Regions sorted by `totalIntensity`, highest first. For each region
 *          `intensity` is the peak intensity of its members, `totalIntensity`
 *          their sum and `segmentCount` how many segments were merged. Returns
 *          an empty array when no segment reaches the threshold.
 */
function clusterHighIntensitySegments(
  segments: RawSegment[],
  threshold: number,
  maxGap = 10
): ProcessedSegment[] {
  // `filter` yields a fresh array, so sorting it leaves the caller's order intact.
  const highIntensity = segments
    .filter((seg) => seg.intensity >= threshold)
    .sort((a, b) => a.start - b.start);

  const clusters: ProcessedSegment[] = [];
  let current: ProcessedSegment | null = null;

  for (const seg of highIntensity) {
    if (current !== null && seg.start - current.end <= maxGap) {
      // Adjacent, overlapping or contained: fold into the running region and
      // keep its aggregates up to date in a single pass.
      current.end = Math.max(current.end, seg.end);
      current.intensity = Math.max(current.intensity, seg.intensity);
      current.totalIntensity += seg.intensity;
      current.segmentCount += 1;
      continue;
    }

    // Start a new region from a copy so the input segment is left untouched.
    current = {
      start: seg.start,
      end: seg.end,
      intensity: seg.intensity,
      totalIntensity: seg.intensity,
      segmentCount: 1,
    };
    clusters.push(current);
  }

  // Rank regions by accumulated intensity (highest first).
  clusters.sort((a, b) => b.totalIntensity - a.totalIntensity);

  return clusters;
}
|
||||||
|
|
||||||
|
function getTopSegments(
|
||||||
segments: RawHeatmapSegment[],
|
segments: RawHeatmapSegment[],
|
||||||
topN: number
|
topN: number,
|
||||||
|
threshold: number
|
||||||
): ProcessedSegment[] {
|
): ProcessedSegment[] {
|
||||||
// Convert to processed format and filter valid segments
|
// Convert to processed format and filter valid segments
|
||||||
const validSegments = segments
|
const validSegments = segments
|
||||||
@@ -165,13 +250,14 @@ function getTopSegmentsByIntensity(
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort by raw intensity (highest first) - this matches visual "bumps" in heatmap
|
// Cluster high-intensity segments
|
||||||
validSegments.sort((a, b) => b.intensity - a.intensity);
|
const clusters = clusterHighIntensitySegments(validSegments, threshold);
|
||||||
return validSegments.slice(0, topN);
|
|
||||||
|
return clusters.slice(0, topN);
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> {
|
export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> {
|
||||||
const { url, outputDir, format, topN } = options;
|
const { url, outputDir, format, topN, intensityThreshold } = options;
|
||||||
|
|
||||||
// Create output directory if it doesn't exist
|
// Create output directory if it doesn't exist
|
||||||
if (!existsSync(outputDir)) {
|
if (!existsSync(outputDir)) {
|
||||||
@@ -197,13 +283,14 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
|
|||||||
}
|
}
|
||||||
|
|
||||||
console.log(`\nHeatmap data found: ${info.heatmap.length} segments`);
|
console.log(`\nHeatmap data found: ${info.heatmap.length} segments`);
|
||||||
console.log(`\nTop ${topN} segments by intensity (visual heatmap bumps):\n`);
|
console.log(`Intensity threshold: ${(intensityThreshold * 100).toFixed(0)}%`);
|
||||||
|
console.log(`\nTop ${topN} high-intensity regions (clustered segments):\n`);
|
||||||
|
|
||||||
// Get top segments by raw intensity
|
// Get top clustered segments
|
||||||
const topSegments = getTopSegmentsByIntensity(info.heatmap, topN);
|
const topSegments = getTopSegments(info.heatmap, topN, intensityThreshold);
|
||||||
|
|
||||||
if (topSegments.length === 0) {
|
if (topSegments.length === 0) {
|
||||||
console.log("No valid segments found. Downloading full video...");
|
console.log("No high-intensity regions found. Downloading full video...");
|
||||||
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
|
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
|
||||||
await downloadSegment(url, outputPath, 0, info.duration, format);
|
await downloadSegment(url, outputPath, 0, info.duration, format);
|
||||||
return;
|
return;
|
||||||
@@ -213,7 +300,7 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
|
|||||||
for (let i = 0; i < topSegments.length; i++) {
|
for (let i = 0; i < topSegments.length; i++) {
|
||||||
const seg = topSegments[i];
|
const seg = topSegments[i];
|
||||||
const duration = seg.end - seg.start;
|
const duration = seg.end - seg.start;
|
||||||
console.log(`${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Intensity: ${(seg.intensity * 100).toFixed(1)}%`);
|
console.log(`${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Total Intensity: ${seg.totalIntensity.toFixed(2)}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log("");
|
console.log("");
|
||||||
@@ -228,14 +315,15 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
|
|||||||
// Save segment info
|
// Save segment info
|
||||||
const segmentInfoPath = join(outputDir, `${safeTitle}_top_segments.txt`);
|
const segmentInfoPath = join(outputDir, `${safeTitle}_top_segments.txt`);
|
||||||
let segmentInfo = `# ${info.title}\n\n`;
|
let segmentInfo = `# ${info.title}\n\n`;
|
||||||
segmentInfo += `Top ${topN} segments by intensity (highest re-watch rate):\n\n`;
|
segmentInfo += `Top ${topN} high-intensity regions (intensity >= ${(intensityThreshold * 100).toFixed(0)}%):\n\n`;
|
||||||
|
|
||||||
for (let i = 0; i < topSegments.length; i++) {
|
for (let i = 0; i < topSegments.length; i++) {
|
||||||
const seg = topSegments[i];
|
const seg = topSegments[i];
|
||||||
const duration = seg.end - seg.start;
|
const duration = seg.end - seg.start;
|
||||||
segmentInfo += `${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)}\n`;
|
segmentInfo += `${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)}\n`;
|
||||||
segmentInfo += ` Duration: ${formatTime(duration)}\n`;
|
segmentInfo += ` Duration: ${formatTime(duration)}\n`;
|
||||||
segmentInfo += ` Intensity: ${(seg.intensity * 100).toFixed(1)}%\n\n`;
|
segmentInfo += ` Total Intensity: ${seg.totalIntensity.toFixed(2)}\n`;
|
||||||
|
segmentInfo += ` Segments: ${seg.segmentCount}\n\n`;
|
||||||
}
|
}
|
||||||
|
|
||||||
writeFileSync(segmentInfoPath, segmentInfo);
|
writeFileSync(segmentInfoPath, segmentInfo);
|
||||||
|
|||||||
@@ -12,14 +12,16 @@ async function main() {
|
|||||||
console.log("Options:");
|
console.log("Options:");
|
||||||
console.log(" -o, --output <dir> Output directory (default: ./downloads)");
|
console.log(" -o, --output <dir> Output directory (default: ./downloads)");
|
||||||
console.log(" -f, --format <fmt> Video format (default: best)");
|
console.log(" -f, --format <fmt> Video format (default: best)");
|
||||||
console.log(" -n, --top <num> Number of top segments (default: 10)");
|
console.log(" -n, --top <num> Number of top regions (default: 5)");
|
||||||
|
console.log(" -t, --threshold <0-1> Intensity threshold (default: 0.5)");
|
||||||
console.log(" -h, --help Show help");
|
console.log(" -h, --help Show help");
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`Analyzing video: ${args.url}`);
|
console.log(`Analyzing video: ${args.url}`);
|
||||||
console.log(`Output directory: ${args.output}`);
|
console.log(`Output directory: ${args.output}`);
|
||||||
console.log(`Top ${args.topN} segments by integral jump\n`);
|
console.log(`Intensity threshold: ${(args.threshold * 100).toFixed(0)}%`);
|
||||||
|
console.log(`Top ${args.topN} regions\n`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await downloadMostWatchedSegment({
|
await downloadMostWatchedSegment({
|
||||||
@@ -27,6 +29,7 @@ async function main() {
|
|||||||
outputDir: args.output,
|
outputDir: args.output,
|
||||||
format: args.format,
|
format: args.format,
|
||||||
topN: args.topN,
|
topN: args.topN,
|
||||||
|
intensityThreshold: args.threshold,
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error:", error instanceof Error ? error.message : error);
|
console.error("Error:", error instanceof Error ? error.message : error);
|
||||||
|
|||||||
Reference in New Issue
Block a user