diff --git a/src/cli/args.ts b/src/cli/args.ts index aad9328..40518ce 100644 --- a/src/cli/args.ts +++ b/src/cli/args.ts @@ -3,13 +3,15 @@ export interface CliArgs { output: string; format: string; topN: number; + threshold: number; } export function parseArgs(): CliArgs { const args: CliArgs = { output: "./downloads", format: "best", - topN: 10, + topN: 5, + threshold: 0.5, }; const rawArgs = Bun.argv; @@ -25,7 +27,10 @@ export function parseArgs(): CliArgs { args.format = nextArg || "best"; i++; } else if (arg === "-n" || arg === "--top") { - args.topN = parseInt(nextArg || "10", 10); + args.topN = parseInt(nextArg || "5", 10); + i++; + } else if (arg === "-t" || arg === "--threshold") { + args.threshold = parseFloat(nextArg || "0.5"); i++; } else if (arg === "-h" || arg === "--help") { console.log(`YouTube Most Watched Segments Downloader @@ -36,14 +41,16 @@ Arguments: YouTube video URL (required) Options: - -o, --output Output directory (default: ./downloads) - -f, --format Video format (default: best) - -n, --top Number of top segments to show (default: 10) - -h, --help Show this help message + -o, --output Output directory (default: ./downloads) + -f, --format Video format (default: best) + -n, --top Number of top regions (default: 5) + -t, --threshold <0-1> Intensity threshold (default: 0.5) + Lower = more segments, Higher = stricter + -h, --help Show this help message Examples: yt-segments "https://www.youtube.com/watch?v=abc123" - yt-segments "https://youtu.be/abc123" -o ./videos -n 5 + yt-segments "https://youtu.be/abc123" -o ./videos -t 0.6 `); process.exit(0); } else if (!arg.startsWith("-") && !arg.includes("bun")) { diff --git a/src/cli/downloader.ts b/src/cli/downloader.ts index e6c565c..449400c 100644 --- a/src/cli/downloader.ts +++ b/src/cli/downloader.ts @@ -7,6 +7,7 @@ export interface DownloadOptions { outputDir: string; format: string; topN: number; + intensityThreshold: number; } interface RawHeatmapSegment { @@ -23,6 +24,8 @@ interface 
/**
 * A heatmap sample reduced to a time window and its intensity.
 * `start`/`end` are treated as seconds by the clustering gap check below.
 */
interface RawSegment {
  start: number;
  end: number;
  intensity: number;
}

/**
 * Groups neighbouring high-intensity heatmap segments into contiguous regions.
 *
 * Segments whose intensity is below `threshold` are discarded. The remaining
 * segments are walked in start-time order; a segment joins the currently open
 * region when it overlaps it or starts no more than `maxGapSeconds` after the
 * region's end, otherwise it opens a new region.
 *
 * The input array and the segment objects inside it are never modified.
 *
 * @param segments - Candidate segments, in any order.
 * @param threshold - Minimum intensity (inclusive) a segment needs to be kept.
 * @param maxGapSeconds - Largest gap between a region's end and the next
 *   segment's start that still counts as the same region. Defaults to 10.
 * @returns One entry per region, ordered by `totalIntensity` descending.
 *   `intensity` is the peak intensity of the region's members,
 *   `totalIntensity` is the sum of their intensities and `segmentCount` is
 *   how many segments were merged. Empty when nothing reaches the threshold.
 */
function clusterHighIntensitySegments(
  segments: RawSegment[],
  threshold: number,
  maxGapSeconds = 10
): ProcessedSegment[] {
  // filter() returns a fresh array, so the in-place sort cannot reorder the
  // caller's list.
  const hot = segments
    .filter((seg) => seg.intensity >= threshold)
    .sort((a, b) => a.start - b.start);

  const clusters: ProcessedSegment[] = [];
  let open: ProcessedSegment | undefined;

  for (const seg of hot) {
    // A negative gap means the segment overlaps the open region; any overlap
    // merges, so the emitted regions never overlap each other.
    if (open !== undefined && seg.start - open.end <= maxGapSeconds) {
      open.end = Math.max(open.end, seg.end);
      open.intensity = Math.max(open.intensity, seg.intensity);
      open.totalIntensity += seg.intensity;
      open.segmentCount += 1;
    } else {
      // Start a region from copied values so later merges only touch our own
      // object, never the caller's segment.
      open = {
        start: seg.start,
        end: seg.end,
        intensity: seg.intensity,
        totalIntensity: seg.intensity,
        segmentCount: 1,
      };
      clusters.push(open);
    }
  }

  // Strongest regions first.
  clusters.sort((a, b) => b.totalIntensity - a.totalIntensity);

  return clusters;
}
getTopSegmentsByIntensity(info.heatmap, topN); + // Get top clustered segments + const topSegments = getTopSegments(info.heatmap, topN, intensityThreshold); if (topSegments.length === 0) { - console.log("No valid segments found. Downloading full video..."); + console.log("No high-intensity regions found. Downloading full video..."); const outputPath = join(outputDir, `${safeTitle}.%(ext)s`); await downloadSegment(url, outputPath, 0, info.duration, format); return; @@ -213,7 +300,7 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom for (let i = 0; i < topSegments.length; i++) { const seg = topSegments[i]; const duration = seg.end - seg.start; - console.log(`${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Intensity: ${(seg.intensity * 100).toFixed(1)}%`); + console.log(`${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Total Intensity: ${seg.totalIntensity.toFixed(2)}`); } console.log(""); @@ -228,14 +315,15 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom // Save segment info const segmentInfoPath = join(outputDir, `${safeTitle}_top_segments.txt`); let segmentInfo = `# ${info.title}\n\n`; - segmentInfo += `Top ${topN} segments by intensity (highest re-watch rate):\n\n`; + segmentInfo += `Top ${topN} high-intensity regions (intensity >= ${(intensityThreshold * 100).toFixed(0)}%):\n\n`; for (let i = 0; i < topSegments.length; i++) { const seg = topSegments[i]; const duration = seg.end - seg.start; segmentInfo += `${i + 1}. 
${formatTime(seg.start)} - ${formatTime(seg.end)}\n`; segmentInfo += ` Duration: ${formatTime(duration)}\n`; - segmentInfo += ` Intensity: ${(seg.intensity * 100).toFixed(1)}%\n\n`; + segmentInfo += ` Total Intensity: ${seg.totalIntensity.toFixed(2)}\n`; + segmentInfo += ` Segments: ${seg.segmentCount}\n\n`; } writeFileSync(segmentInfoPath, segmentInfo); diff --git a/src/cli/index.ts b/src/cli/index.ts index b82acf9..0e0de70 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -10,16 +10,18 @@ async function main() { console.error("Error: YouTube URL is required"); console.log("Usage: yt-segments [options]"); console.log("Options:"); - console.log(" -o, --output Output directory (default: ./downloads)"); - console.log(" -f, --format Video format (default: best)"); - console.log(" -n, --top Number of top segments (default: 10)"); - console.log(" -h, --help Show help"); + console.log(" -o, --output Output directory (default: ./downloads)"); + console.log(" -f, --format Video format (default: best)"); + console.log(" -n, --top Number of top regions (default: 5)"); + console.log(" -t, --threshold <0-1> Intensity threshold (default: 0.5)"); + console.log(" -h, --help Show help"); process.exit(1); } console.log(`Analyzing video: ${args.url}`); console.log(`Output directory: ${args.output}`); - console.log(`Top ${args.topN} segments by integral jump\n`); + console.log(`Intensity threshold: ${(args.threshold * 100).toFixed(0)}%`); + console.log(`Top ${args.topN} regions\n`); try { await downloadMostWatchedSegment({ @@ -27,6 +29,7 @@ async function main() { outputDir: args.output, format: args.format, topN: args.topN, + intensityThreshold: args.threshold, }); } catch (error) { console.error("Error:", error instanceof Error ? error.message : error);